/[pearpc]/src/cpu/cpu_jitc_x86/ppc_fpu.h
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /src/cpu/cpu_jitc_x86/ppc_fpu.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Wed Sep 5 17:11:21 2007 UTC (16 years, 6 months ago) by dpavlin
File MIME type: text/plain
File size: 14152 byte(s)
import upstream CVS
1 /*
2 * PearPC
3 * ppc_fpu.h
4 *
5 * Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net)
6 * Copyright (C) 2003, 2004 Stefan Weyergraf
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22 #ifndef __PPC_FPU_H__
23 #define __PPC_FPU_H__
24
25
// Sign bit of a raw 64-bit double image (bit 63).
#define FPU_SIGN_BIT (0x8000000000000000ULL)

// Field extraction from a raw 64-bit double image.
// FPD_SIGN yields 0 or 1. FPD_EXP is the raw value shifted right by 52, so the
// sign bit is still present above the 11 exponent bits; mask with 0x7ff.
#define FPD_SIGN(v) (((v)&FPU_SIGN_BIT)?1:0)
#define FPD_EXP(v) ((v)>>52)
#define FPD_FRAC(v) ((v)&0x000fffffffffffffULL)

// Field extraction from a raw 32-bit single image.
// FPS_SIGN yields the sign bit in place (0 or 0x80000000), not 0/1.
// FPS_EXP still contains the sign bit above the 8 exponent bits; mask with 0xff.
#define FPS_SIGN(v) ((v)&0x80000000)
#define FPS_EXP(v) ((v)>>23)
#define FPS_FRAC(v) ((v)&0x007fffff)

// Assemble / split a raw double image from / into sign, exponent and mantissa.
// m must be uint64
#define FPD_PACK_VAR(f, s, e, m) (f) = ((s)?FPU_SIGN_BIT:0ULL)|((((uint64)(e))&0x7ff)<<52)|((m)&((1ULL<<52)-1))
#define FPD_UNPACK_VAR(f, s, e, m) {(s)=FPD_SIGN(f);(e)=FPD_EXP(f)&0x7ff;(m)=FPD_FRAC(f);}

// Assemble / split a raw single image. Note that FPS_PACK_VAR does not mask
// the exponent, so e must already be in the range 0..255.
#define FPS_PACK_VAR(f, s, e, m) (f) = ((s)?0x80000000:0)|((e)<<23)|((m)&0x7fffff)
#define FPS_UNPACK_VAR(f, s, e, m) {(s)=FPS_SIGN(f);(e)=FPS_EXP(f)&0xff;(m)=FPS_FRAC(f);}

// Split a raw double image into the s/e/m members of fvar.
// This has to forward to FPD_UNPACK_VAR: a macro that names itself is not
// re-expanded by the preprocessor, so the former self-reference could never
// compile when used.
#define FPD_UNPACK(freg, fvar) FPD_UNPACK_VAR(freg, (fvar).s, (fvar).e, (fvar).m)
44
45
46 void ppc_fpu_test();
47
// Classification tag stored in the `type` member of the unpacked
// floating-point structures below.
enum ppc_fpr_type {
	ppc_fpr_norm,	// finite and non-zero
	ppc_fpr_zero,	// zero
	ppc_fpr_NaN,	// not a number
	ppc_fpr_Inf	// infinity
};
54
55 struct ppc_quadro {
56 ppc_fpr_type type;
57 int s;
58 int e;
59 uint64 m0; // most significant
60 uint64 m1; // least significant
61 };
62
63 struct ppc_double {
64 ppc_fpr_type type;
65 int s;
66 int e;
67 uint64 m;
68 };
69
70 struct ppc_single {
71 ppc_fpr_type type;
72 int s;
73 int e;
74 uint m;
75 };
76
77 inline int ppc_count_leading_zeros(uint64 i)
78 {
79 int ret;
80 uint32 dd = i >> 32;
81 if (dd) {
82 ret = 31;
83 if (dd > 0xffff) { ret -= 16; dd >>= 16; }
84 if (dd > 0xff) { ret -= 8; dd >>= 8; }
85 if (dd & 0xf0) { ret -= 4; dd >>= 4; }
86 if (dd & 0xc) { ret -= 2; dd >>= 2; }
87 if (dd & 0x2) ret--;
88 } else {
89 dd = (uint32)i;
90 ret = 63;
91 if (dd > 0xffff) { ret -= 16; dd >>= 16; }
92 if (dd > 0xff) { ret -= 8; dd >>= 8; }
93 if (dd & 0xf0) { ret -= 4; dd >>= 4; }
94 if (dd & 0xc) { ret -= 2; dd >>= 2; }
95 if (dd & 0x2) ret--;
96 }
97 return ret;
98 }
99
100 inline int ppc_fpu_normalize_quadro(ppc_quadro &d)
101 {
102 int ret = d.m0 ? ppc_count_leading_zeros(d.m0) : 64 + ppc_count_leading_zeros(d.m1);
103 return ret;
104 }
105
106 inline int ppc_fpu_normalize(ppc_double &d)
107 {
108 return ppc_count_leading_zeros(d.m);
109 }
110
111 inline int ppc_fpu_normalize_single(ppc_single &s)
112 {
113 int ret;
114 uint32 dd = s.m;
115 ret = 31;
116 if (dd > 0xffff) { ret -= 16; dd >>= 16; }
117 if (dd > 0xff) { ret -= 8; dd >>= 8; }
118 if (dd & 0xf0) { ret -= 4; dd >>= 4; }
119 if (dd & 0xc) { ret -= 2; dd >>= 2; }
120 if (dd & 0x2) ret--;
121 return ret;
122 }
123
124 #include "tools/snprintf.h"
125 inline void ppc_fpu_unpack_double(ppc_double &res, uint64 d)
126 {
127 FPD_UNPACK_VAR(d, res.s, res.e, res.m);
128 // ht_printf("ud: %qx: s:%d e:%d m:%qx\n", d, res.s, res.e, res.m);
129 // .124
130 if (res.e == 2047) {
131 if (res.m == 0) {
132 res.type = ppc_fpr_Inf;
133 } else {
134 res.type = ppc_fpr_NaN;
135 }
136 } else if (res.e == 0) {
137 if (res.m == 0) {
138 res.type = ppc_fpr_zero;
139 } else {
140 // normalize denormalized exponent
141 int diff = ppc_fpu_normalize(res) - 8;
142 res.m <<= diff+3;
143 res.e -= 1023 - 1 + diff;
144 res.type = ppc_fpr_norm;
145 }
146 } else {
147 res.e -= 1023; // unbias exponent
148 res.type = ppc_fpr_norm;
149 // add implied bit
150 res.m |= 1ULL<<52;
151 res.m <<= 3;
152 }
153 // ht_printf("ud: %qx: s:%d e:%d m:%qx\n", d, res.s, res.e, res.m);
154 }
155
156
157 inline void ppc_fpu_unpack_single(ppc_single &res, uint32 d)
158 {
159 FPS_UNPACK_VAR(d, res.s, res.e, res.m);
160 // .124
161 if (res.e == 255) {
162 if (res.m == 0) {
163 res.type = ppc_fpr_Inf;
164 } else {
165 res.type = ppc_fpr_NaN;
166 }
167 } else if (res.e == 0) {
168 if (res.m == 0) {
169 res.type = ppc_fpr_zero;
170 } else {
171 // normalize denormalized exponent
172 int diff = ppc_fpu_normalize_single(res) - 8;
173 res.m <<= diff+3;
174 res.e -= 127 - 1 + diff;
175 res.type = ppc_fpr_norm;
176 }
177 } else {
178 res.e -= 127; // unbias exponent
179 res.type = ppc_fpr_norm;
180 // add implied bit
181 res.m |= 1<<23;
182 res.m <<= 3;
183 }
184 }
185
186 inline uint32 ppc_fpu_round(ppc_double &d)
187 {
188 #if 1
189 // .132
190 switch (FPSCR_RN(gCPU.fpscr)) {
191 case FPSCR_RN_NEAR:
192 if (d.m & 4) {
193 // guard == 1
194 if (d.m & 3) {
195 // round || sticky
196 d.m += 8;
197 } else if (d.m & 8) {
198 // lsb set
199 d.m += 8;
200 }
201 return FPSCR_XX;
202 } else {
203 // guard == 0
204 return ((d.m & 7) == 0) ? 0 : FPSCR_XX;
205 }
206 /* if (d.m & 0x7) {
207 if ((d.m & 0x7) != 4) {
208 d.m += 4;
209 } else if (d.m & 8) {
210 d.m += 4;
211 }
212 return FPSCR_XX;
213 }*/
214 return 0;
215 case FPSCR_RN_ZERO:
216 if (d.m & 0x7) {
217 return FPSCR_XX;
218 }
219 return 0;
220 case FPSCR_RN_PINF:
221 if (!d.s && (d.m & 0x7)) {
222 d.m += 8;
223 return FPSCR_XX;
224 }
225 return 0;
226 case FPSCR_RN_MINF:
227 if (d.s && (d.m & 0x7)) {
228 d.m += 8;
229 return FPSCR_XX;
230 }
231 return 0;
232 }
233 return 0;
234 #else
235 // .132
236 switch (FPSCR_RN(gCPU.fpscr)) {
237 case FPSCR_RN_NEAR:
238 if (d.m & 0x7) {
239 if ((d.m & 0x7) != 4) {
240 d.m += 4;
241 } else if (d.m & 8) {
242 d.m += 4;
243 }
244 return FPSCR_XX;
245 }
246 return 0;
247 case FPSCR_RN_ZERO:
248 if (d.m & 0x7) {
249 return FPSCR_XX;
250 }
251 return 0;
252 case FPSCR_RN_PINF:
253 if (!d.s && (d.m & 0x7)) {
254 d.m += 8;
255 return FPSCR_XX;
256 }
257 return 0;
258 case FPSCR_RN_MINF:
259 if (d.s && (d.m & 0x7)) {
260 d.m += 8;
261 return FPSCR_XX;
262 }
263 return 0;
264 }
265 return 0;
266 #endif
267 }
268
269 inline uint32 ppc_fpu_round_single(ppc_single &s)
270 {
271 switch (FPSCR_RN(gCPU.fpscr)) {
272 case FPSCR_RN_NEAR:
273 if (s.m & 0x7) {
274 if ((s.m & 0x7) != 4) {
275 s.m += 4;
276 } else if (s.m & 8) {
277 s.m += 4;
278 }
279 return FPSCR_XX;
280 }
281 return 0;
282 case FPSCR_RN_ZERO:
283 if (s.m & 0x7) {
284 return FPSCR_XX;
285 }
286 return 0;
287 case FPSCR_RN_PINF:
288 if (!s.s && (s.m & 0x7)) {
289 s.m += 8;
290 return FPSCR_XX;
291 }
292 return 0;
293 case FPSCR_RN_MINF:
294 if (s.s && (s.m & 0x7)) {
295 s.m += 8;
296 return FPSCR_XX;
297 }
298 return 0;
299 }
300 return 0;
301 }
302
303 inline uint32 ppc_fpu_round_single(ppc_double &s)
304 {
305 switch (FPSCR_RN(gCPU.fpscr)) {
306 case FPSCR_RN_NEAR:
307 if (s.m & 0x7) {
308 if ((s.m & 0x7) != 4) {
309 s.m += 4;
310 } else if (s.m & 8) {
311 s.m += 4;
312 }
313 return FPSCR_XX;
314 }
315 return 0;
316 case FPSCR_RN_ZERO:
317 if (s.m & 0x7) {
318 return FPSCR_XX;
319 }
320 return 0;
321 case FPSCR_RN_PINF:
322 if (!s.s && (s.m & 0x7)) {
323 s.m += 8;
324 return FPSCR_XX;
325 }
326 return 0;
327 case FPSCR_RN_MINF:
328 if (s.s && (s.m & 0x7)) {
329 s.m += 8;
330 return FPSCR_XX;
331 }
332 return 0;
333 }
334 return 0;
335 }
336
337 inline uint32 ppc_fpu_pack_double(ppc_double &d, uint64 &res)
338 {
339 // .124
340 uint32 ret = 0;
341 // ht_printf("pd_type: %d\n", d.type);
342 switch (d.type) {
343 case ppc_fpr_norm:
344 // ht_printf("pd: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
345 d.e += 1023; // bias exponent
346 // ht_printf("pd: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
347 if (d.e > 0) {
348 bool guardbit = d.m & 4;
349 ret |= ppc_fpu_round(d);
350 if (d.m & (1ULL<<56)) {
351 d.e++;
352 d.m >>= 4;
353 } else {
354 d.m >>= 3;
355 }
356 if (d.e >= 2047) {
357 if (guardbit) {
358 d.e = 2047;
359 d.m = 0;
360 ret |= FPSCR_OX;
361 } else {
362 d.e = 2046;
363 d.m = (1ULL<<53)-1;
364 }
365 }
366 } else {
367 // number is denormalized
368 d.e = -d.e+1;
369 if (d.e <= 56) {
370 d.m >>= d.e;
371 ret |= ppc_fpu_round(d);
372 d.m <<= 1;
373 if (d.m & (1ULL<<56)) {
374 d.e = 1;
375 d.m = 0;
376 } else {
377 d.e = 0;
378 d.m >>= 4;
379 ret |= FPSCR_UX;
380 }
381 } else {
382 // underflow to zero
383 d.e = 0;
384 d.m = 0;
385 ret |= FPSCR_UX;
386 }
387 }
388 break;
389 case ppc_fpr_zero:
390 d.e = 0;
391 d.m = 0;
392 break;
393 case ppc_fpr_NaN:
394 d.e = 2047;
395 d.m = 1;
396 break;
397 case ppc_fpr_Inf:
398 d.e = 2047;
399 d.m = 0;
400 break;
401 }
402 // ht_printf("pd: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
403 FPD_PACK_VAR(res, d.s, d.e, d.m);
404 return ret;
405 }
406
407 inline uint32 ppc_fpu_pack_single(ppc_double &d, uint32 &res)
408 {
409 // .124
410 uint32 ret = 0;
411 switch (d.type) {
412 case ppc_fpr_norm:
413 // ht_printf("ps: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
414 d.e += 127; // bias exponent
415 d.m >>= 29;
416 // ht_printf("ps: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
417 if (d.e > 0) {
418 ret |= ppc_fpu_round_single(d);
419 if (d.m & (1ULL<<27)) {
420 d.e++;
421 d.m >>= 4;
422 } else {
423 d.m >>= 3;
424 }
425 if (d.e >= 255) {
426 d.e = 255;
427 d.m = 0;
428 ret |= FPSCR_OX;
429 }
430 } else {
431 // number is denormalized
432 d.e = -d.e+1;
433 if (d.e <= 27) {
434 d.m >>= d.e;
435 ret |= ppc_fpu_round_single(d);
436 d.m <<= 1;
437 if (d.m & (1ULL<<27)) {
438 d.e = 1;
439 d.m = 0;
440 } else {
441 d.e = 0;
442 d.m >>= 4;
443 ret |= FPSCR_UX;
444 }
445 } else {
446 // underflow to zero
447 d.e = 0;
448 d.m = 0;
449 ret |= FPSCR_UX;
450 }
451 }
452 break;
453 case ppc_fpr_zero:
454 d.e = 0;
455 d.m = 0;
456 break;
457 case ppc_fpr_NaN:
458 d.e = 255;
459 d.m = 1;
460 break;
461 case ppc_fpr_Inf:
462 d.e = 255;
463 d.m = 0;
464 break;
465 }
466 // ht_printf("ps: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
467 FPS_PACK_VAR(res, d.s, d.e, d.m);
468 return ret;
469 }
470
471 inline void ppc_fpu_single_to_double(ppc_single &s, ppc_double &d)
472 {
473 d.s = s.s;
474 d.e = s.e;
475 d.m = ((uint64)s.m)<<29;
476 d.type = s.type;
477 }
478
479 inline uint32 ppc_fpu_pack_double_as_single(ppc_double &d, uint64 &res)
480 {
481 ppc_single s;
482 s.m = d.m >> 29;
483 s.e = d.e;
484 s.s = d.s;
485 s.type = d.type;
486 uint32 ret = 0;
487
488 switch (s.type) {
489 case ppc_fpr_norm:
490 s.e = d.e+127;
491 if (s.e > 0) {
492 ret |= ppc_fpu_round_single(s);
493 if (s.m & (1<<27)) {
494 s.e++;
495 s.m >>= 4;
496 } else {
497 s.m >>= 3;
498 }
499 if (s.e >= 255) {
500 s.type = ppc_fpr_Inf;
501 s.e = 255;
502 s.m = 0;
503 ret |= FPSCR_OX;
504 }
505 d.e = s.e-127;
506 } else {
507 // number is denormalized
508 s.e = -s.e+1;
509 if (s.e <= 27) {
510 s.m >>= s.e;
511 ret |= ppc_fpu_round_single(s);
512 s.m <<= 1;
513 if (s.m & (1<<27)) {
514 s.e = 1;
515 s.m = 0;
516 } else {
517 s.e = 0;
518 s.m >>= 4;
519 ret |= FPSCR_UX;
520 }
521 } else {
522 // underflow to zero
523 s.type = ppc_fpr_zero;
524 s.e = 0;
525 s.m = 0;
526 ret |= FPSCR_UX;
527 }
528 }
529 break;
530 case ppc_fpr_zero:
531 s.e = 0;
532 s.m = 0;
533 break;
534 case ppc_fpr_NaN:
535 s.e = 2047;
536 s.m = 1;
537 break;
538 case ppc_fpr_Inf:
539 s.e = 2047;
540 s.m = 0;
541 break;
542 }
543 if (s.type == ppc_fpr_norm) {
544 d.m = ((uint64)(s.m))<<32;
545 } else {
546 d.m = s.m;
547 }
548 // ht_printf("dm: %qx\n", d.m);
549 ret |= ppc_fpu_pack_double(d, res);
550 return ret;
551 }
552
553 inline uint32 ppc_fpu_double_to_int(ppc_double &d)
554 {
555 switch (d.type) {
556 case ppc_fpr_norm: {
557 if (d.e < 0) {
558 switch (FPSCR_RN(gCPU.fpscr)) {
559 case FPSCR_RN_NEAR:
560 if (d.e < -1) {
561 return 0;
562 } else {
563 return d.s ? (uint32)-1 : 1;
564 }
565 case FPSCR_RN_ZERO:
566 return 0;
567 case FPSCR_RN_PINF:
568 if (d.s) {
569 return 0;
570 } else {
571 return 1;
572 }
573 case FPSCR_RN_MINF:
574 if (d.s) {
575 return (uint32)-1;
576 } else {
577 return 0;
578 }
579 }
580 }
581 if (d.e >= 31) {
582 if (d.s) {
583 return 0x80000000;
584 } else {
585 return 0x7fffffff;
586 }
587 }
588 int i=0;
589 uint64 mask = (1ULL<<(56 - d.e - 1))-1;
590 // we have to round
591 switch (FPSCR_RN(gCPU.fpscr)) {
592 case FPSCR_RN_NEAR:
593 if (d.m & mask) {
594 if (d.m & (1ULL<<(56 - d.e - 2))) {
595 i = 1;
596 }
597 }
598 break;
599 case FPSCR_RN_ZERO:
600 break;
601 case FPSCR_RN_PINF:
602 if (!d.s && (d.m & mask)) {
603 i = 1;
604 }
605 break;
606 case FPSCR_RN_MINF:
607 if (d.s && (d.m & mask)) {
608 i = 1;
609 }
610 break;
611 }
612 d.m >>= 56 - d.e - 1;
613 d.m += i;
614 if (d.m & 0x80000000) {
615 if (d.s) {
616 return 0x80000000;
617 } else {
618 return 0x7fffffff;
619 }
620 }
621 return d.s ? -d.m : d.m;
622 }
623 case ppc_fpr_zero:
624 return 0;
625 case ppc_fpr_Inf:
626 case ppc_fpr_NaN:
627 if (d.s) {
628 return 0x80000000;
629 } else {
630 return 0x7fffffff;
631 }
632 }
633 return 0;
634 }
635
636 double ppc_fpu_get_double(uint64 d);
637 double ppc_fpu_get_double(ppc_double &d);
638
639 #include "jitc.h"
640 #include "jitc_asm.h"
641 #include "x86asm.h"
642 #include "ppc_exc.h"
643
644 static UNUSED void ppc_opc_gen_check_fpu()
645 {
646 if (!gJITC.checkedFloat) {
647 jitcFloatRegisterClobberAll();
648 jitcFlushVectorRegister();
649 jitcClobberCarryAndFlags();
650
651 NativeReg r1 = jitcGetClientRegister(PPC_MSR);
652 asmALU(X86_TEST, r1, MSR_FP);
653 NativeAddress fixup = asmJxxFixup(X86_NZ);
654
655 jitcFlushRegisterDirty();
656 asmALU(X86_MOV, ESI, gJITC.pc);
657 asmJMP((NativeAddress)ppc_no_fpu_exception_asm);
658
659 asmResolveFixup(fixup);
660 gJITC.checkedFloat = true;
661 }
662 }
663
664 void ppc_opc_fabsx();
665 void ppc_opc_faddx();
666 void ppc_opc_faddsx();
667 void ppc_opc_fcmpo();
668 void ppc_opc_fcmpu();
669 void ppc_opc_fctiwx();
670 void ppc_opc_fctiwzx();
671 void ppc_opc_fdivx();
672 void ppc_opc_fdivsx();
673 void ppc_opc_fmaddx();
674 void ppc_opc_fmaddsx();
675 void ppc_opc_fmrx();
676 void ppc_opc_fmsubx();
677 void ppc_opc_fmsubsx();
678 void ppc_opc_fmulx();
679 void ppc_opc_fmulsx();
680 void ppc_opc_fnabsx();
681 void ppc_opc_fnegx();
682 void ppc_opc_fnmaddx();
683 void ppc_opc_fnmaddsx();
684 void ppc_opc_fnmsubx();
685 void ppc_opc_fnmsubsx();
686 void ppc_opc_fresx();
687 void ppc_opc_frspx();
688 void ppc_opc_frsqrtex();
689 void ppc_opc_fselx();
690 void ppc_opc_fsqrtx();
691 void ppc_opc_fsqrtsx();
692 void ppc_opc_fsubx();
693 void ppc_opc_fsubsx();
694
695 JITCFlow ppc_opc_gen_fabsx();
696 JITCFlow ppc_opc_gen_faddx();
697 JITCFlow ppc_opc_gen_faddsx();
698 JITCFlow ppc_opc_gen_fcmpo();
699 JITCFlow ppc_opc_gen_fcmpu();
700 JITCFlow ppc_opc_gen_fctiwx();
701 JITCFlow ppc_opc_gen_fctiwzx();
702 JITCFlow ppc_opc_gen_fdivx();
703 JITCFlow ppc_opc_gen_fdivsx();
704 JITCFlow ppc_opc_gen_fmaddx();
705 JITCFlow ppc_opc_gen_fmaddsx();
706 JITCFlow ppc_opc_gen_fmrx();
707 JITCFlow ppc_opc_gen_fmsubx();
708 JITCFlow ppc_opc_gen_fmsubsx();
709 JITCFlow ppc_opc_gen_fmulx();
710 JITCFlow ppc_opc_gen_fmulsx();
711 JITCFlow ppc_opc_gen_fnabsx();
712 JITCFlow ppc_opc_gen_fnegx();
713 JITCFlow ppc_opc_gen_fnmaddx();
714 JITCFlow ppc_opc_gen_fnmaddsx();
715 JITCFlow ppc_opc_gen_fnmsubx();
716 JITCFlow ppc_opc_gen_fnmsubsx();
717 JITCFlow ppc_opc_gen_fresx();
718 JITCFlow ppc_opc_gen_frspx();
719 JITCFlow ppc_opc_gen_frsqrtex();
720 JITCFlow ppc_opc_gen_fselx();
721 JITCFlow ppc_opc_gen_fsqrtx();
722 JITCFlow ppc_opc_gen_fsqrtsx();
723 JITCFlow ppc_opc_gen_fsubx();
724 JITCFlow ppc_opc_gen_fsubsx();
725
726 #endif

  ViewVC Help
Powered by ViewVC 1.1.26