/[pearpc]/src/cpu/cpu_generic/ppc_fpu.h
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /src/cpu/cpu_generic/ppc_fpu.h

Parent Directory | Revision Log


Revision 1 - (show annotations)
Wed Sep 5 17:11:21 2007 UTC (16 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 11663 byte(s)
import upstream CVS
1 /*
2 * PearPC
3 * ppc_fpu.h
4 *
5 * Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21 #ifndef __PPC_FPU_H__
22 #define __PPC_FPU_H__
23
24
// Sign bit (bit 63) of a packed 64-bit double.
#define FPU_SIGN_BIT (0x8000000000000000ULL)

// Field extractors for a packed 64-bit double.
// FPD_SIGN yields 0 or 1.
// FPD_EXP yields everything above the 52 fraction bits (the sign bit is
// still included; mask with 0x7ff to get the exponent alone).
// FPD_FRAC yields the low 52 fraction bits.
#define FPD_SIGN(v) (((v)&FPU_SIGN_BIT)?1:0)
#define FPD_EXP(v) ((v)>>52)
#define FPD_FRAC(v) ((v)&0x000fffffffffffffULL)

// Field extractors for a packed 32-bit single.
// Unlike FPD_SIGN, FPS_SIGN yields 0 or 0x80000000 (not 0 or 1).
// FPS_EXP still includes the sign bit; mask with 0xff for the exponent alone.
#define FPS_SIGN(v) ((v)&0x80000000)
#define FPS_EXP(v) ((v)>>23)
#define FPS_FRAC(v) ((v)&0x007fffff)

// Assemble a packed double in f from sign s (zero / non-zero), biased
// exponent e (low 11 bits used) and fraction m (low 52 bits used).
// m must be uint64
#define FPD_PACK_VAR(f, s, e, m) ((f) = (((s)?FPU_SIGN_BIT:0ULL)|((((uint64)(e))&0x7ff)<<52)|((m)&((1ULL<<52)-1))))
// Split the packed double f into sign (0/1), biased exponent and fraction.
#define FPD_UNPACK_VAR(f, s, e, m) {(s)=FPD_SIGN(f);(e)=FPD_EXP(f)&0x7ff;(m)=FPD_FRAC(f);}

// Assemble a packed single in f from sign s (zero / non-zero), biased
// exponent e (low 8 bits used, so a stray exponent cannot leak into the
// sign bit) and fraction m (low 23 bits used).
#define FPS_PACK_VAR(f, s, e, m) ((f) = (((s)?0x80000000:0)|(((e)&0xff)<<23)|((m)&0x7fffff)))
// Split the packed single f into sign (0 / 0x80000000), biased exponent and fraction.
#define FPS_UNPACK_VAR(f, s, e, m) {(s)=FPS_SIGN(f);(e)=FPS_EXP(f)&0xff;(m)=FPS_FRAC(f);}

// Convenience form of FPD_UNPACK_VAR that fills the s/e/m members of fvar.
// (Previously this expanded to FPD_UNPACK itself with four arguments, which
// cannot compile when used.)
#define FPD_UNPACK(freg, fvar) FPD_UNPACK_VAR(freg, (fvar).s, (fvar).e, (fvar).m)
43
44
// FPU self-test entry point; only declared here, the definition lives elsewhere.
void ppc_fpu_test();
46
// Classification of an unpacked floating-point value.
enum ppc_fpr_type {
	ppc_fpr_norm,	// finite, non-zero (the unpack routines also map denormals here)
	ppc_fpr_zero,	// exponent and fraction both zero
	ppc_fpr_NaN,	// maximum exponent, non-zero fraction
	ppc_fpr_Inf	// maximum exponent, zero fraction
};
53
54 struct ppc_quadro {
55 ppc_fpr_type type;
56 int s;
57 int e;
58 uint64 m0; // most significant
59 uint64 m1; // least significant
60 };
61
62 struct ppc_double {
63 ppc_fpr_type type;
64 int s;
65 int e;
66 uint64 m;
67 };
68
69 struct ppc_single {
70 ppc_fpr_type type;
71 int s;
72 int e;
73 uint m;
74 };
75
76 inline int ppc_count_leading_zeros(uint64 i)
77 {
78 int ret;
79 uint32 dd = i >> 32;
80 if (dd) {
81 ret = 31;
82 if (dd > 0xffff) { ret -= 16; dd >>= 16; }
83 if (dd > 0xff) { ret -= 8; dd >>= 8; }
84 if (dd & 0xf0) { ret -= 4; dd >>= 4; }
85 if (dd & 0xc) { ret -= 2; dd >>= 2; }
86 if (dd & 0x2) ret--;
87 } else {
88 dd = (uint32)i;
89 ret = 63;
90 if (dd > 0xffff) { ret -= 16; dd >>= 16; }
91 if (dd > 0xff) { ret -= 8; dd >>= 8; }
92 if (dd & 0xf0) { ret -= 4; dd >>= 4; }
93 if (dd & 0xc) { ret -= 2; dd >>= 2; }
94 if (dd & 0x2) ret--;
95 }
96 return ret;
97 }
98
99 inline int ppc_fpu_normalize_quadro(ppc_quadro &d)
100 {
101 int ret = d.m0 ? ppc_count_leading_zeros(d.m0) : 64 + ppc_count_leading_zeros(d.m1);
102 return ret;
103 }
104
105 inline int ppc_fpu_normalize(ppc_double &d)
106 {
107 return ppc_count_leading_zeros(d.m);
108 }
109
110 inline int ppc_fpu_normalize_single(ppc_single &s)
111 {
112 int ret;
113 uint32 dd = s.m;
114 ret = 31;
115 if (dd > 0xffff) { ret -= 16; dd >>= 16; }
116 if (dd > 0xff) { ret -= 8; dd >>= 8; }
117 if (dd & 0xf0) { ret -= 4; dd >>= 4; }
118 if (dd & 0xc) { ret -= 2; dd >>= 2; }
119 if (dd & 0x2) ret--;
120 return ret;
121 }
122
123 #include "tools/snprintf.h"
124 inline void ppc_fpu_unpack_double(ppc_double &res, uint64 d)
125 {
126 FPD_UNPACK_VAR(d, res.s, res.e, res.m);
127 // ht_printf("ud: %qx: s:%d e:%d m:%qx\n", d, res.s, res.e, res.m);
128 // .124
129 if (res.e == 2047) {
130 if (res.m == 0) {
131 res.type = ppc_fpr_Inf;
132 } else {
133 res.type = ppc_fpr_NaN;
134 }
135 } else if (res.e == 0) {
136 if (res.m == 0) {
137 res.type = ppc_fpr_zero;
138 } else {
139 // normalize denormalized exponent
140 int diff = ppc_fpu_normalize(res) - 8;
141 res.m <<= diff+3;
142 res.e -= 1023 - 1 + diff;
143 res.type = ppc_fpr_norm;
144 }
145 } else {
146 res.e -= 1023; // unbias exponent
147 res.type = ppc_fpr_norm;
148 // add implied bit
149 res.m |= 1ULL<<52;
150 res.m <<= 3;
151 }
152 // ht_printf("ud: %qx: s:%d e:%d m:%qx\n", d, res.s, res.e, res.m);
153 }
154
155
156 inline void ppc_fpu_unpack_single(ppc_single &res, uint32 d)
157 {
158 FPS_UNPACK_VAR(d, res.s, res.e, res.m);
159 // .124
160 if (res.e == 255) {
161 if (res.m == 0) {
162 res.type = ppc_fpr_Inf;
163 } else {
164 res.type = ppc_fpr_NaN;
165 }
166 } else if (res.e == 0) {
167 if (res.m == 0) {
168 res.type = ppc_fpr_zero;
169 } else {
170 // normalize denormalized exponent
171 int diff = ppc_fpu_normalize_single(res) - 8;
172 res.m <<= diff+3;
173 res.e -= 127 - 1 + diff;
174 res.type = ppc_fpr_norm;
175 }
176 } else {
177 res.e -= 127; // unbias exponent
178 res.type = ppc_fpr_norm;
179 // add implied bit
180 res.m |= 1<<23;
181 res.m <<= 3;
182 }
183 }
184
185 inline uint32 ppc_fpu_round(ppc_double &d)
186 {
187 // .132
188 switch (FPSCR_RN(gCPU.fpscr)) {
189 case FPSCR_RN_NEAR:
190 if (d.m & 0x7) {
191 if ((d.m & 0x7) != 4) {
192 d.m += 4;
193 } else if (d.m & 8) {
194 d.m += 4;
195 }
196 return FPSCR_XX;
197 }
198 return 0;
199 case FPSCR_RN_ZERO:
200 if (d.m & 0x7) {
201 return FPSCR_XX;
202 }
203 return 0;
204 case FPSCR_RN_PINF:
205 if (!d.s && (d.m & 0x7)) {
206 d.m += 8;
207 return FPSCR_XX;
208 }
209 return 0;
210 case FPSCR_RN_MINF:
211 if (d.s && (d.m & 0x7)) {
212 d.m += 8;
213 return FPSCR_XX;
214 }
215 return 0;
216 }
217 return 0;
218 }
219
220 inline uint32 ppc_fpu_round_single(ppc_single &s)
221 {
222 switch (FPSCR_RN(gCPU.fpscr)) {
223 case FPSCR_RN_NEAR:
224 if (s.m & 0x7) {
225 if ((s.m & 0x7) != 4) {
226 s.m += 4;
227 } else if (s.m & 8) {
228 s.m += 4;
229 }
230 return FPSCR_XX;
231 }
232 return 0;
233 case FPSCR_RN_ZERO:
234 if (s.m & 0x7) {
235 return FPSCR_XX;
236 }
237 return 0;
238 case FPSCR_RN_PINF:
239 if (!s.s && (s.m & 0x7)) {
240 s.m += 8;
241 return FPSCR_XX;
242 }
243 return 0;
244 case FPSCR_RN_MINF:
245 if (s.s && (s.m & 0x7)) {
246 s.m += 8;
247 return FPSCR_XX;
248 }
249 return 0;
250 }
251 return 0;
252 }
253
254 inline uint32 ppc_fpu_round_single(ppc_double &s)
255 {
256 switch (FPSCR_RN(gCPU.fpscr)) {
257 case FPSCR_RN_NEAR:
258 if (s.m & 0x7) {
259 if ((s.m & 0x7) != 4) {
260 s.m += 4;
261 } else if (s.m & 8) {
262 s.m += 4;
263 }
264 return FPSCR_XX;
265 }
266 return 0;
267 case FPSCR_RN_ZERO:
268 if (s.m & 0x7) {
269 return FPSCR_XX;
270 }
271 return 0;
272 case FPSCR_RN_PINF:
273 if (!s.s && (s.m & 0x7)) {
274 s.m += 8;
275 return FPSCR_XX;
276 }
277 return 0;
278 case FPSCR_RN_MINF:
279 if (s.s && (s.m & 0x7)) {
280 s.m += 8;
281 return FPSCR_XX;
282 }
283 return 0;
284 }
285 return 0;
286 }
287
288 inline uint32 ppc_fpu_pack_double(ppc_double &d, uint64 &res)
289 {
290 // .124
291 uint32 ret = 0;
292 // ht_printf("pd_type: %d\n", d.type);
293 switch (d.type) {
294 case ppc_fpr_norm:
295 // ht_printf("pd: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
296 d.e += 1023; // bias exponent
297 // ht_printf("pd: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
298 if (d.e > 0) {
299 ret |= ppc_fpu_round(d);
300 if (d.m & (1ULL<<56)) {
301 d.e++;
302 d.m >>= 4;
303 } else {
304 d.m >>= 3;
305 }
306 if (d.e >= 2047) {
307 d.e = 2047;
308 d.m = 0;
309 ret |= FPSCR_OX;
310 }
311 } else {
312 // number is denormalized
313 d.e = -d.e+1;
314 if (d.e <= 56) {
315 d.m >>= d.e;
316 ret |= ppc_fpu_round(d);
317 d.m <<= 1;
318 if (d.m & (1ULL<<56)) {
319 d.e = 1;
320 d.m = 0;
321 } else {
322 d.e = 0;
323 d.m >>= 4;
324 ret |= FPSCR_UX;
325 }
326 } else {
327 // underflow to zero
328 d.e = 0;
329 d.m = 0;
330 ret |= FPSCR_UX;
331 }
332 }
333 break;
334 case ppc_fpr_zero:
335 d.e = 0;
336 d.m = 0;
337 break;
338 case ppc_fpr_NaN:
339 d.e = 2047;
340 d.m = 1;
341 break;
342 case ppc_fpr_Inf:
343 d.e = 2047;
344 d.m = 0;
345 break;
346 }
347 // ht_printf("pd: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
348 FPD_PACK_VAR(res, d.s, d.e, d.m);
349 return ret;
350 }
351
352 inline uint32 ppc_fpu_pack_single(ppc_double &d, uint32 &res)
353 {
354 // .124
355 uint32 ret = 0;
356 switch (d.type) {
357 case ppc_fpr_norm:
358 // ht_printf("ps: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
359 d.e += 127; // bias exponent
360 d.m >>= 29;
361 // ht_printf("ps: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
362 if (d.e > 0) {
363 ret |= ppc_fpu_round_single(d);
364 if (d.m & (1ULL<<27)) {
365 d.e++;
366 d.m >>= 4;
367 } else {
368 d.m >>= 3;
369 }
370 if (d.e >= 255) {
371 d.e = 255;
372 d.m = 0;
373 ret |= FPSCR_OX;
374 }
375 } else {
376 // number is denormalized
377 d.e = -d.e+1;
378 if (d.e <= 27) {
379 d.m >>= d.e;
380 ret |= ppc_fpu_round_single(d);
381 d.m <<= 1;
382 if (d.m & (1ULL<<27)) {
383 d.e = 1;
384 d.m = 0;
385 } else {
386 d.e = 0;
387 d.m >>= 4;
388 ret |= FPSCR_UX;
389 }
390 } else {
391 // underflow to zero
392 d.e = 0;
393 d.m = 0;
394 ret |= FPSCR_UX;
395 }
396 }
397 break;
398 case ppc_fpr_zero:
399 d.e = 0;
400 d.m = 0;
401 break;
402 case ppc_fpr_NaN:
403 d.e = 255;
404 d.m = 1;
405 break;
406 case ppc_fpr_Inf:
407 d.e = 255;
408 d.m = 0;
409 break;
410 }
411 // ht_printf("ps: %qx: s:%d e:%d m:%qx\n", d, d.s, d.e, d.m);
412 FPS_PACK_VAR(res, d.s, d.e, d.m);
413 return ret;
414 }
415
416 inline void ppc_fpu_single_to_double(ppc_single &s, ppc_double &d)
417 {
418 d.s = s.s;
419 d.e = s.e;
420 d.m = ((uint64)s.m)<<29;
421 d.type = s.type;
422 }
423
424 inline uint32 ppc_fpu_pack_double_as_single(ppc_double &d, uint64 &res)
425 {
426 // .757
427 ppc_single s;
428 s.m = d.m >> 29;
429 s.e = d.e;
430 s.s = d.s;
431 s.type = d.type;
432 uint32 ret = 0;
433
434 switch (s.type) {
435 case ppc_fpr_norm:
436 s.e = d.e+127;
437 if (s.e > 0) {
438 ret |= ppc_fpu_round_single(s);
439 if (s.m & (1<<27)) {
440 s.e++;
441 s.m >>= 4;
442 } else {
443 s.m >>= 3;
444 }
445 if (s.e >= 255) {
446 s.type = ppc_fpr_Inf;
447 s.e = 255;
448 s.m = 0;
449 ret |= FPSCR_OX;
450 }
451 d.e = s.e-127;
452 } else {
453 // number is denormalized
454 s.e = -s.e+1;
455 if (s.e <= 27) {
456 s.m >>= s.e;
457 ret |= ppc_fpu_round_single(s);
458 s.m <<= 1;
459 if (s.m & (1<<27)) {
460 s.e = 1;
461 s.m = 0;
462 } else {
463 s.e = 0;
464 s.m >>= 4;
465 ret |= FPSCR_UX;
466 }
467 } else {
468 // underflow to zero
469 s.type = ppc_fpr_zero;
470 s.e = 0;
471 s.m = 0;
472 ret |= FPSCR_UX;
473 }
474 }
475 break;
476 case ppc_fpr_zero:
477 s.e = 0;
478 s.m = 0;
479 break;
480 case ppc_fpr_NaN:
481 s.e = 2047;
482 s.m = 1;
483 break;
484 case ppc_fpr_Inf:
485 s.e = 2047;
486 s.m = 0;
487 break;
488 }
489 if (s.type == ppc_fpr_norm) {
490 d.m = ((uint64)(s.m))<<32;
491 } else {
492 d.m = s.m;
493 }
494 // ht_printf("dm: %qx\n", d.m);
495 ret |= ppc_fpu_pack_double(d, res);
496 return ret;
497 }
498
499 inline uint32 ppc_fpu_double_to_int(ppc_double &d)
500 {
501 switch (d.type) {
502 case ppc_fpr_norm: {
503 if (d.e < 0) {
504 switch (FPSCR_RN(gCPU.fpscr)) {
505 case FPSCR_RN_NEAR:
506 if (d.e < -1) {
507 return 0;
508 } else {
509 return d.s ? (uint32)-1 : 1;
510 }
511 case FPSCR_RN_ZERO:
512 return 0;
513 case FPSCR_RN_PINF:
514 if (d.s) {
515 return 0;
516 } else {
517 return 1;
518 }
519 case FPSCR_RN_MINF:
520 if (d.s) {
521 return (uint32)-1;
522 } else {
523 return 0;
524 }
525 }
526 }
527 if (d.e >= 31) {
528 if (d.s) {
529 return 0x80000000;
530 } else {
531 return 0x7fffffff;
532 }
533 }
534 int i=0;
535 uint64 mask = (1ULL<<(56 - d.e - 1))-1;
536 // we have to round
537 switch (FPSCR_RN(gCPU.fpscr)) {
538 case FPSCR_RN_NEAR:
539 if (d.m & mask) {
540 if (d.m & (1ULL<<(56 - d.e - 2))) {
541 i = 1;
542 }
543 }
544 break;
545 case FPSCR_RN_ZERO:
546 break;
547 case FPSCR_RN_PINF:
548 if (!d.s && (d.m & mask)) {
549 i = 1;
550 }
551 break;
552 case FPSCR_RN_MINF:
553 if (d.s && (d.m & mask)) {
554 i = 1;
555 }
556 break;
557 }
558 d.m >>= 56 - d.e - 1;
559 d.m += i;
560 return d.s ? -d.m : d.m;
561 }
562 case ppc_fpr_zero:
563 return 0;
564 case ppc_fpr_Inf:
565 case ppc_fpr_NaN:
566 if (d.s) {
567 return 0x80000000;
568 } else {
569 return 0x7fffffff;
570 }
571 }
572 return 0;
573 }
574
575 double ppc_fpu_get_double(uint64 d);
576 double ppc_fpu_get_double(ppc_double &d);
577
578 void ppc_opc_fabsx();
579 void ppc_opc_faddx();
580 void ppc_opc_faddsx();
581 void ppc_opc_fcmpo();
582 void ppc_opc_fcmpu();
583 void ppc_opc_fctiwx();
584 void ppc_opc_fctiwzx();
585 void ppc_opc_fdivx();
586 void ppc_opc_fdivsx();
587 void ppc_opc_fmaddx();
588 void ppc_opc_fmaddsx();
589 void ppc_opc_fmrx();
590 void ppc_opc_fmsubx();
591 void ppc_opc_fmsubsx();
592 void ppc_opc_fmulx();
593 void ppc_opc_fmulsx();
594 void ppc_opc_fnabsx();
595 void ppc_opc_fnegx();
596 void ppc_opc_fnmaddx();
597 void ppc_opc_fnmaddsx();
598 void ppc_opc_fnmsubx();
599 void ppc_opc_fnmsubsx();
600 void ppc_opc_fresx();
601 void ppc_opc_frspx();
602 void ppc_opc_frsqrtex();
603 void ppc_opc_fselx();
604 void ppc_opc_fsqrtx();
605 void ppc_opc_fsqrtsx();
606 void ppc_opc_fsubx();
607 void ppc_opc_fsubsx();
608
609 #endif

  ViewVC Help
Powered by ViewVC 1.1.26