1 |
dpavlin |
1 |
/* |
2 |
|
|
* PearPC |
3 |
|
|
* ppc_fpu.h |
4 |
|
|
* |
5 |
|
|
* Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net) |
6 |
|
|
* Copyright (C) 2003, 2004 Stefan Weyergraf |
7 |
|
|
* |
8 |
|
|
* This program is free software; you can redistribute it and/or modify |
9 |
|
|
* it under the terms of the GNU General Public License version 2 as |
10 |
|
|
* published by the Free Software Foundation. |
11 |
|
|
* |
12 |
|
|
* This program is distributed in the hope that it will be useful, |
13 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 |
|
|
* GNU General Public License for more details. |
16 |
|
|
* |
17 |
|
|
* You should have received a copy of the GNU General Public License |
18 |
|
|
* along with this program; if not, write to the Free Software |
19 |
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
20 |
|
|
*/ |
21 |
|
|
|
22 |
|
|
#ifndef __PPC_FPU_H__ |
23 |
|
|
#define __PPC_FPU_H__ |
24 |
|
|
|
25 |
|
|
|
26 |
|
|
// Sign bit of a raw 64-bit IEEE double image.
#define FPU_SIGN_BIT (0x8000000000000000ULL)

// Field extraction from a raw 64-bit double image.
// FPD_SIGN yields 0 or 1. FPD_EXP does not strip the sign bit; mask the
// result with 0x7ff (FPD_UNPACK_VAR does this).
#define FPD_SIGN(v) (((v)&FPU_SIGN_BIT)?1:0)
#define FPD_EXP(v) ((v)>>52)
#define FPD_FRAC(v) ((v)&0x000fffffffffffffULL)

// Field extraction from a raw 32-bit single image.
// FPS_SIGN yields the masked sign bit (0 or 0x80000000), not 0/1.
// FPS_EXP does not strip the sign bit; mask the result with 0xff.
#define FPS_SIGN(v) ((v)&0x80000000)
#define FPS_EXP(v) ((v)>>23)
#define FPS_FRAC(v) ((v)&0x007fffff)

// Assemble / split a raw double image from / into sign, exponent and fraction.
// m must be uint64
#define FPD_PACK_VAR(f, s, e, m) (f) = ((s)?FPU_SIGN_BIT:0ULL)|((((uint64)(e))&0x7ff)<<52)|((m)&((1ULL<<52)-1))
#define FPD_UNPACK_VAR(f, s, e, m) {(s)=FPD_SIGN(f);(e)=FPD_EXP(f)&0x7ff;(m)=FPD_FRAC(f);}

// Assemble / split a raw single image. Note that FPS_PACK_VAR does not mask e.
#define FPS_PACK_VAR(f, s, e, m) (f) = ((s)?0x80000000:0)|((e)<<23)|((m)&0x7fffff)
#define FPS_UNPACK_VAR(f, s, e, m) {(s)=FPS_SIGN(f);(e)=FPS_EXP(f)&0xff;(m)=FPS_FRAC(f);}

// Split a raw double image into the s/e/m members of fvar.
// This used to expand to FPD_UNPACK itself with four arguments, which cannot
// compile; it now forwards to FPD_UNPACK_VAR.
#define FPD_UNPACK(freg, fvar) FPD_UNPACK_VAR(freg, (fvar).s, (fvar).e, (fvar).m)
44 |
|
|
|
45 |
|
|
|
46 |
|
|
// FPU test hook. Declaration only; the definition is not in this header.
void ppc_fpu_test();
47 |
|
|
|
48 |
|
|
// Class of an unpacked floating point value, as assigned by the
// ppc_fpu_unpack_* helpers and consumed by the ppc_fpu_pack_* helpers.
enum ppc_fpr_type {
	ppc_fpr_norm,	// finite and non-zero (denormal inputs are renormalized on unpack)
	ppc_fpr_zero,	// exponent field 0 and fraction 0
	ppc_fpr_NaN,	// exponent field all ones, fraction non-zero
	ppc_fpr_Inf,	// exponent field all ones, fraction 0
};
54 |
|
|
|
55 |
|
|
struct ppc_quadro { |
56 |
|
|
ppc_fpr_type type; |
57 |
|
|
int s; |
58 |
|
|
int e; |
59 |
|
|
uint64 m0; // most significant |
60 |
|
|
uint64 m1; // least significant |
61 |
|
|
}; |
62 |
|
|
|
63 |
|
|
struct ppc_double { |
64 |
|
|
ppc_fpr_type type; |
65 |
|
|
int s; |
66 |
|
|
int e; |
67 |
|
|
uint64 m; |
68 |
|
|
}; |
69 |
|
|
|
70 |
|
|
struct ppc_single { |
71 |
|
|
ppc_fpr_type type; |
72 |
|
|
int s; |
73 |
|
|
int e; |
74 |
|
|
uint m; |
75 |
|
|
}; |
76 |
|
|
|
77 |
|
|
// Returns the number of zero bits above the most significant set bit of i.
// For i == 0 the result is 63 (the same as for i == 1), not 64.
inline int ppc_count_leading_zeros(uint64 i)
{
	// Work on whichever 32-bit half holds the top set bit.
	uint32 word = i >> 32;
	int zeros = 31;
	if (!word) {
		word = (uint32)i;
		zeros = 63;
	}
	// Binary search: halve the window width each step (16, 8, 4, 2, 1).
	for (int width = 16; width; width >>= 1) {
		if (word >> width) {
			zeros -= width;
			word >>= width;
		}
	}
	return zeros;
}
99 |
|
|
|
100 |
|
|
// Returns the leading zero count of the 128-bit mantissa m0:m1.
// Despite the name, d is not modified.
inline int ppc_fpu_normalize_quadro(ppc_quadro &d)
{
	if (d.m0) {
		return ppc_count_leading_zeros(d.m0);
	}
	// upper word is empty: all 64 of its bits count as leading zeros
	return 64 + ppc_count_leading_zeros(d.m1);
}
105 |
|
|
|
106 |
|
|
// Returns the leading zero count of d.m. Despite the name, d is not modified.
inline int ppc_fpu_normalize(ppc_double &d)
{
	const int leading = ppc_count_leading_zeros(d.m);
	return leading;
}
110 |
|
|
|
111 |
|
|
// Returns the leading zero count of the 32-bit mantissa s.m.
// Despite the name, s is not modified. For s.m == 0 the result is 31.
inline int ppc_fpu_normalize_single(ppc_single &s)
{
	uint32 bits = s.m;
	int zeros = 31;
	// Binary search: halve the window width each step (16, 8, 4, 2, 1).
	for (int width = 16; width; width >>= 1) {
		if (bits >> width) {
			zeros -= width;
			bits >>= width;
		}
	}
	return zeros;
}
123 |
|
|
|
124 |
|
|
#include "tools/snprintf.h" |
125 |
|
|
// Splits the raw 64-bit double image d into res and classifies it.
//
// For ppc_fpr_norm results the exponent is unbiased and the mantissa is
// shifted left by 3 (room for guard/round/sticky bits), so that its leading
// bit ends up at bit 55. Denormal inputs are renormalized into that layout.
// For zero, Inf and NaN the raw exponent and fraction fields are left in
// res.e and res.m.
inline void ppc_fpu_unpack_double(ppc_double &res, uint64 d)
{
	FPD_UNPACK_VAR(d, res.s, res.e, res.m);
	// .124
	if (res.e == 2047) {
		if (res.m == 0) {
			res.type = ppc_fpr_Inf;
		} else {
			res.type = ppc_fpr_NaN;
		}
	} else if (res.e == 0) {
		if (res.m == 0) {
			res.type = ppc_fpr_zero;
		} else {
			// Denormal: renormalize. The raw fraction is a 64-bit word whose
			// implied-bit position is bit 52, i.e. 11 leading zeros for a
			// normalized value (64 - 53). The old constant 8 was the
			// single-precision figure (32 - 24) and left the leading bit at
			// bit 58 instead of bit 55.
			int diff = ppc_fpu_normalize(res) - 11;
			res.m <<= diff+3;
			res.e -= 1023 - 1 + diff;
			res.type = ppc_fpr_norm;
		}
	} else {
		res.e -= 1023; // unbias exponent
		res.type = ppc_fpr_norm;
		// add implied bit
		res.m |= 1ULL<<52;
		res.m <<= 3;
	}
}
155 |
|
|
|
156 |
|
|
|
157 |
|
|
// Splits the raw 32-bit single image d into res and classifies it.
//
// For ppc_fpr_norm results the exponent is unbiased and the mantissa is
// shifted left by 3, so that its leading bit ends up at bit 26. Denormal
// inputs are renormalized into that layout. For zero, Inf and NaN the raw
// exponent and fraction fields are left in res.e and res.m.
inline void ppc_fpu_unpack_single(ppc_single &res, uint32 d)
{
	FPS_UNPACK_VAR(d, res.s, res.e, res.m);
	// .124
	const bool frac_is_zero = (res.m == 0);

	if (res.e == 255) {
		res.type = frac_is_zero ? ppc_fpr_Inf : ppc_fpr_NaN;
		return;
	}
	if (res.e != 0) {
		// ordinary normal number
		res.type = ppc_fpr_norm;
		res.e -= 127; // unbias exponent
		res.m = (res.m | (1<<23)) << 3; // implied bit, then guard room
		return;
	}
	if (frac_is_zero) {
		res.type = ppc_fpr_zero;
		return;
	}
	// Denormal: a normalized raw fraction has 8 leading zeros in 32 bits.
	int diff = ppc_fpu_normalize_single(res) - 8;
	res.m <<= diff+3;
	res.e -= 127 - 1 + diff;
	res.type = ppc_fpr_norm;
}
185 |
|
|
|
186 |
|
|
// Applies the rounding mode selected by FPSCR_RN(gCPU.fpscr) to d.m.
//
// The low three bits of d.m are the guard (bit 2) and round/sticky (bits 1-0)
// bits; bit 3 is the least significant result bit. Rounding up is done by
// adding 8. The low three bits are NOT cleared here and a carry out of the
// mantissa is NOT renormalized; the caller handles both.
//
// Returns FPSCR_XX when this function reports the result as inexact, else 0.
// The directed modes only report FPSCR_XX on the path that rounds away from
// zero.
//
// The disabled alternative implementation and an unreachable return that used
// to live here have been removed.
inline uint32 ppc_fpu_round(ppc_double &d)
{
	// .132
	switch (FPSCR_RN(gCPU.fpscr)) {
	case FPSCR_RN_NEAR:
		if (d.m & 4) {
			// guard == 1
			if (d.m & 3) {
				// round || sticky: above the halfway point
				d.m += 8;
			} else if (d.m & 8) {
				// exact tie and lsb set: round to even
				d.m += 8;
			}
			return FPSCR_XX;
		} else {
			// guard == 0
			return ((d.m & 7) == 0) ? 0 : FPSCR_XX;
		}
	case FPSCR_RN_ZERO:
		if (d.m & 0x7) {
			return FPSCR_XX;
		}
		return 0;
	case FPSCR_RN_PINF:
		if (!d.s && (d.m & 0x7)) {
			d.m += 8;
			return FPSCR_XX;
		}
		return 0;
	case FPSCR_RN_MINF:
		if (d.s && (d.m & 0x7)) {
			d.m += 8;
			return FPSCR_XX;
		}
		return 0;
	}
	return 0;
}
268 |
|
|
|
269 |
|
|
// Applies the rounding mode selected by FPSCR_RN(gCPU.fpscr) to s.m.
//
// The low three bits of s.m are the discarded guard bits. Round-to-nearest
// adds half a unit (4), so that the caller's truncation of the low three bits
// gives the rounded value; the directed modes add a whole unit (8). The low
// bits are not cleared here.
//
// Returns FPSCR_XX when this function reports the result as inexact, else 0.
// The directed modes only report FPSCR_XX on the path that rounds away from
// zero.
inline uint32 ppc_fpu_round_single(ppc_single &s)
{
	const int low = s.m & 0x7;
	switch (FPSCR_RN(gCPU.fpscr)) {
	case FPSCR_RN_NEAR:
		if (low) {
			// on an exact tie (low == 4) only round when the lsb is odd
			if (low != 4 || (s.m & 8)) {
				s.m += 4;
			}
			return FPSCR_XX;
		}
		break;
	case FPSCR_RN_ZERO:
		if (low) {
			return FPSCR_XX;
		}
		break;
	case FPSCR_RN_PINF:
		if (low && !s.s) {
			s.m += 8;
			return FPSCR_XX;
		}
		break;
	case FPSCR_RN_MINF:
		if (low && s.s) {
			s.m += 8;
			return FPSCR_XX;
		}
		break;
	}
	return 0;
}
302 |
|
|
|
303 |
|
|
// Overload of ppc_fpu_round_single() for a single precision mantissa that is
// held in a ppc_double (see ppc_fpu_pack_single()).
//
// The low three bits of s.m are the discarded guard bits. Round-to-nearest
// adds half a unit (4), so that the caller's truncation of the low three bits
// gives the rounded value; the directed modes add a whole unit (8). The low
// bits are not cleared here.
//
// Returns FPSCR_XX when this function reports the result as inexact, else 0.
// The directed modes only report FPSCR_XX on the path that rounds away from
// zero.
inline uint32 ppc_fpu_round_single(ppc_double &s)
{
	const int low = (int)(s.m & 0x7);
	switch (FPSCR_RN(gCPU.fpscr)) {
	case FPSCR_RN_NEAR:
		if (low) {
			// on an exact tie (low == 4) only round when the lsb is odd
			if (low != 4 || (s.m & 8)) {
				s.m += 4;
			}
			return FPSCR_XX;
		}
		break;
	case FPSCR_RN_ZERO:
		if (low) {
			return FPSCR_XX;
		}
		break;
	case FPSCR_RN_PINF:
		if (low && !s.s) {
			s.m += 8;
			return FPSCR_XX;
		}
		break;
	case FPSCR_RN_MINF:
		if (low && s.s) {
			s.m += 8;
			return FPSCR_XX;
		}
		break;
	}
	return 0;
}
336 |
|
|
|
337 |
|
|
// Rounds the unpacked value d and stores its raw 64-bit double image in res.
//
// d is used as scratch space: on return d.e and d.m hold the biased exponent
// field and the fraction that were packed.
// Returns the FPSCR status bits collected on the way (FPSCR_XX from rounding,
// FPSCR_OX on overflow, FPSCR_UX on underflow).
// A NaN is always packed with fraction 1; the original payload is not kept.
inline uint32 ppc_fpu_pack_double(ppc_double &d, uint64 &res)
{
	// .124
	uint32 flags = 0;
	switch (d.type) {
	case ppc_fpr_norm:
		d.e += 1023; // bias exponent
		if (d.e > 0) {
			// remember the guard bit before rounding touches the mantissa
			const bool guard_was_set = d.m & 4;
			flags |= ppc_fpu_round(d);
			if (d.m & (1ULL<<56)) {
				// rounding carried out of the mantissa
				d.m >>= 4;
				d.e++;
			} else {
				d.m >>= 3;
			}
			if (d.e >= 2047) {
				// exponent out of range
				if (guard_was_set) {
					d.e = 2047;
					d.m = 0;
					flags |= FPSCR_OX;
				} else {
					d.e = 2046;
					d.m = (1ULL<<53)-1;
				}
			}
		} else {
			// number is denormalized
			const int shift = 1 - d.e;
			if (shift > 56) {
				// underflow to zero
				d.e = 0;
				d.m = 0;
				flags |= FPSCR_UX;
			} else {
				d.m >>= shift;
				flags |= ppc_fpu_round(d);
				d.m <<= 1;
				if (d.m & (1ULL<<56)) {
					// rounded up to the smallest normal number
					d.e = 1;
					d.m = 0;
				} else {
					d.e = 0;
					d.m >>= 4;
					flags |= FPSCR_UX;
				}
			}
		}
		break;
	case ppc_fpr_zero:
		d.e = 0;
		d.m = 0;
		break;
	case ppc_fpr_NaN:
		d.e = 2047;
		d.m = 1;
		break;
	case ppc_fpr_Inf:
		d.e = 2047;
		d.m = 0;
		break;
	}
	FPD_PACK_VAR(res, d.s, d.e, d.m);
	return flags;
}
406 |
|
|
|
407 |
|
|
// Rounds the unpacked value d to single precision and stores its raw 32-bit
// image in res.
//
// d is used as scratch space: its mantissa is first shifted right by 29 to
// the single precision layout, and on return d.e and d.m hold the biased
// exponent field and the fraction that were packed.
// Returns the FPSCR status bits collected on the way (FPSCR_XX from rounding,
// FPSCR_OX on overflow, FPSCR_UX on underflow).
// A NaN is always packed with fraction 1; the original payload is not kept.
inline uint32 ppc_fpu_pack_single(ppc_double &d, uint32 &res)
{
	// .124
	uint32 flags = 0;
	switch (d.type) {
	case ppc_fpr_norm:
		d.e += 127; // bias exponent
		d.m >>= 29;
		if (d.e > 0) {
			flags |= ppc_fpu_round_single(d);
			if (d.m & (1ULL<<27)) {
				// rounding carried out of the mantissa
				d.m >>= 4;
				d.e++;
			} else {
				d.m >>= 3;
			}
			if (d.e >= 255) {
				// overflow to infinity
				d.e = 255;
				d.m = 0;
				flags |= FPSCR_OX;
			}
		} else {
			// number is denormalized
			const int shift = 1 - d.e;
			if (shift > 27) {
				// underflow to zero
				d.e = 0;
				d.m = 0;
				flags |= FPSCR_UX;
			} else {
				d.m >>= shift;
				flags |= ppc_fpu_round_single(d);
				d.m <<= 1;
				if (d.m & (1ULL<<27)) {
					// rounded up to the smallest normal number
					d.e = 1;
					d.m = 0;
				} else {
					d.e = 0;
					d.m >>= 4;
					flags |= FPSCR_UX;
				}
			}
		}
		break;
	case ppc_fpr_zero:
		d.e = 0;
		d.m = 0;
		break;
	case ppc_fpr_NaN:
		d.e = 255;
		d.m = 1;
		break;
	case ppc_fpr_Inf:
		d.e = 255;
		d.m = 0;
		break;
	}
	FPS_PACK_VAR(res, d.s, d.e, d.m);
	return flags;
}
470 |
|
|
|
471 |
|
|
// Converts the unpacked single s into the unpacked double d.
// Class, sign and exponent are copied unchanged; the mantissa is widened and
// shifted left by 29 bits.
inline void ppc_fpu_single_to_double(ppc_single &s, ppc_double &d)
{
	d.type = s.type;
	d.s = s.s;
	d.e = s.e;
	const uint64 wide = (uint64)s.m;
	d.m = wide << 29;
}
478 |
|
|
|
479 |
|
|
// Rounds the unpacked value d to single precision (range and precision) and
// stores the result in res as a raw 64-bit *double* image.
//
// d is used as scratch space and is rewritten to describe the rounded value.
// Returns the FPSCR status bits collected on the way (FPSCR_XX from rounding,
// FPSCR_OX on overflow, FPSCR_UX on underflow) plus whatever
// ppc_fpu_pack_double() reports.
//
// Fixes relative to the previous version:
//  - An overflow to infinity or underflow to zero was only recorded in the
//    temporary s; d.type stayed ppc_fpr_norm, so e.g. an overflow was packed
//    as 2^128 instead of infinity. The class is now copied back to d.
//  - In the denormal range the mantissa was shifted right for rounding but
//    neither shifted back nor compensated in the exponent, so the packed
//    value was wrong (2^-130 came out as 2^-130 * 1.0625). The rounded value
//    is now renormalized before packing.
inline uint32 ppc_fpu_pack_double_as_single(ppc_double &d, uint64 &res)
{
	ppc_single s;
	s.m = d.m >> 29;
	s.e = d.e;
	s.s = d.s;
	s.type = d.type;
	uint32 ret = 0;

	switch (s.type) {
	case ppc_fpr_norm:
		s.e = d.e+127; // bias exponent
		if (s.e > 0) {
			ret |= ppc_fpu_round_single(s);
			if (s.m & (1<<27)) {
				// rounding carried out of the mantissa
				s.e++;
				s.m >>= 4;
			} else {
				s.m >>= 3;
			}
			if (s.e >= 255) {
				// overflow to infinity
				s.type = ppc_fpr_Inf;
				s.e = 255;
				s.m = 0;
				ret |= FPSCR_OX;
			}
			d.e = s.e-127;
			// 24-bit mantissa (leading bit 23) back to the double layout
			// (leading bit 55)
			d.m = ((uint64)(s.m))<<32;
		} else {
			// number is denormalized in single precision
			const int shift = 1 - s.e;
			s.e = 0;
			if (shift <= 27) {
				s.m >>= shift;
				ret |= ppc_fpu_round_single(s);
				if (!(s.m & (1<<26))) {
					// did not round up to the smallest normal number
					ret |= FPSCR_UX;
				}
				// Drop the guard bits: s.m is now the result in units of
				// 2^-149 (at most 2^23, the smallest normal number).
				s.m >>= 3;
				if (s.m) {
					// renormalize so that the leading bit is at bit 55
					const int top = 31 - ppc_fpu_normalize_single(s);
					d.e = top - 149;
					d.m = ((uint64)(s.m)) << (55 - top);
				} else {
					// rounded all the way down to zero
					s.type = ppc_fpr_zero;
				}
			} else {
				// underflow to zero
				s.type = ppc_fpr_zero;
				s.m = 0;
				ret |= FPSCR_UX;
			}
		}
		break;
	case ppc_fpr_zero:
		s.e = 0;
		s.m = 0;
		break;
	case ppc_fpr_NaN:
		s.e = 2047;
		s.m = 1;
		break;
	case ppc_fpr_Inf:
		s.e = 2047;
		s.m = 0;
		break;
	}
	if (s.type != ppc_fpr_norm) {
		d.m = s.m;
	}
	// propagate a class change (overflow / underflow) to the value we pack
	d.type = s.type;
	ret |= ppc_fpu_pack_double(d, res);
	return ret;
}
552 |
|
|
|
553 |
|
|
// Converts the unpacked value d to a signed 32-bit integer, rounding according
// to FPSCR_RN(gCPU.fpscr). The result is returned as its two's complement bit
// pattern in a uint32.
//
// Out-of-range values, infinities and NaNs saturate to 0x7fffffff or, when
// the sign is set, 0x80000000. d.m is used as scratch space.
// Assumes a normalized mantissa (leading bit at bit 55), as produced by
// ppc_fpu_unpack_double() for normal inputs.
//
// Fix relative to the previous version: in round-to-nearest mode exact ties
// are now rounded to the even integer (0.5 -> 0, 1.5 -> 2, 2.5 -> 2) instead
// of always away from zero.
inline uint32 ppc_fpu_double_to_int(ppc_double &d)
{
	switch (d.type) {
	case ppc_fpr_norm: {
		if (d.e < 0) {
			// |value| < 1
			switch (FPSCR_RN(gCPU.fpscr)) {
			case FPSCR_RN_NEAR:
				if (d.e < -1) {
					// |value| < 0.5
					return 0;
				} else if (!(d.m & ((1ULL<<55)-1))) {
					// exactly 0.5: tie, the even neighbour is 0
					return 0;
				} else {
					return d.s ? (uint32)-1 : 1;
				}
			case FPSCR_RN_ZERO:
				return 0;
			case FPSCR_RN_PINF:
				if (d.s) {
					return 0;
				} else {
					return 1;
				}
			case FPSCR_RN_MINF:
				if (d.s) {
					return (uint32)-1;
				} else {
					return 0;
				}
			}
		}
		if (d.e >= 31) {
			// magnitude does not fit: saturate
			if (d.s) {
				return 0x80000000;
			} else {
				return 0x7fffffff;
			}
		}
		int i=0;
		// number of mantissa bits below the units bit (25..55)
		const int frac_bits = 56 - d.e - 1;
		uint64 mask = (1ULL<<frac_bits)-1;
		uint64 half = 1ULL<<(frac_bits-1);
		// we have to round
		switch (FPSCR_RN(gCPU.fpscr)) {
		case FPSCR_RN_NEAR:
			if (d.m & half) {
				// above the halfway point, or an exact tie with an odd
				// integer part
				if ((d.m & (half-1)) || (d.m & (1ULL<<frac_bits))) {
					i = 1;
				}
			}
			break;
		case FPSCR_RN_ZERO:
			break;
		case FPSCR_RN_PINF:
			if (!d.s && (d.m & mask)) {
				i = 1;
			}
			break;
		case FPSCR_RN_MINF:
			if (d.s && (d.m & mask)) {
				i = 1;
			}
			break;
		}
		d.m >>= frac_bits;
		d.m += i;
		if (d.m & 0x80000000) {
			// rounding pushed the magnitude to 2^31: saturate
			if (d.s) {
				return 0x80000000;
			} else {
				return 0x7fffffff;
			}
		}
		return d.s ? -d.m : d.m;
	}
	case ppc_fpr_zero:
		return 0;
	case ppc_fpr_Inf:
	case ppc_fpr_NaN:
		if (d.s) {
			return 0x80000000;
		} else {
			return 0x7fffffff;
		}
	}
	return 0;
}
635 |
|
|
|
636 |
|
|
double ppc_fpu_get_double(uint64 d); |
637 |
|
|
double ppc_fpu_get_double(ppc_double &d); |
638 |
|
|
|
639 |
|
|
#include "jitc.h" |
640 |
|
|
#include "jitc_asm.h" |
641 |
|
|
#include "x86asm.h" |
642 |
|
|
#include "ppc_exc.h" |
643 |
|
|
|
644 |
|
|
// Emits the "floating point available" check, unless gJITC.checkedFloat says
// it has already been emitted.
//
// The generated code tests MSR_FP in the client MSR register. When the bit is
// set it continues behind the check; otherwise it flushes dirty registers,
// loads gJITC.pc into ESI and jumps to ppc_no_fpu_exception_asm.
static UNUSED void ppc_opc_gen_check_fpu()
{
	if (gJITC.checkedFloat) {
		return;
	}

	jitcFloatRegisterClobberAll();
	jitcFlushVectorRegister();
	jitcClobberCarryAndFlags();

	// test MSR[FP] and skip the exception path when it is set
	NativeReg msr = jitcGetClientRegister(PPC_MSR);
	asmALU(X86_TEST, msr, MSR_FP);
	NativeAddress fp_enabled = asmJxxFixup(X86_NZ);

	// FP unavailable: hand over to the exception stub
	jitcFlushRegisterDirty();
	asmALU(X86_MOV, ESI, gJITC.pc);
	asmJMP((NativeAddress)ppc_no_fpu_exception_asm);

	asmResolveFixup(fp_enabled);
	gJITC.checkedFloat = true;
}
663 |
|
|
|
664 |
|
|
// One handler per floating point opcode (declarations only).
// NOTE(review): presumably the interpreter entry points for the PowerPC
// instructions named by the suffix; the definitions are not in this header.
void ppc_opc_fabsx();
void ppc_opc_faddx();
void ppc_opc_faddsx();
void ppc_opc_fcmpo();
void ppc_opc_fcmpu();
void ppc_opc_fctiwx();
void ppc_opc_fctiwzx();
void ppc_opc_fdivx();
void ppc_opc_fdivsx();
void ppc_opc_fmaddx();
void ppc_opc_fmaddsx();
void ppc_opc_fmrx();
void ppc_opc_fmsubx();
void ppc_opc_fmsubsx();
void ppc_opc_fmulx();
void ppc_opc_fmulsx();
void ppc_opc_fnabsx();
void ppc_opc_fnegx();
void ppc_opc_fnmaddx();
void ppc_opc_fnmaddsx();
void ppc_opc_fnmsubx();
void ppc_opc_fnmsubsx();
void ppc_opc_fresx();
void ppc_opc_frspx();
void ppc_opc_frsqrtex();
void ppc_opc_fselx();
void ppc_opc_fsqrtx();
void ppc_opc_fsqrtsx();
void ppc_opc_fsubx();
void ppc_opc_fsubsx();
694 |
|
|
|
695 |
|
|
// One code generator per floating point opcode (declarations only), each
// returning a JITCFlow.
// NOTE(review): presumably the JIT counterparts of the ppc_opc_* handlers;
// the definitions are not in this header.
JITCFlow ppc_opc_gen_fabsx();
JITCFlow ppc_opc_gen_faddx();
JITCFlow ppc_opc_gen_faddsx();
JITCFlow ppc_opc_gen_fcmpo();
JITCFlow ppc_opc_gen_fcmpu();
JITCFlow ppc_opc_gen_fctiwx();
JITCFlow ppc_opc_gen_fctiwzx();
JITCFlow ppc_opc_gen_fdivx();
JITCFlow ppc_opc_gen_fdivsx();
JITCFlow ppc_opc_gen_fmaddx();
JITCFlow ppc_opc_gen_fmaddsx();
JITCFlow ppc_opc_gen_fmrx();
JITCFlow ppc_opc_gen_fmsubx();
JITCFlow ppc_opc_gen_fmsubsx();
JITCFlow ppc_opc_gen_fmulx();
JITCFlow ppc_opc_gen_fmulsx();
JITCFlow ppc_opc_gen_fnabsx();
JITCFlow ppc_opc_gen_fnegx();
JITCFlow ppc_opc_gen_fnmaddx();
JITCFlow ppc_opc_gen_fnmaddsx();
JITCFlow ppc_opc_gen_fnmsubx();
JITCFlow ppc_opc_gen_fnmsubsx();
JITCFlow ppc_opc_gen_fresx();
JITCFlow ppc_opc_gen_frspx();
JITCFlow ppc_opc_gen_frsqrtex();
JITCFlow ppc_opc_gen_fselx();
JITCFlow ppc_opc_gen_fsqrtx();
JITCFlow ppc_opc_gen_fsqrtsx();
JITCFlow ppc_opc_gen_fsubx();
JITCFlow ppc_opc_gen_fsubsx();
725 |
|
|
|
726 |
|
|
#endif |