/[pearpc]/src/cpu/cpu_generic/ppc_vec.cc
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /src/cpu/cpu_generic/ppc_vec.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Wed Sep 5 17:11:21 2007 UTC (12 years, 2 months ago) by dpavlin
File size: 68507 byte(s)
import upstream CVS
1 /*
2 * PearPC
3 * ppc_vec.cc
4 *
5 * Copyright (C) 2004 Daniel Foesch (dfoesch@cs.nsmu.edu)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21 /* Pages marked: v.???
22 * From: IBM PowerPC MicroProcessor Family: Altivec(tm) Technology...
23 * Programming Environments Manual
24 */
25
26 #include <math.h>
27
/*
 * Fallback definitions of log2() and exp2(), used when the build does not
 * define HAS_LOG2 / HAS_EXP2.
 * FIXME: put somewhere appropriate
 *
 * The replacement lists are wrapped in parentheses so that the macros can
 * be used inside a larger expression (for example as a divisor) without
 * the surrounding operators re-associating with the inner division.
 */
#ifndef HAS_LOG2
#define log2(x) (log(x)/log(2))
#endif /* HAS_LOG2 */

#ifndef HAS_EXP2
#define exp2(x) (pow(2, (x)))
#endif /* HAS_EXP2 */
38
39 #include "debug/tracers.h"
40 #include "ppc_cpu.h"
41 #include "ppc_dec.h"
42 #include "ppc_fpu.h"
43 #include "ppc_vec.h"
44
45 #define SIGN32 0x80000000
46
47 /* PACK_PIXEL Packs a uint32 pixel to uint16 pixel
48 * v.219
49 */
50 static inline uint16 PACK_PIXEL(uint32 clr)
51 {
52 return (((clr & 0x000000f8) >> 3) | \
53 ((clr & 0x0000f800) >> 6) | \
54 ((clr & 0x01f80000) >> 9));
55 }
56
57 /* UNPACK_PIXEL Unpacks a uint16 pixel to uint32 pixel
58 * v.276 & v.279
59 */
60 static inline uint32 UNPACK_PIXEL(uint16 clr)
61 {
62 return (((uint32)(clr & 0x001f)) | \
63 ((uint32)(clr & 0x03E0) << 3) | \
64 ((uint32)(clr & 0x7c00) << 6) | \
65 (((clr) & 0x8000) ? 0xff000000 : 0));
66 }
67
68 static inline uint8 SATURATE_UB(uint16 val)
69 {
70 if (val & 0xff00) {
71 gCPU.vscr |= VSCR_SAT;
72 return 0xff;
73 }
74 return val;
75 }
76 static inline uint8 SATURATE_0B(uint16 val)
77 {
78 if (val & 0xff00) {
79 gCPU.vscr |= VSCR_SAT;
80 return 0;
81 }
82 return val;
83 }
84
85 static inline uint16 SATURATE_UH(uint32 val)
86 {
87 if (val & 0xffff0000) {
88 gCPU.vscr |= VSCR_SAT;
89 return 0xffff;
90 }
91 return val;
92 }
93
94 static inline uint16 SATURATE_0H(uint32 val)
95 {
96 if (val & 0xffff0000) {
97 gCPU.vscr |= VSCR_SAT;
98 return 0;
99 }
100 return val;
101 }
102
103 static inline sint8 SATURATE_SB(sint16 val)
104 {
105 if (val > 127) { // 0x7F
106 gCPU.vscr |= VSCR_SAT;
107 return 127;
108 } else if (val < -128) { // 0x80
109 gCPU.vscr |= VSCR_SAT;
110 return -128;
111 }
112 return val;
113 }
114
115 static inline uint8 SATURATE_USB(sint16 val)
116 {
117 if (val > 0xff) {
118 gCPU.vscr |= VSCR_SAT;
119 return 0xff;
120 } else if (val < 0) {
121 gCPU.vscr |= VSCR_SAT;
122 return 0;
123 }
124 return (uint8)val;
125 }
126
127 static inline sint16 SATURATE_SH(sint32 val)
128 {
129 if (val > 32767) { // 0x7fff
130 gCPU.vscr |= VSCR_SAT;
131 return 32767;
132 } else if (val < -32768) { // 0x8000
133 gCPU.vscr |= VSCR_SAT;
134 return -32768;
135 }
136 return val;
137 }
138
139 static inline uint16 SATURATE_USH(sint32 val)
140 {
141 if (val > 0xffff) {
142 gCPU.vscr |= VSCR_SAT;
143 return 0xffff;
144 } else if (val < 0) {
145 gCPU.vscr |= VSCR_SAT;
146 return 0;
147 }
148 return (uint16)val;
149 }
150
151 static inline sint32 SATURATE_UW(sint64 val)
152 {
153 if (val > 0xffffffffLL) {
154 gCPU.vscr |= VSCR_SAT;
155 return 0xffffffffLL;
156 }
157 return val;
158 }
159
160 static inline sint32 SATURATE_SW(sint64 val)
161 {
162 if (val > 2147483647LL) { // 0x7fffffff
163 gCPU.vscr |= VSCR_SAT;
164 return 2147483647LL;
165 } else if (val < -2147483648LL) { // 0x80000000
166 gCPU.vscr |= VSCR_SAT;
167 return -2147483648LL;
168 }
169 return val;
170 }
171
172 /* vperm Vector Permutation
173 * v.218
174 */
175 void ppc_opc_vperm()
176 {
177 VECTOR_DEBUG_COMMON;
178 int vrD, vrA, vrB, vrC;
179 int sel;
180 Vector_t r;
181 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
182 for (int i=0; i<16; i++) {
183 sel = gCPU.vr[vrC].b[i];
184 if (sel & 0x10)
185 r.b[i] = VECT_B(gCPU.vr[vrB], sel & 0xf);
186 else
187 r.b[i] = VECT_B(gCPU.vr[vrA], sel & 0xf);
188 }
189
190 gCPU.vr[vrD] = r;
191 }
192
193 /* vsel Vector Select
194 * v.238
195 */
196 void ppc_opc_vsel()
197 {
198 VECTOR_DEBUG;
199 int vrD, vrA, vrB, vrC;
200 uint64 mask, val;
201 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
202
203 mask = gCPU.vr[vrC].d[0];
204 val = gCPU.vr[vrB].d[0] & mask;
205 val |= gCPU.vr[vrA].d[0] & ~mask;
206 gCPU.vr[vrD].d[0] = val;
207
208 mask = gCPU.vr[vrC].d[1];
209 val = gCPU.vr[vrB].d[1] & mask;
210 val |= gCPU.vr[vrA].d[1] & ~mask;
211 gCPU.vr[vrD].d[1] = val;
212 }
213
214 /* vsrb Vector Shift Right Byte
215 * v.256
216 */
217 void ppc_opc_vsrb()
218 {
219 VECTOR_DEBUG;
220 int vrD, vrA, vrB;
221 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
222 for (int i=0; i<16; i++) {
223 gCPU.vr[vrD].b[i] = gCPU.vr[vrA].b[i] >> (gCPU.vr[vrB].b[i] & 0x7);
224 }
225 }
226
227 /* vsrh Vector Shift Right Half Word
228 * v.257
229 */
230 void ppc_opc_vsrh()
231 {
232 VECTOR_DEBUG;
233 int vrD, vrA, vrB;
234 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
235 for (int i=0; i<8; i++) {
236 gCPU.vr[vrD].h[i] = gCPU.vr[vrA].h[i] >> (gCPU.vr[vrB].h[i] & 0xf);
237 }
238 }
239
240 /* vsrw Vector Shift Right Word
241 * v.259
242 */
243 void ppc_opc_vsrw()
244 {
245 VECTOR_DEBUG;
246 int vrD, vrA, vrB;
247 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
248 for (int i=0; i<4; i++) {
249 gCPU.vr[vrD].w[i] = gCPU.vr[vrA].w[i] >> (gCPU.vr[vrB].w[i] & 0x1f);
250 }
251 }
252
253 /* vsrab Vector Shift Right Arithmetic Byte
254 * v.253
255 */
256 void ppc_opc_vsrab()
257 {
258 VECTOR_DEBUG;
259 int vrD, vrA, vrB;
260 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
261 for (int i=0; i<16; i++) {
262 gCPU.vr[vrD].sb[i] = gCPU.vr[vrA].sb[i] >> (gCPU.vr[vrB].b[i] & 0x7);
263 }
264 }
265
266 /* vsrah Vector Shift Right Arithmetic Half Word
267 * v.254
268 */
269 void ppc_opc_vsrah()
270 {
271 VECTOR_DEBUG;
272 int vrD, vrA, vrB;
273 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
274 for (int i=0; i<8; i++) {
275 gCPU.vr[vrD].sh[i] = gCPU.vr[vrA].sh[i] >> (gCPU.vr[vrB].h[i] & 0xf);
276 }
277 }
278
279 /* vsraw Vector Shift Right Arithmetic Word
280 * v.255
281 */
282 void ppc_opc_vsraw()
283 {
284 VECTOR_DEBUG;
285 int vrD, vrA, vrB;
286 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
287 for (int i=0; i<4; i++) {
288 gCPU.vr[vrD].sw[i] = gCPU.vr[vrA].sw[i] >> (gCPU.vr[vrB].w[i] & 0x1f);
289 }
290 }
291
292 /* vslb Vector Shift Left Byte
293 * v.240
294 */
295 void ppc_opc_vslb()
296 {
297 VECTOR_DEBUG;
298 int vrD, vrA, vrB;
299 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
300 for (int i=0; i<16; i++) {
301 gCPU.vr[vrD].b[i] = gCPU.vr[vrA].b[i] << (gCPU.vr[vrB].b[i] & 0x7);
302 }
303 }
304
305 /* vslh Vector Shift Left Half Word
306 * v.242
307 */
308 void ppc_opc_vslh()
309 {
310 VECTOR_DEBUG;
311 int vrD, vrA, vrB;
312 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
313 for (int i=0; i<8; i++) {
314 gCPU.vr[vrD].h[i] = gCPU.vr[vrA].h[i] << (gCPU.vr[vrB].h[i] & 0xf);
315 }
316 }
317
318 /* vslw Vector Shift Left Word
319 * v.244
320 */
321 void ppc_opc_vslw()
322 {
323 VECTOR_DEBUG;
324 int vrD, vrA, vrB;
325 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
326 for (int i=0; i<4; i++) {
327 gCPU.vr[vrD].w[i] = gCPU.vr[vrA].w[i] << (gCPU.vr[vrB].w[i] & 0x1f);
328 }
329 }
330
331 /* vsr Vector Shift Right
332 * v.251
333 */
334 void ppc_opc_vsr()
335 {
336 VECTOR_DEBUG;
337 int vrD, vrA, vrB;
338 Vector_t r;
339 int shift;
340 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
341
342 /* Specs say that the low-order 3 bits of all byte elements in vB
343 * must be the same, or the result is undefined. So we can just
344 * use the same low-order 3 bits for all of our shifts.
345 */
346 shift = gCPU.vr[vrB].w[0] & 0x7;
347
348 r.d[0] = gCPU.vr[vrA].d[0] >> shift;
349 r.d[1] = gCPU.vr[vrA].d[1] >> shift;
350
351 VECT_D(r, 1) |= VECT_D(gCPU.vr[vrA], 0) << (64 - shift);
352
353 gCPU.vr[vrD] = r;
354 }
355
356 /* vsro Vector Shift Right Octet
357 * v.258
358 */
359 void ppc_opc_vsro()
360 {
361 VECTOR_DEBUG;
362 int vrD, vrA, vrB;
363 Vector_t r;
364 int shift, i;
365 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
366
367 shift = (gCPU.vr[vrB].w[0] >> 3) & 0xf;
368 #if HOST_ENDIANESS == HOST_ENDIANESS_LE
369 for (i=0; i<(16-shift); i++) {
370 r.b[i] = gCPU.vr[vrA].b[i+shift];
371 }
372
373 for (; i<16; i++) {
374 r.b[i] = 0;
375 }
376 #elif HOST_ENDIANESS == HOST_ENDIANESS_BE
377 for (i=0; i<shift; i++) {
378 r.b[i] = 0;
379 }
380
381 for (; i<16; i++) {
382 r.b[i] = gCPU.vr[vrA].b[i-shift];
383 }
384 #else
385 #error Endianess not supported!
386 #endif
387
388 gCPU.vr[vrD] = r;
389 }
390
391 /* vsl Vector Shift Left
392 * v.239
393 */
394 void ppc_opc_vsl()
395 {
396 VECTOR_DEBUG;
397 int vrD, vrA, vrB;
398 Vector_t r;
399 int shift;
400 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
401
402 /* Specs say that the low-order 3 bits of all byte elements in vB
403 * must be the same, or the result is undefined. So we can just
404 * use the same low-order 3 bits for all of our shifts.
405 */
406 shift = gCPU.vr[vrB].w[0] & 0x7;
407
408 r.d[0] = gCPU.vr[vrA].d[0] << shift;
409 r.d[1] = gCPU.vr[vrA].d[1] << shift;
410
411 VECT_D(r, 0) |= VECT_D(gCPU.vr[vrA], 1) >> (64 - shift);
412
413 gCPU.vr[vrD] = r;
414 }
415
416 /* vslo Vector Shift Left Octet
417 * v.243
418 */
419 void ppc_opc_vslo()
420 {
421 VECTOR_DEBUG;
422 int vrD, vrA, vrB;
423 Vector_t r;
424 int shift, i;
425 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
426
427 shift = (gCPU.vr[vrB].w[0] >> 3) & 0xf;
428 #if HOST_ENDIANESS == HOST_ENDIANESS_LE
429 for (i=0; i<shift; i++) {
430 r.b[i] = 0;
431 }
432
433 for (; i<16; i++) {
434 r.b[i] = gCPU.vr[vrA].b[i-shift];
435 }
436 #elif HOST_ENDIANESS == HOST_ENDIANESS_BE
437 for (i=0; i<(16-shift); i++) {
438 r.b[i] = gCPU.vr[vrA].b[i+shift];
439 }
440
441 for (; i<16; i++) {
442 r.b[i] = 0;
443 }
444 #else
445 #error Endianess not supported!
446 #endif
447
448 gCPU.vr[vrD] = r;
449 }
450
451 /* vsldoi Vector Shift Left Double by Octet Immediate
452 * v.241
453 */
454 void ppc_opc_vsldoi()
455 {
456 VECTOR_DEBUG_COMMON;
457 int vrD, vrA, vrB, shift, ashift;
458 int i;
459 Vector_t r;
460 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, shift);
461
462 shift &= 0xf;
463 ashift = 16 - shift;
464
465 #if HOST_ENDIANESS == HOST_ENDIANESS_LE
466 for (i=0; i<shift; i++) {
467 r.b[i] = gCPU.vr[vrB].b[i+ashift];
468 }
469
470 for (; i<16; i++) {
471 r.b[i] = gCPU.vr[vrA].b[i-shift];
472 }
473 #elif HOST_ENDIANESS == HOST_ENDIANESS_BE
474 for (i=0; i<ashift; i++) {
475 r.b[i] = gCPU.vr[vrA].b[i+shift];
476 }
477
478 for (; i<16; i++) {
479 r.b[i] = gCPU.vr[vrB].b[i-ashift];
480 }
481 #else
482 #error Endianess not supported!
483 #endif
484
485 gCPU.vr[vrD] = r;
486 }
487
488 /* vrlb Vector Rotate Left Byte
489 * v.234
490 */
491 void ppc_opc_vrlb()
492 {
493 VECTOR_DEBUG;
494 int vrD, vrA, vrB, shift;
495 Vector_t r;
496 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
497
498 for (int i=0; i<16; i++) {
499 shift = (gCPU.vr[vrB].b[i] & 0x7);
500
501 r.b[i] = gCPU.vr[vrA].b[i] << shift;
502 r.b[i] |= gCPU.vr[vrA].b[i] >> (8 - shift);
503 }
504
505 gCPU.vr[vrD] = r;
506 }
507
508 /* vrlh Vector Rotate Left Half Word
509 * v.235
510 */
511 void ppc_opc_vrlh()
512 {
513 VECTOR_DEBUG;
514 int vrD, vrA, vrB, shift;
515 Vector_t r;
516 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
517
518 for (int i=0; i<8; i++) {
519 shift = (gCPU.vr[vrB].h[i] & 0xf);
520
521 r.h[i] = gCPU.vr[vrA].h[i] << shift;
522 r.h[i] |= gCPU.vr[vrA].h[i] >> (16 - shift);
523 }
524
525 gCPU.vr[vrD] = r;
526 }
527
528 /* vrlw Vector Rotate Left Word
529 * v.236
530 */
531 void ppc_opc_vrlw()
532 {
533 VECTOR_DEBUG;
534 int vrD, vrA, vrB, shift;
535 Vector_t r;
536 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
537
538 for (int i=0; i<4; i++) {
539 shift = (gCPU.vr[vrB].w[i] & 0x1F);
540
541 r.w[i] = gCPU.vr[vrA].w[i] << shift;
542 r.w[i] |= gCPU.vr[vrA].w[i] >> (32 - shift);
543 }
544
545 gCPU.vr[vrD] = r;
546 }
547
548 /* With the merges, I just don't see any point in risking that a compiler
549 * might generate actual alu code to calculate anything when it's
550 * compile-time known. Plus, it's easier to validate it like this.
551 */
552
553 /* vmrghb Vector Merge High Byte
554 * v.195
555 */
556 void ppc_opc_vmrghb()
557 {
558 VECTOR_DEBUG;
559 int vrD, vrA, vrB;
560 Vector_t r;
561 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
562
563 VECT_B(r, 0) = VECT_B(gCPU.vr[vrA], 0);
564 VECT_B(r, 1) = VECT_B(gCPU.vr[vrB], 0);
565 VECT_B(r, 2) = VECT_B(gCPU.vr[vrA], 1);
566 VECT_B(r, 3) = VECT_B(gCPU.vr[vrB], 1);
567 VECT_B(r, 4) = VECT_B(gCPU.vr[vrA], 2);
568 VECT_B(r, 5) = VECT_B(gCPU.vr[vrB], 2);
569 VECT_B(r, 6) = VECT_B(gCPU.vr[vrA], 3);
570 VECT_B(r, 7) = VECT_B(gCPU.vr[vrB], 3);
571 VECT_B(r, 8) = VECT_B(gCPU.vr[vrA], 4);
572 VECT_B(r, 9) = VECT_B(gCPU.vr[vrB], 4);
573 VECT_B(r,10) = VECT_B(gCPU.vr[vrA], 5);
574 VECT_B(r,11) = VECT_B(gCPU.vr[vrB], 5);
575 VECT_B(r,12) = VECT_B(gCPU.vr[vrA], 6);
576 VECT_B(r,13) = VECT_B(gCPU.vr[vrB], 6);
577 VECT_B(r,14) = VECT_B(gCPU.vr[vrA], 7);
578 VECT_B(r,15) = VECT_B(gCPU.vr[vrB], 7);
579
580 gCPU.vr[vrD] = r;
581 }
582
583 /* vmrghh Vector Merge High Half Word
584 * v.196
585 */
586 void ppc_opc_vmrghh()
587 {
588 VECTOR_DEBUG;
589 int vrD, vrA, vrB;
590 Vector_t r;
591 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
592
593 VECT_H(r, 0) = VECT_H(gCPU.vr[vrA], 0);
594 VECT_H(r, 1) = VECT_H(gCPU.vr[vrB], 0);
595 VECT_H(r, 2) = VECT_H(gCPU.vr[vrA], 1);
596 VECT_H(r, 3) = VECT_H(gCPU.vr[vrB], 1);
597 VECT_H(r, 4) = VECT_H(gCPU.vr[vrA], 2);
598 VECT_H(r, 5) = VECT_H(gCPU.vr[vrB], 2);
599 VECT_H(r, 6) = VECT_H(gCPU.vr[vrA], 3);
600 VECT_H(r, 7) = VECT_H(gCPU.vr[vrB], 3);
601
602 gCPU.vr[vrD] = r;
603 }
604
605 /* vmrghw Vector Merge High Word
606 * v.197
607 */
608 void ppc_opc_vmrghw()
609 {
610 VECTOR_DEBUG;
611 int vrD, vrA, vrB;
612 Vector_t r;
613 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
614
615 VECT_W(r, 0) = VECT_W(gCPU.vr[vrA], 0);
616 VECT_W(r, 1) = VECT_W(gCPU.vr[vrB], 0);
617 VECT_W(r, 2) = VECT_W(gCPU.vr[vrA], 1);
618 VECT_W(r, 3) = VECT_W(gCPU.vr[vrB], 1);
619
620 gCPU.vr[vrD] = r;
621 }
622
623 /* vmrglb Vector Merge Low Byte
624 * v.198
625 */
626 void ppc_opc_vmrglb()
627 {
628 VECTOR_DEBUG;
629 int vrD, vrA, vrB;
630 Vector_t r;
631 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
632
633 VECT_B(r, 0) = VECT_B(gCPU.vr[vrA], 8);
634 VECT_B(r, 1) = VECT_B(gCPU.vr[vrB], 8);
635 VECT_B(r, 2) = VECT_B(gCPU.vr[vrA], 9);
636 VECT_B(r, 3) = VECT_B(gCPU.vr[vrB], 9);
637 VECT_B(r, 4) = VECT_B(gCPU.vr[vrA],10);
638 VECT_B(r, 5) = VECT_B(gCPU.vr[vrB],10);
639 VECT_B(r, 6) = VECT_B(gCPU.vr[vrA],11);
640 VECT_B(r, 7) = VECT_B(gCPU.vr[vrB],11);
641 VECT_B(r, 8) = VECT_B(gCPU.vr[vrA],12);
642 VECT_B(r, 9) = VECT_B(gCPU.vr[vrB],12);
643 VECT_B(r,10) = VECT_B(gCPU.vr[vrA],13);
644 VECT_B(r,11) = VECT_B(gCPU.vr[vrB],13);
645 VECT_B(r,12) = VECT_B(gCPU.vr[vrA],14);
646 VECT_B(r,13) = VECT_B(gCPU.vr[vrB],14);
647 VECT_B(r,14) = VECT_B(gCPU.vr[vrA],15);
648 VECT_B(r,15) = VECT_B(gCPU.vr[vrB],15);
649
650 gCPU.vr[vrD] = r;
651 }
652
653 /* vmrglh Vector Merge Low Half Word
654 * v.199
655 */
656 void ppc_opc_vmrglh()
657 {
658 VECTOR_DEBUG;
659 int vrD, vrA, vrB;
660 Vector_t r;
661 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
662
663 VECT_H(r, 0) = VECT_H(gCPU.vr[vrA], 4);
664 VECT_H(r, 1) = VECT_H(gCPU.vr[vrB], 4);
665 VECT_H(r, 2) = VECT_H(gCPU.vr[vrA], 5);
666 VECT_H(r, 3) = VECT_H(gCPU.vr[vrB], 5);
667 VECT_H(r, 4) = VECT_H(gCPU.vr[vrA], 6);
668 VECT_H(r, 5) = VECT_H(gCPU.vr[vrB], 6);
669 VECT_H(r, 6) = VECT_H(gCPU.vr[vrA], 7);
670 VECT_H(r, 7) = VECT_H(gCPU.vr[vrB], 7);
671
672 gCPU.vr[vrD] = r;
673 }
674
675 /* vmrglw Vector Merge Low Word
676 * v.200
677 */
678 void ppc_opc_vmrglw()
679 {
680 VECTOR_DEBUG;
681 int vrD, vrA, vrB;
682 Vector_t r;
683 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
684
685 VECT_W(r, 0) = VECT_W(gCPU.vr[vrA], 2);
686 VECT_W(r, 1) = VECT_W(gCPU.vr[vrB], 2);
687 VECT_W(r, 2) = VECT_W(gCPU.vr[vrA], 3);
688 VECT_W(r, 3) = VECT_W(gCPU.vr[vrB], 3);
689
690 gCPU.vr[vrD] = r;
691 }
692
693 /* vspltb Vector Splat Byte
694 * v.245
695 */
696 void ppc_opc_vspltb()
697 {
698 VECTOR_DEBUG;
699 int vrD, vrB;
700 uint32 uimm;
701 uint64 val;
702 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
703
704 /* The documentation doesn't stipulate what a value higher than 0xf
705 * will do. Thus, this is by default an undefined value. We
706 * are thus doing this the fastest way that won't crash us.
707 */
708 val = VECT_B(gCPU.vr[vrB], uimm & 0xf);
709 val |= (val << 8);
710 val |= (val << 16);
711 val |= (val << 32);
712
713 gCPU.vr[vrD].d[0] = val;
714 gCPU.vr[vrD].d[1] = val;
715 }
716
717 /* vsplth Vector Splat Half Word
718 * v.246
719 */
720 void ppc_opc_vsplth()
721 {
722 VECTOR_DEBUG;
723 int vrD, vrB;
724 uint32 uimm;
725 uint64 val;
726 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
727
728 /* The documentation doesn't stipulate what a value higher than 0x7
729 * will do. Thus, this is by default an undefined value. We
730 * are thus doing this the fastest way that won't crash us.
731 */
732 val = VECT_H(gCPU.vr[vrB], uimm & 0x7);
733 val |= (val << 16);
734 val |= (val << 32);
735
736 gCPU.vr[vrD].d[0] = val;
737 gCPU.vr[vrD].d[1] = val;
738 }
739
740 /* vspltw Vector Splat Word
741 * v.250
742 */
743 void ppc_opc_vspltw()
744 {
745 VECTOR_DEBUG;
746 int vrD, vrB;
747 uint32 uimm;
748 uint64 val;
749 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
750
751 /* The documentation doesn't stipulate what a value higher than 0x3
752 * will do. Thus, this is by default an undefined value. We
753 * are thus doing this the fastest way that won't crash us.
754 */
755 val = VECT_W(gCPU.vr[vrB], uimm & 0x3);
756 val |= (val << 32);
757
758 gCPU.vr[vrD].d[0] = val;
759 gCPU.vr[vrD].d[1] = val;
760 }
761
762 /* vspltisb Vector Splat Immediate Signed Byte
763 * v.247
764 */
765 void ppc_opc_vspltisb()
766 {
767 VECTOR_DEBUG_COMMON;
768 int vrD, vrB;
769 uint32 simm;
770 uint64 val;
771 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, simm, vrB);
772 PPC_OPC_ASSERT(vrB==0);
773
774 val = (simm & 0x10) ? (simm | 0xE0) : simm;
775 val |= (val << 8);
776 val |= (val << 16);
777 val |= (val << 32);
778
779 gCPU.vr[vrD].d[0] = val;
780 gCPU.vr[vrD].d[1] = val;
781 }
782
783 /* vspltish Vector Splat Immediate Signed Half Word
784 * v.248
785 */
786 void ppc_opc_vspltish()
787 {
788 VECTOR_DEBUG_COMMON;
789 int vrD, vrB;
790 uint32 simm;
791 uint64 val;
792 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, simm, vrB);
793 PPC_OPC_ASSERT(vrB==0);
794
795 val = (simm & 0x10) ? (simm | 0xFFE0) : simm;
796 val |= (val << 16);
797 val |= (val << 32);
798
799 gCPU.vr[vrD].d[0] = val;
800 gCPU.vr[vrD].d[1] = val;
801 }
802
803 /* vspltisw Vector Splat Immediate Signed Word
804 * v.249
805 */
806 void ppc_opc_vspltisw()
807 {
808 VECTOR_DEBUG_COMMON;
809 int vrD, vrB;
810 uint32 simm;
811 uint64 val;
812 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, simm, vrB);
813 PPC_OPC_ASSERT(vrB==0);
814
815 val = (simm & 0x10) ? (simm | 0xFFFFFFE0) : simm;
816 val |= (val << 32);
817
818 gCPU.vr[vrD].d[0] = val;
819 gCPU.vr[vrD].d[1] = val;
820 }
821
822 /* mfvscr Move from Vector Status and Control Register
823 * v.129
824 */
825 void ppc_opc_mfvscr()
826 {
827 VECTOR_DEBUG_COMMON;
828 int vrD, vrA, vrB;
829 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
830 PPC_OPC_ASSERT(vrA==0);
831 PPC_OPC_ASSERT(vrB==0);
832
833 VECT_W(gCPU.vr[vrD], 3) = gCPU.vscr;
834 VECT_W(gCPU.vr[vrD], 2) = 0;
835 VECT_D(gCPU.vr[vrD], 0) = 0;
836 }
837
838 /* mtvscr Move to Vector Status and Control Register
839 * v.130
840 */
841 void ppc_opc_mtvscr()
842 {
843 VECTOR_DEBUG_COMMON;
844 int vrD, vrA, vrB;
845 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
846 PPC_OPC_ASSERT(vrA==0);
847 PPC_OPC_ASSERT(vrD==0);
848
849 gCPU.vscr = VECT_W(gCPU.vr[vrB], 3);
850 }
851
852 /* vpkuhum Vector Pack Unsigned Half Word Unsigned Modulo
853 * v.224
854 */
855 void ppc_opc_vpkuhum()
856 {
857 VECTOR_DEBUG;
858 int vrD, vrA, vrB;
859 Vector_t r;
860 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
861
862 VECT_B(r, 0) = VECT_B(gCPU.vr[vrA], 1);
863 VECT_B(r, 1) = VECT_B(gCPU.vr[vrA], 3);
864 VECT_B(r, 2) = VECT_B(gCPU.vr[vrA], 5);
865 VECT_B(r, 3) = VECT_B(gCPU.vr[vrA], 7);
866 VECT_B(r, 4) = VECT_B(gCPU.vr[vrA], 9);
867 VECT_B(r, 5) = VECT_B(gCPU.vr[vrA],11);
868 VECT_B(r, 6) = VECT_B(gCPU.vr[vrA],13);
869 VECT_B(r, 7) = VECT_B(gCPU.vr[vrA],15);
870
871 VECT_B(r, 8) = VECT_B(gCPU.vr[vrB], 1);
872 VECT_B(r, 9) = VECT_B(gCPU.vr[vrB], 3);
873 VECT_B(r,10) = VECT_B(gCPU.vr[vrB], 5);
874 VECT_B(r,11) = VECT_B(gCPU.vr[vrB], 7);
875 VECT_B(r,12) = VECT_B(gCPU.vr[vrB], 9);
876 VECT_B(r,13) = VECT_B(gCPU.vr[vrB],11);
877 VECT_B(r,14) = VECT_B(gCPU.vr[vrB],13);
878 VECT_B(r,15) = VECT_B(gCPU.vr[vrB],15);
879
880 gCPU.vr[vrD] = r;
881 }
882
883 /* vpkuwum Vector Pack Unsigned Word Unsigned Modulo
884 * v.226
885 */
886 void ppc_opc_vpkuwum()
887 {
888 VECTOR_DEBUG;
889 int vrD, vrA, vrB;
890 Vector_t r;
891 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
892
893 VECT_H(r, 0) = VECT_H(gCPU.vr[vrA], 1);
894 VECT_H(r, 1) = VECT_H(gCPU.vr[vrA], 3);
895 VECT_H(r, 2) = VECT_H(gCPU.vr[vrA], 5);
896 VECT_H(r, 3) = VECT_H(gCPU.vr[vrA], 7);
897
898 VECT_H(r, 4) = VECT_H(gCPU.vr[vrB], 1);
899 VECT_H(r, 5) = VECT_H(gCPU.vr[vrB], 3);
900 VECT_H(r, 6) = VECT_H(gCPU.vr[vrB], 5);
901 VECT_H(r, 7) = VECT_H(gCPU.vr[vrB], 7);
902
903 gCPU.vr[vrD] = r;
904 }
905
906 /* vpkpx Vector Pack Pixel32
907 * v.219
908 */
909 void ppc_opc_vpkpx()
910 {
911 VECTOR_DEBUG;
912 int vrD, vrA, vrB;
913 Vector_t r;
914 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
915
916 VECT_H(r, 0) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 0));
917 VECT_H(r, 1) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 1));
918 VECT_H(r, 2) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 2));
919 VECT_H(r, 3) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 3));
920
921 VECT_H(r, 4) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 0));
922 VECT_H(r, 5) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 1));
923 VECT_H(r, 6) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 2));
924 VECT_H(r, 7) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 3));
925
926 gCPU.vr[vrD] = r;
927 }
928
929
930 /* vpkuhus Vector Pack Unsigned Half Word Unsigned Saturate
931 * v.225
932 */
933 void ppc_opc_vpkuhus()
934 {
935 VECTOR_DEBUG;
936 int vrD, vrA, vrB;
937 Vector_t r;
938 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
939
940 VECT_B(r, 0) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 0));
941 VECT_B(r, 1) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 1));
942 VECT_B(r, 2) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 2));
943 VECT_B(r, 3) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 3));
944 VECT_B(r, 4) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 4));
945 VECT_B(r, 5) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 5));
946 VECT_B(r, 6) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 6));
947 VECT_B(r, 7) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 7));
948
949 VECT_B(r, 8) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 0));
950 VECT_B(r, 9) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 1));
951 VECT_B(r,10) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 2));
952 VECT_B(r,11) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 3));
953 VECT_B(r,12) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 4));
954 VECT_B(r,13) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 5));
955 VECT_B(r,14) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 6));
956 VECT_B(r,15) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 7));
957
958 gCPU.vr[vrD] = r;
959 }
960
961 /* vpkshss Vector Pack Signed Half Word Signed Saturate
962 * v.220
963 */
964 void ppc_opc_vpkshss()
965 {
966 VECTOR_DEBUG;
967 int vrD, vrA, vrB;
968 Vector_t r;
969 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
970
971 VECT_B(r, 0) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 0));
972 VECT_B(r, 1) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 1));
973 VECT_B(r, 2) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 2));
974 VECT_B(r, 3) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 3));
975 VECT_B(r, 4) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 4));
976 VECT_B(r, 5) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 5));
977 VECT_B(r, 6) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 6));
978 VECT_B(r, 7) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 7));
979
980 VECT_B(r, 8) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 0));
981 VECT_B(r, 9) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 1));
982 VECT_B(r,10) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 2));
983 VECT_B(r,11) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 3));
984 VECT_B(r,12) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 4));
985 VECT_B(r,13) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 5));
986 VECT_B(r,14) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 6));
987 VECT_B(r,15) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 7));
988
989 gCPU.vr[vrD] = r;
990 }
991
992 /* vpkuwus Vector Pack Unsigned Word Unsigned Saturate
993 * v.227
994 */
995 void ppc_opc_vpkuwus()
996 {
997 VECTOR_DEBUG;
998 int vrD, vrA, vrB;
999 Vector_t r;
1000 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1001
1002 VECT_H(r, 0) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 0));
1003 VECT_H(r, 1) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 1));
1004 VECT_H(r, 2) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 2));
1005 VECT_H(r, 3) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 3));
1006
1007 VECT_H(r, 4) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 0));
1008 VECT_H(r, 5) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 1));
1009 VECT_H(r, 6) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 2));
1010 VECT_H(r, 7) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 3));
1011
1012 gCPU.vr[vrD] = r;
1013 }
1014
1015 /* vpkswss Vector Pack Signed Word Signed Saturate
1016 * v.222
1017 */
1018 void ppc_opc_vpkswss()
1019 {
1020 VECTOR_DEBUG;
1021 int vrD, vrA, vrB;
1022 Vector_t r;
1023 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1024
1025 VECT_H(r, 0) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 0));
1026 VECT_H(r, 1) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 1));
1027 VECT_H(r, 2) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 2));
1028 VECT_H(r, 3) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 3));
1029
1030 VECT_H(r, 4) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 0));
1031 VECT_H(r, 5) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 1));
1032 VECT_H(r, 6) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 2));
1033 VECT_H(r, 7) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 3));
1034
1035 gCPU.vr[vrD] = r;
1036 }
1037
1038 /* vpkshus Vector Pack Signed Half Word Unsigned Saturate
1039 * v.221
1040 */
1041 void ppc_opc_vpkshus()
1042 {
1043 VECTOR_DEBUG;
1044 int vrD, vrA, vrB;
1045 Vector_t r;
1046 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1047
1048 VECT_B(r, 0) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 0));
1049 VECT_B(r, 1) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 1));
1050 VECT_B(r, 2) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 2));
1051 VECT_B(r, 3) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 3));
1052 VECT_B(r, 4) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 4));
1053 VECT_B(r, 5) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 5));
1054 VECT_B(r, 6) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 6));
1055 VECT_B(r, 7) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 7));
1056
1057 VECT_B(r, 8) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 0));
1058 VECT_B(r, 9) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 1));
1059 VECT_B(r,10) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 2));
1060 VECT_B(r,11) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 3));
1061 VECT_B(r,12) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 4));
1062 VECT_B(r,13) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 5));
1063 VECT_B(r,14) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 6));
1064 VECT_B(r,15) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 7));
1065
1066 gCPU.vr[vrD] = r;
1067 }
1068
1069 /* vpkswus Vector Pack Signed Word Unsigned Saturate
1070 * v.223
1071 */
1072 void ppc_opc_vpkswus()
1073 {
1074 VECTOR_DEBUG;
1075 int vrD, vrA, vrB;
1076 Vector_t r;
1077 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1078
1079 VECT_H(r, 0) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 0));
1080 VECT_H(r, 1) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 1));
1081 VECT_H(r, 2) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 2));
1082 VECT_H(r, 3) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 3));
1083
1084 VECT_H(r, 4) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 0));
1085 VECT_H(r, 5) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 1));
1086 VECT_H(r, 6) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 2));
1087 VECT_H(r, 7) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 3));
1088
1089 gCPU.vr[vrD] = r;
1090 }
1091
1092 /* vupkhsb Vector Unpack High Signed Byte
1093 * v.277
1094 */
1095 void ppc_opc_vupkhsb()
1096 {
1097 VECTOR_DEBUG;
1098 int vrD, vrA, vrB;
1099 Vector_t r;
1100 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1101 PPC_OPC_ASSERT(vrA==0);
1102
1103 VECT_SH(r, 0) = VECT_SB(gCPU.vr[vrB], 0);
1104 VECT_SH(r, 1) = VECT_SB(gCPU.vr[vrB], 1);
1105 VECT_SH(r, 2) = VECT_SB(gCPU.vr[vrB], 2);
1106 VECT_SH(r, 3) = VECT_SB(gCPU.vr[vrB], 3);
1107 VECT_SH(r, 4) = VECT_SB(gCPU.vr[vrB], 4);
1108 VECT_SH(r, 5) = VECT_SB(gCPU.vr[vrB], 5);
1109 VECT_SH(r, 6) = VECT_SB(gCPU.vr[vrB], 6);
1110 VECT_SH(r, 7) = VECT_SB(gCPU.vr[vrB], 7);
1111
1112 gCPU.vr[vrD] = r;
1113 }
1114
1115 /* vupkhpx Vector Unpack High Pixel32
1116 * v.279
1117 */
1118 void ppc_opc_vupkhpx()
1119 {
1120 VECTOR_DEBUG;
1121 int vrD, vrA, vrB;
1122 Vector_t r;
1123 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1124 PPC_OPC_ASSERT(vrA==0);
1125
1126 VECT_W(r, 0) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 0));
1127 VECT_W(r, 1) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 1));
1128 VECT_W(r, 2) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 2));
1129 VECT_W(r, 3) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 3));
1130
1131 gCPU.vr[vrD] = r;
1132 }
1133
1134 /* vupkhsh Vector Unpack High Signed Half Word
1135 * v.278
1136 */
1137 void ppc_opc_vupkhsh()
1138 {
1139 VECTOR_DEBUG;
1140 int vrD, vrA, vrB;
1141 Vector_t r;
1142 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1143 PPC_OPC_ASSERT(vrA==0);
1144
1145 VECT_SW(r, 0) = VECT_SH(gCPU.vr[vrB], 0);
1146 VECT_SW(r, 1) = VECT_SH(gCPU.vr[vrB], 1);
1147 VECT_SW(r, 2) = VECT_SH(gCPU.vr[vrB], 2);
1148 VECT_SW(r, 3) = VECT_SH(gCPU.vr[vrB], 3);
1149
1150 gCPU.vr[vrD] = r;
1151 }
1152
1153 /* vupklsb Vector Unpack Low Signed Byte
1154 * v.280
1155 */
1156 void ppc_opc_vupklsb()
1157 {
1158 VECTOR_DEBUG;
1159 int vrD, vrA, vrB;
1160 Vector_t r;
1161 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1162 PPC_OPC_ASSERT(vrA==0);
1163
1164 VECT_SH(r, 0) = VECT_SB(gCPU.vr[vrB], 8);
1165 VECT_SH(r, 1) = VECT_SB(gCPU.vr[vrB], 9);
1166 VECT_SH(r, 2) = VECT_SB(gCPU.vr[vrB],10);
1167 VECT_SH(r, 3) = VECT_SB(gCPU.vr[vrB],11);
1168 VECT_SH(r, 4) = VECT_SB(gCPU.vr[vrB],12);
1169 VECT_SH(r, 5) = VECT_SB(gCPU.vr[vrB],13);
1170 VECT_SH(r, 6) = VECT_SB(gCPU.vr[vrB],14);
1171 VECT_SH(r, 7) = VECT_SB(gCPU.vr[vrB],15);
1172
1173 gCPU.vr[vrD] = r;
1174 }
1175
1176 /* vupklpx Vector Unpack Low Pixel32
1177 * v.279
1178 */
1179 void ppc_opc_vupklpx()
1180 {
1181 VECTOR_DEBUG;
1182 int vrD, vrA, vrB;
1183 Vector_t r;
1184 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1185 PPC_OPC_ASSERT(vrA==0);
1186
1187 VECT_W(r, 0) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 4));
1188 VECT_W(r, 1) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 5));
1189 VECT_W(r, 2) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 6));
1190 VECT_W(r, 3) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 7));
1191
1192 gCPU.vr[vrD] = r;
1193 }
1194
1195 /* vupklsh Vector Unpack Low Signed Half Word
1196 * v.281
1197 */
1198 void ppc_opc_vupklsh()
1199 {
1200 VECTOR_DEBUG;
1201 int vrD, vrA, vrB;
1202 Vector_t r;
1203 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1204 PPC_OPC_ASSERT(vrA==0);
1205
1206 VECT_SW(r, 0) = VECT_SH(gCPU.vr[vrB], 4);
1207 VECT_SW(r, 1) = VECT_SH(gCPU.vr[vrB], 5);
1208 VECT_SW(r, 2) = VECT_SH(gCPU.vr[vrB], 6);
1209 VECT_SW(r, 3) = VECT_SH(gCPU.vr[vrB], 7);
1210
1211 gCPU.vr[vrD] = r;
1212 }
1213
1214 /* vaddubm Vector Add Unsigned Byte Modulo
1215 * v.141
1216 */
1217 void ppc_opc_vaddubm()
1218 {
1219 VECTOR_DEBUG;
1220 int vrD, vrA, vrB;
1221 uint8 res;
1222 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1223
1224 for (int i=0; i<16; i++) {
1225 res = gCPU.vr[vrA].b[i] + gCPU.vr[vrB].b[i];
1226 gCPU.vr[vrD].b[i] = res;
1227 }
1228 }
1229
1230 /* vadduhm Vector Add Unsigned Half Word Modulo
1231 * v.143
1232 */
1233 void ppc_opc_vadduhm()
1234 {
1235 VECTOR_DEBUG;
1236 int vrD, vrA, vrB;
1237 uint16 res;
1238 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1239
1240 for (int i=0; i<8; i++) {
1241 res = gCPU.vr[vrA].h[i] + gCPU.vr[vrB].h[i];
1242 gCPU.vr[vrD].h[i] = res;
1243 }
1244 }
1245
1246 /* vadduwm Vector Add Unsigned Word Modulo
1247 * v.145
1248 */
1249 void ppc_opc_vadduwm()
1250 {
1251 VECTOR_DEBUG;
1252 int vrD, vrA, vrB;
1253 uint32 res;
1254 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1255
1256 for (int i=0; i<4; i++) {
1257 res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i];
1258 gCPU.vr[vrD].w[i] = res;
1259 }
1260 }
1261
1262 /* vaddfp Vector Add Float Point
1263 * v.137
1264 */
1265 void ppc_opc_vaddfp()
1266 {
1267 VECTOR_DEBUG;
1268 int vrD, vrA, vrB;
1269 float res;
1270 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1271
1272 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
1273 res = gCPU.vr[vrA].f[i] + gCPU.vr[vrB].f[i];
1274 gCPU.vr[vrD].f[i] = res;
1275 }
1276 }
1277
1278 /* vaddcuw Vector Add Carryout Unsigned Word
1279 * v.136
1280 */
1281 void ppc_opc_vaddcuw()
1282 {
1283 VECTOR_DEBUG;
1284 int vrD, vrA, vrB;
1285 uint32 res;
1286 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1287
1288 for (int i=0; i<4; i++) {
1289 res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i];
1290 gCPU.vr[vrD].w[i] = (res < gCPU.vr[vrA].w[i]) ? 1 : 0;
1291 }
1292 }
1293
1294 /* vaddubs Vector Add Unsigned Byte Saturate
1295 * v.142
1296 */
1297 void ppc_opc_vaddubs()
1298 {
1299 VECTOR_DEBUG;
1300 int vrD, vrA, vrB;
1301 uint16 res;
1302 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1303
1304 for (int i=0; i<16; i++) {
1305 res = (uint16)gCPU.vr[vrA].b[i] + (uint16)gCPU.vr[vrB].b[i];
1306 gCPU.vr[vrD].b[i] = SATURATE_UB(res);
1307 }
1308 }
1309
1310 /* vaddsbs Vector Add Signed Byte Saturate
1311 * v.138
1312 */
1313 void ppc_opc_vaddsbs()
1314 {
1315 VECTOR_DEBUG;
1316 int vrD, vrA, vrB;
1317 sint16 res;
1318 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1319
1320 for (int i=0; i<16; i++) {
1321 res = (sint16)gCPU.vr[vrA].sb[i] + (sint16)gCPU.vr[vrB].sb[i];
1322 gCPU.vr[vrD].b[i] = SATURATE_SB(res);
1323 }
1324 }
1325
1326 /* vadduhs Vector Add Unsigned Half Word Saturate
1327 * v.144
1328 */
1329 void ppc_opc_vadduhs()
1330 {
1331 VECTOR_DEBUG;
1332 int vrD, vrA, vrB;
1333 uint32 res;
1334 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1335
1336 for (int i=0; i<8; i++) {
1337 res = (uint32)gCPU.vr[vrA].h[i] + (uint32)gCPU.vr[vrB].h[i];
1338 gCPU.vr[vrD].h[i] = SATURATE_UH(res);
1339 }
1340 }
1341
1342 /* vaddshs Vector Add Signed Half Word Saturate
1343 * v.139
1344 */
1345 void ppc_opc_vaddshs()
1346 {
1347 VECTOR_DEBUG;
1348 int vrD, vrA, vrB;
1349 sint32 res;
1350 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1351
1352 for (int i=0; i<8; i++) {
1353 res = (sint32)gCPU.vr[vrA].sh[i] + (sint32)gCPU.vr[vrB].sh[i];
1354 gCPU.vr[vrD].h[i] = SATURATE_SH(res);
1355 }
1356 }
1357
1358 /* vadduws Vector Add Unsigned Word Saturate
1359 * v.146
1360 */
1361 void ppc_opc_vadduws()
1362 {
1363 VECTOR_DEBUG;
1364 int vrD, vrA, vrB;
1365 uint32 res;
1366 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1367
1368 for (int i=0; i<4; i++) {
1369 res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i];
1370
1371 // We do this to prevent us from having to do 64-bit math
1372 if (res < gCPU.vr[vrA].w[i]) {
1373 res = 0xFFFFFFFF;
1374 gCPU.vscr |= VSCR_SAT;
1375 }
1376
1377 /* 64-bit math | 32-bit hack
1378 * ------------------------+-------------------------------------
1379 * add, addc (a+b) | add (a+b)
1380 * sub, subb (r>ub) | sub (r<a)
1381 */
1382
1383 gCPU.vr[vrD].w[i] = res;
1384 }
1385 }
1386
1387 /* vaddsws Vector Add Signed Word Saturate
1388 * v.140
1389 */
1390 void ppc_opc_vaddsws()
1391 {
1392 VECTOR_DEBUG;
1393 int vrD, vrA, vrB;
1394 uint32 res;
1395 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1396
1397 for (int i=0; i<4; i++) {
1398 res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i];
1399
1400 // We do this to prevent us from having to do 64-bit math
1401 if (((gCPU.vr[vrA].w[i] ^ gCPU.vr[vrB].w[i]) & SIGN32) == 0) {
1402 // the signs of both operands are the same
1403
1404 if (((res ^ gCPU.vr[vrA].w[i]) & SIGN32) != 0) {
1405 // sign of result != sign of operands
1406
1407 // if res is negative, should have been positive
1408 res = (res & SIGN32) ? (SIGN32 - 1) : SIGN32;
1409 gCPU.vscr |= VSCR_SAT;
1410 }
1411 }
1412
1413 /* 64-bit math | 32-bit hack
1414 * ------------------------+-------------------------------------
1415 * add, addc (a+b) | add (a+b)
1416 * sub, subb (r>ub) | xor, and (sign == sign)
1417 * sub, subb (r<lb) | xor, and (sign != sign)
1418 * | and (which)
1419 */
1420
1421 gCPU.vr[vrD].w[i] = res;
1422 }
1423 }
1424
1425 /* vsububm Vector Subtract Unsigned Byte Modulo
1426 * v.265
1427 */
1428 void ppc_opc_vsububm()
1429 {
1430 VECTOR_DEBUG;
1431 int vrD, vrA, vrB;
1432 uint8 res;
1433 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1434
1435 for (int i=0; i<16; i++) {
1436 res = gCPU.vr[vrA].b[i] - gCPU.vr[vrB].b[i];
1437 gCPU.vr[vrD].b[i] = res;
1438 }
1439 }
1440
1441 /* vsubuhm Vector Subtract Unsigned Half Word Modulo
1442 * v.267
1443 */
1444 void ppc_opc_vsubuhm()
1445 {
1446 VECTOR_DEBUG;
1447 int vrD, vrA, vrB;
1448 uint16 res;
1449 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1450
1451 for (int i=0; i<8; i++) {
1452 res = gCPU.vr[vrA].h[i] - gCPU.vr[vrB].h[i];
1453 gCPU.vr[vrD].h[i] = res;
1454 }
1455 }
1456
1457 /* vsubuwm Vector Subtract Unsigned Word Modulo
1458 * v.269
1459 */
1460 void ppc_opc_vsubuwm()
1461 {
1462 VECTOR_DEBUG;
1463 int vrD, vrA, vrB;
1464 uint32 res;
1465 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1466
1467 for (int i=0; i<4; i++) {
1468 res = gCPU.vr[vrA].w[i] - gCPU.vr[vrB].w[i];
1469 gCPU.vr[vrD].w[i] = res;
1470 }
1471 }
1472
1473 /* vsubfp Vector Subtract Float Point
1474 * v.261
1475 */
1476 void ppc_opc_vsubfp()
1477 {
1478 VECTOR_DEBUG;
1479 int vrD, vrA, vrB;
1480 float res;
1481 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1482
1483 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
1484 res = gCPU.vr[vrA].f[i] - gCPU.vr[vrB].f[i];
1485 gCPU.vr[vrD].f[i] = res;
1486 }
1487 }
1488
1489 /* vsubcuw Vector Subtract Carryout Unsigned Word
1490 * v.260
1491 */
1492 void ppc_opc_vsubcuw()
1493 {
1494 VECTOR_DEBUG;
1495 int vrD, vrA, vrB;
1496 uint32 res;
1497 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1498
1499 for (int i=0; i<4; i++) {
1500 res = gCPU.vr[vrA].w[i] - gCPU.vr[vrB].w[i];
1501 gCPU.vr[vrD].w[i] = (res <= gCPU.vr[vrA].w[i]) ? 1 : 0;
1502 }
1503 }
1504
1505 /* vsububs Vector Subtract Unsigned Byte Saturate
1506 * v.266
1507 */
1508 void ppc_opc_vsububs()
1509 {
1510 VECTOR_DEBUG;
1511 int vrD, vrA, vrB;
1512 uint16 res;
1513 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1514
1515 for (int i=0; i<16; i++) {
1516 res = (uint16)gCPU.vr[vrA].b[i] - (uint16)gCPU.vr[vrB].b[i];
1517
1518 gCPU.vr[vrD].b[i] = SATURATE_0B(res);
1519 }
1520 }
1521
1522 /* vsubsbs Vector Subtract Signed Byte Saturate
1523 * v.262
1524 */
1525 void ppc_opc_vsubsbs()
1526 {
1527 VECTOR_DEBUG;
1528 int vrD, vrA, vrB;
1529 sint16 res;
1530 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1531
1532 for (int i=0; i<16; i++) {
1533 res = (sint16)gCPU.vr[vrA].sb[i] - (sint16)gCPU.vr[vrB].sb[i];
1534
1535 gCPU.vr[vrD].sb[i] = SATURATE_SB(res);
1536 }
1537 }
1538
1539 /* vsubuhs Vector Subtract Unsigned Half Word Saturate
1540 * v.268
1541 */
1542 void ppc_opc_vsubuhs()
1543 {
1544 VECTOR_DEBUG;
1545 int vrD, vrA, vrB;
1546 uint32 res;
1547 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1548
1549 for (int i=0; i<8; i++) {
1550 res = (uint32)gCPU.vr[vrA].h[i] - (uint32)gCPU.vr[vrB].h[i];
1551
1552 gCPU.vr[vrD].h[i] = SATURATE_0H(res);
1553 }
1554 }
1555
1556 /* vsubshs Vector Subtract Signed Half Word Saturate
1557 * v.263
1558 */
1559 void ppc_opc_vsubshs()
1560 {
1561 VECTOR_DEBUG;
1562 int vrD, vrA, vrB;
1563 sint32 res;
1564 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1565
1566 for (int i=0; i<8; i++) {
1567 res = (sint32)gCPU.vr[vrA].sh[i] - (sint32)gCPU.vr[vrB].sh[i];
1568
1569 gCPU.vr[vrD].sh[i] = SATURATE_SH(res);
1570 }
1571 }
1572
1573 /* vsubuws Vector Subtract Unsigned Word Saturate
1574 * v.270
1575 */
1576 void ppc_opc_vsubuws()
1577 {
1578 VECTOR_DEBUG;
1579 int vrD, vrA, vrB;
1580 uint32 res;
1581 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1582
1583 for (int i=0; i<4; i++) {
1584 res = gCPU.vr[vrA].w[i] - gCPU.vr[vrB].w[i];
1585
1586 // We do this to prevent us from having to do 64-bit math
1587 if (res > gCPU.vr[vrA].w[i]) {
1588 res = 0;
1589 gCPU.vscr |= VSCR_SAT;
1590 }
1591
1592 /* 64-bit math | 32-bit hack
1593 * ------------------------+-------------------------------------
1594 * sub, subb (a+b) | sub (a+b)
1595 * sub, subb (r>ub) | sub (r<a)
1596 */
1597
1598 gCPU.vr[vrD].w[i] = res;
1599 }
1600 }
1601
1602 /* vsubsws Vector Subtract Signed Word Saturate
1603 * v.264
1604 */
1605 void ppc_opc_vsubsws()
1606 {
1607 VECTOR_DEBUG;
1608 int vrD, vrA, vrB;
1609 uint32 res, tmp;
1610 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1611
1612 for (int i=0; i<4; i++) {
1613 tmp = -gCPU.vr[vrB].w[i];
1614 res = gCPU.vr[vrA].w[i] + tmp;
1615
1616 // We do this to prevent us from having to do 64-bit math
1617 if (((gCPU.vr[vrA].w[i] ^ tmp) & SIGN32) == 0) {
1618 // the signs of both operands are the same
1619
1620 if (((res ^ tmp) & SIGN32) != 0) {
1621 // sign of result != sign of operands
1622
1623 // if res is negative, should have been positive
1624 res = (res & SIGN32) ? (SIGN32 - 1) : SIGN32;
1625 gCPU.vscr |= VSCR_SAT;
1626 }
1627 }
1628
1629 /* 64-bit math | 32-bit hack
1630 * ------------------------+-------------------------------------
1631 * sub, subc (a+b) | neg, add (a-b)
1632 * sub, subb (r>ub) | xor, and (sign == sign)
1633 * sub, subb (r<lb) | xor, and (sign != sign)
1634 * | and (which)
1635 */
1636
1637 gCPU.vr[vrD].w[i] = res;
1638 }
1639 }
1640
1641 /* vmuleub Vector Multiply Even Unsigned Byte
1642 * v.209
1643 */
1644 void ppc_opc_vmuleub()
1645 {
1646 VECTOR_DEBUG;
1647 int vrD, vrA, vrB;
1648 uint16 res;
1649 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1650
1651 for (int i=0; i<8; i++) {
1652 res = (uint16)gCPU.vr[vrA].b[VECT_EVEN(i)] *
1653 (uint16)gCPU.vr[vrB].b[VECT_EVEN(i)];
1654
1655 gCPU.vr[vrD].h[i] = res;
1656 }
1657 }
1658
1659 /* vmulesb Vector Multiply Even Signed Byte
1660 * v.207
1661 */
1662 void ppc_opc_vmulesb()
1663 {
1664 VECTOR_DEBUG;
1665 int vrD, vrA, vrB;
1666 sint16 res;
1667 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1668
1669 for (int i=0; i<8; i++) {
1670 res = (sint16)gCPU.vr[vrA].sb[VECT_EVEN(i)] *
1671 (sint16)gCPU.vr[vrB].sb[VECT_EVEN(i)];
1672
1673 gCPU.vr[vrD].sh[i] = res;
1674 }
1675 }
1676
1677 /* vmuleuh Vector Multiply Even Unsigned Half Word
1678 * v.210
1679 */
1680 void ppc_opc_vmuleuh()
1681 {
1682 VECTOR_DEBUG;
1683 int vrD, vrA, vrB;
1684 uint32 res;
1685 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1686
1687 for (int i=0; i<4; i++) {
1688 res = (uint32)gCPU.vr[vrA].h[VECT_EVEN(i)] *
1689 (uint32)gCPU.vr[vrB].h[VECT_EVEN(i)];
1690
1691 gCPU.vr[vrD].w[i] = res;
1692 }
1693 }
1694
1695 /* vmulesh Vector Multiply Even Signed Half Word
1696 * v.208
1697 */
1698 void ppc_opc_vmulesh()
1699 {
1700 VECTOR_DEBUG;
1701 int vrD, vrA, vrB;
1702 sint32 res;
1703 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1704
1705 for (int i=0; i<4; i++) {
1706 res = (sint32)gCPU.vr[vrA].sh[VECT_EVEN(i)] *
1707 (sint32)gCPU.vr[vrB].sh[VECT_EVEN(i)];
1708
1709 gCPU.vr[vrD].sw[i] = res;
1710 }
1711 }
1712
1713 /* vmuloub Vector Multiply Odd Unsigned Byte
1714 * v.213
1715 */
1716 void ppc_opc_vmuloub()
1717 {
1718 VECTOR_DEBUG;
1719 int vrD, vrA, vrB;
1720 uint16 res;
1721 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1722
1723 for (int i=0; i<8; i++) {
1724 res = (uint16)gCPU.vr[vrA].b[VECT_ODD(i)] *
1725 (uint16)gCPU.vr[vrB].b[VECT_ODD(i)];
1726
1727 gCPU.vr[vrD].h[i] = res;
1728 }
1729 }
1730
1731 /* vmulosb Vector Multiply Odd Signed Byte
1732 * v.211
1733 */
1734 void ppc_opc_vmulosb()
1735 {
1736 VECTOR_DEBUG;
1737 int vrD, vrA, vrB;
1738 sint16 res;
1739 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1740
1741 for (int i=0; i<8; i++) {
1742 res = (sint16)gCPU.vr[vrA].sb[VECT_ODD(i)] *
1743 (sint16)gCPU.vr[vrB].sb[VECT_ODD(i)];
1744
1745 gCPU.vr[vrD].sh[i] = res;
1746 }
1747 }
1748
1749 /* vmulouh Vector Multiply Odd Unsigned Half Word
1750 * v.214
1751 */
1752 void ppc_opc_vmulouh()
1753 {
1754 VECTOR_DEBUG;
1755 int vrD, vrA, vrB;
1756 uint32 res;
1757 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1758
1759 for (int i=0; i<4; i++) {
1760 res = (uint32)gCPU.vr[vrA].h[VECT_ODD(i)] *
1761 (uint32)gCPU.vr[vrB].h[VECT_ODD(i)];
1762
1763 gCPU.vr[vrD].w[i] = res;
1764 }
1765 }
1766
1767 /* vmulosh Vector Multiply Odd Signed Half Word
1768 * v.212
1769 */
1770 void ppc_opc_vmulosh()
1771 {
1772 VECTOR_DEBUG;
1773 int vrD, vrA, vrB;
1774 sint32 res;
1775 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1776
1777 for (int i=0; i<4; i++) {
1778 res = (sint32)gCPU.vr[vrA].sh[VECT_ODD(i)] *
1779 (sint32)gCPU.vr[vrB].sh[VECT_ODD(i)];
1780
1781 gCPU.vr[vrD].sw[i] = res;
1782 }
1783 }
1784
1785 /* vmaddfp Vector Multiply Add Floating Point
1786 * v.177
1787 */
1788 void ppc_opc_vmaddfp()
1789 {
1790 VECTOR_DEBUG;
1791 int vrD, vrA, vrB, vrC;
1792 double res;
1793 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1794
1795 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
1796 res = (double)gCPU.vr[vrA].f[i] * (double)gCPU.vr[vrC].f[i];
1797
1798 res = (double)gCPU.vr[vrB].f[i] + res;
1799
1800 gCPU.vr[vrD].f[i] = (float)res;
1801 }
1802 }
1803
1804 /* vmhaddshs Vector Multiply High and Add Signed Half Word Saturate
1805 * v.185
1806 */
1807 void ppc_opc_vmhaddshs()
1808 {
1809 VECTOR_DEBUG;
1810 int vrD, vrA, vrB, vrC;
1811 sint32 prod;
1812 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1813
1814 for (int i=0; i<8; i++) {
1815 prod = (sint32)gCPU.vr[vrA].sh[i] * (sint32)gCPU.vr[vrB].sh[i];
1816
1817 prod = (prod >> 15) + (sint32)gCPU.vr[vrC].sh[i];
1818
1819 gCPU.vr[vrD].sh[i] = SATURATE_SH(prod);
1820 }
1821 }
1822
1823 /* vmladduhm Vector Multiply Low and Add Unsigned Half Word Modulo
1824 * v.194
1825 */
1826 void ppc_opc_vmladduhm()
1827 {
1828 VECTOR_DEBUG;
1829 int vrD, vrA, vrB, vrC;
1830 uint32 prod;
1831 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1832
1833 for (int i=0; i<8; i++) {
1834 prod = (uint32)gCPU.vr[vrA].h[i] * (uint32)gCPU.vr[vrB].h[i];
1835
1836 prod = prod + (uint32)gCPU.vr[vrC].h[i];
1837
1838 gCPU.vr[vrD].h[i] = prod;
1839 }
1840 }
1841
1842 /* vmhraddshs Vector Multiply High Round and Add Signed Half Word Saturate
1843 * v.186
1844 */
1845 void ppc_opc_vmhraddshs()
1846 {
1847 VECTOR_DEBUG;
1848 int vrD, vrA, vrB, vrC;
1849 sint32 prod;
1850 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1851
1852 for (int i=0; i<8; i++) {
1853 prod = (sint32)gCPU.vr[vrA].sh[i] * (sint32)gCPU.vr[vrB].sh[i];
1854
1855 prod += 0x4000;
1856 prod = (prod >> 15) + (sint32)gCPU.vr[vrC].sh[i];
1857
1858 gCPU.vr[vrD].sh[i] = SATURATE_SH(prod);
1859 }
1860 }
1861
1862 /* vmsumubm Vector Multiply Sum Unsigned Byte Modulo
1863 * v.204
1864 */
1865 void ppc_opc_vmsumubm()
1866 {
1867 VECTOR_DEBUG;
1868 int vrD, vrA, vrB, vrC;
1869 uint32 temp;
1870 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1871
1872 for (int i=0; i<4; i++) {
1873 temp = gCPU.vr[vrC].w[i];
1874
1875 temp += (uint16)gCPU.vr[vrA].b[i<<2] *
1876 (uint16)gCPU.vr[vrB].b[i<<2];
1877
1878 temp += (uint16)gCPU.vr[vrA].b[(i<<2)+1] *
1879 (uint16)gCPU.vr[vrB].b[(i<<2)+1];
1880
1881 temp += (uint16)gCPU.vr[vrA].b[(i<<2)+2] *
1882 (uint16)gCPU.vr[vrB].b[(i<<2)+2];
1883
1884 temp += (uint16)gCPU.vr[vrA].b[(i<<2)+3] *
1885 (uint16)gCPU.vr[vrB].b[(i<<2)+3];
1886
1887 gCPU.vr[vrD].w[i] = temp;
1888 }
1889 }
1890
1891 /* vmsumuhm Vector Multiply Sum Unsigned Half Word Modulo
1892 * v.205
1893 */
1894 void ppc_opc_vmsumuhm()
1895 {
1896 VECTOR_DEBUG;
1897 int vrD, vrA, vrB, vrC;
1898 uint32 temp;
1899 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1900
1901 for (int i=0; i<4; i++) {
1902 temp = gCPU.vr[vrC].w[i];
1903
1904 temp += (uint32)gCPU.vr[vrA].h[i<<1] *
1905 (uint32)gCPU.vr[vrB].h[i<<1];
1906 temp += (uint32)gCPU.vr[vrA].h[(i<<1)+1] *
1907 (uint32)gCPU.vr[vrB].h[(i<<1)+1];
1908
1909 gCPU.vr[vrD].w[i] = temp;
1910 }
1911 }
1912
1913 /* vmsummbm Vector Multiply Sum Mixed-Sign Byte Modulo
1914 * v.201
1915 */
1916 void ppc_opc_vmsummbm()
1917 {
1918 VECTOR_DEBUG;
1919 int vrD, vrA, vrB, vrC;
1920 sint32 temp;
1921 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1922
1923 for (int i=0; i<4; i++) {
1924 temp = gCPU.vr[vrC].sw[i];
1925
1926 temp += (sint16)gCPU.vr[vrA].sb[i<<2] *
1927 (uint16)gCPU.vr[vrB].b[i<<2];
1928 temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+1] *
1929 (uint16)gCPU.vr[vrB].b[(i<<2)+1];
1930 temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+2] *
1931 (uint16)gCPU.vr[vrB].b[(i<<2)+2];
1932 temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+3] *
1933 (uint16)gCPU.vr[vrB].b[(i<<2)+3];
1934
1935 gCPU.vr[vrD].sw[i] = temp;
1936 }
1937 }
1938
1939 /* vmsumshm Vector Multiply Sum Signed Half Word Modulo
1940 * v.202
1941 */
1942 void ppc_opc_vmsumshm()
1943 {
1944 VECTOR_DEBUG;
1945 int vrD, vrA, vrB, vrC;
1946 sint32 temp;
1947 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1948
1949 for (int i=0; i<4; i++) {
1950 temp = gCPU.vr[vrC].sw[i];
1951
1952 temp += (sint32)gCPU.vr[vrA].sh[i<<1] *
1953 (sint32)gCPU.vr[vrB].sh[i<<1];
1954 temp += (sint32)gCPU.vr[vrA].sh[(i<<1)+1] *
1955 (sint32)gCPU.vr[vrB].sh[(i<<1)+1];
1956
1957 gCPU.vr[vrD].sw[i] = temp;
1958 }
1959 }
1960
1961 /* vmsumuhs Vector Multiply Sum Unsigned Half Word Saturate
1962 * v.206
1963 */
1964 void ppc_opc_vmsumuhs()
1965 {
1966 VECTOR_DEBUG;
1967 int vrD, vrA, vrB, vrC;
1968 uint64 temp;
1969 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1970
1971 /* For this, there's no way to get around 64-bit math. If we use
1972 * the hacks used before, then we have to do it so often, that
1973 * we'll outpace the 64-bit math in execution time.
1974 */
1975 for (int i=0; i<4; i++) {
1976 temp = gCPU.vr[vrC].w[i];
1977
1978 temp += (uint32)gCPU.vr[vrA].h[i<<1] *
1979 (uint32)gCPU.vr[vrB].h[i<<1];
1980
1981 temp += (uint32)gCPU.vr[vrA].h[(i<<1)+1] *
1982 (uint32)gCPU.vr[vrB].h[(i<<1)+1];
1983
1984 gCPU.vr[vrD].w[i] = SATURATE_UW(temp);
1985 }
1986 }
1987
1988 /* vmsumshs Vector Multiply Sum Signed Half Word Saturate
1989 * v.203
1990 */
1991 void ppc_opc_vmsumshs()
1992 {
1993 VECTOR_DEBUG;
1994 int vrD, vrA, vrB, vrC;
1995 sint64 temp;
1996 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1997
1998 /* For this, there's no way to get around 64-bit math. If we use
1999 * the hacks used before, then we have to do it so often, that
2000 * we'll outpace the 64-bit math in execution time.
2001 */
2002
2003 for (int i=0; i<4; i++) {
2004 temp = gCPU.vr[vrC].sw[i];
2005
2006 temp += (sint32)gCPU.vr[vrA].sh[i<<1] *
2007 (sint32)gCPU.vr[vrB].sh[i<<1];
2008 temp += (sint32)gCPU.vr[vrA].sh[(i<<1)+1] *
2009 (sint32)gCPU.vr[vrB].sh[(i<<1)+1];
2010
2011 gCPU.vr[vrD].sw[i] = SATURATE_SW(temp);
2012 }
2013 }
2014
2015 /* vsum4ubs Vector Sum Across Partial (1/4) Unsigned Byte Saturate
2016 * v.275
2017 */
2018 void ppc_opc_vsum4ubs()
2019 {
2020 VECTOR_DEBUG;
2021 int vrD, vrA, vrB;
2022 uint64 res;
2023 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2024
2025 /* For this, there's no way to get around 64-bit math. If we use
2026 * the hacks used before, then we have to do it so often, that
2027 * we'll outpace the 64-bit math in execution time.
2028 */
2029
2030 for (int i=0; i<4; i++) {
2031 res = (uint64)gCPU.vr[vrB].w[i];
2032
2033 res += (uint64)gCPU.vr[vrA].b[(i<<2)];
2034 res += (uint64)gCPU.vr[vrA].b[(i<<2)+1];
2035 res += (uint64)gCPU.vr[vrA].b[(i<<2)+2];
2036 res += (uint64)gCPU.vr[vrA].b[(i<<2)+3];
2037
2038 gCPU.vr[vrD].w[i] = SATURATE_UW(res);
2039 }
2040 }
2041
2042 /* vsum4sbs Vector Sum Across Partial (1/4) Signed Byte Saturate
2043 * v.273
2044 */
2045 void ppc_opc_vsum4sbs()
2046 {
2047 VECTOR_DEBUG;
2048 int vrD, vrA, vrB;
2049 sint64 res;
2050 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2051
2052 for (int i=0; i<4; i++) {
2053 res = (sint64)gCPU.vr[vrB].sw[i];
2054
2055 res += (sint64)gCPU.vr[vrA].sb[(i<<2)];
2056 res += (sint64)gCPU.vr[vrA].sb[(i<<2)+1];
2057 res += (sint64)gCPU.vr[vrA].sb[(i<<2)+2];
2058 res += (sint64)gCPU.vr[vrA].sb[(i<<2)+3];
2059
2060 gCPU.vr[vrD].sw[i] = SATURATE_SW(res);
2061 }
2062 }
2063
2064 /* vsum4shs Vector Sum Across Partial (1/4) Signed Half Word Saturate
2065 * v.274
2066 */
2067 void ppc_opc_vsum4shs()
2068 {
2069 VECTOR_DEBUG;
2070 int vrD, vrA, vrB;
2071 sint64 res;
2072 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2073
2074 for (int i=0; i<4; i++) {
2075 res = (sint64)gCPU.vr[vrB].sw[i];
2076
2077 res += (sint64)gCPU.vr[vrA].sh[(i<<1)];
2078 res += (sint64)gCPU.vr[vrA].sh[(i<<1)+1];
2079
2080 gCPU.vr[vrD].sw[i] = SATURATE_SW(res);
2081 }
2082 }
2083
2084 /* vsum2sws Vector Sum Across Partial (1/2) Signed Word Saturate
2085 * v.272
2086 */
2087 void ppc_opc_vsum2sws()
2088 {
2089 VECTOR_DEBUG;
2090 int vrD, vrA, vrB;
2091 sint64 res;
2092 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2093
2094 res = (sint64)gCPU.vr[vrA].sw[0] + (sint64)gCPU.vr[vrA].sw[1];
2095 res += (sint64)gCPU.vr[vrB].sw[VECT_ODD(0)];
2096
2097 gCPU.vr[vrD].w[VECT_ODD(0)] = SATURATE_SW(res);
2098 gCPU.vr[vrD].w[VECT_EVEN(0)] = 0;
2099
2100 res = (sint64)gCPU.vr[vrA].sw[2] + (sint64)gCPU.vr[vrA].sw[3];
2101 res += (sint64)gCPU.vr[vrB].sw[VECT_ODD(1)];
2102
2103 gCPU.vr[vrD].w[VECT_ODD(1)] = SATURATE_SW(res);
2104 gCPU.vr[vrD].w[VECT_EVEN(1)] = 0;
2105 }
2106
2107 /* vsumsws Vector Sum Across Signed Word Saturate
2108 * v.271
2109 */
2110 void ppc_opc_vsumsws()
2111 {
2112 VECTOR_DEBUG;
2113 int vrD, vrA, vrB;
2114 sint64 res;
2115 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2116
2117 res = (sint64)gCPU.vr[vrA].sw[0] + (sint64)gCPU.vr[vrA].sw[1];
2118 res += (sint64)gCPU.vr[vrA].sw[2] + (sint64)gCPU.vr[vrA].sw[3];
2119
2120 res += (sint64)VECT_W(gCPU.vr[vrB], 3);
2121
2122 VECT_W(gCPU.vr[vrD], 3) = SATURATE_SW(res);
2123 VECT_W(gCPU.vr[vrD], 2) = 0;
2124 VECT_W(gCPU.vr[vrD], 1) = 0;
2125 VECT_W(gCPU.vr[vrD], 0) = 0;
2126 }
2127
2128 /* vnmsubfp Vector Negative Multiply-Subtract Floating Point
2129 * v.215
2130 */
2131 void ppc_opc_vnmsubfp()
2132 {
2133 VECTOR_DEBUG;
2134 int vrD, vrA, vrB, vrC;
2135 double res;
2136 PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
2137
2138 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2139 res = (double)gCPU.vr[vrA].f[i] * (double)gCPU.vr[vrC].f[i];
2140
2141 res = (double)gCPU.vr[vrB].f[i] - res;
2142
2143 gCPU.vr[vrD].f[i] = (float)res;
2144 }
2145 }
2146
2147 /* vavgub Vector Average Unsigned Byte
2148 * v.152
2149 */
2150 void ppc_opc_vavgub()
2151 {
2152 VECTOR_DEBUG;
2153 int vrD, vrA, vrB;
2154 uint16 res;
2155 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2156
2157 for (int i=0; i<16; i++) {
2158 res = (uint16)gCPU.vr[vrA].b[i] +
2159 (uint16)gCPU.vr[vrB].b[i] + 1;
2160
2161 gCPU.vr[vrD].b[i] = (res >> 1);
2162 }
2163 }
2164
2165 /* vavguh Vector Average Unsigned Half Word
2166 * v.153
2167 */
2168 void ppc_opc_vavguh()
2169 {
2170 VECTOR_DEBUG;
2171 int vrD, vrA, vrB;
2172 uint32 res;
2173 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2174
2175 for (int i=0; i<8; i++) {
2176 res = (uint32)gCPU.vr[vrA].h[i] +
2177 (uint32)gCPU.vr[vrB].h[i] + 1;
2178
2179 gCPU.vr[vrD].h[i] = (res >> 1);
2180 }
2181 }
2182
2183 /* vavguw Vector Average Unsigned Word
2184 * v.154
2185 */
2186 void ppc_opc_vavguw()
2187 {
2188 VECTOR_DEBUG;
2189 int vrD, vrA, vrB;
2190 uint64 res;
2191 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2192
2193 for (int i=0; i<4; i++) {
2194 res = (uint64)gCPU.vr[vrA].w[i] +
2195 (uint64)gCPU.vr[vrB].w[i] + 1;
2196
2197 gCPU.vr[vrD].w[i] = (res >> 1);
2198 }
2199 }
2200
2201 /* vavgsb Vector Average Signed Byte
2202 * v.149
2203 */
2204 void ppc_opc_vavgsb()
2205 {
2206 VECTOR_DEBUG;
2207 int vrD, vrA, vrB;
2208 sint16 res;
2209 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2210
2211 for (int i=0; i<16; i++) {
2212 res = (sint16)gCPU.vr[vrA].sb[i] +
2213 (sint16)gCPU.vr[vrB].sb[i] + 1;
2214
2215 gCPU.vr[vrD].sb[i] = (res >> 1);
2216 }
2217 }
2218
2219 /* vavgsh Vector Average Signed Half Word
2220 * v.150
2221 */
2222 void ppc_opc_vavgsh()
2223 {
2224 VECTOR_DEBUG;
2225 int vrD, vrA, vrB;
2226 sint32 res;
2227 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2228
2229 for (int i=0; i<8; i++) {
2230 res = (sint32)gCPU.vr[vrA].sh[i] +
2231 (sint32)gCPU.vr[vrB].sh[i] + 1;
2232
2233 gCPU.vr[vrD].sh[i] = (res >> 1);
2234 }
2235 }
2236
2237 /* vavgsw Vector Average Signed Word
2238 * v.151
2239 */
2240 void ppc_opc_vavgsw()
2241 {
2242 VECTOR_DEBUG;
2243 int vrD, vrA, vrB;
2244 sint64 res;
2245 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2246
2247 for (int i=0; i<4; i++) {
2248 res = (sint64)gCPU.vr[vrA].sw[i] +
2249 (sint64)gCPU.vr[vrB].sw[i] + 1;
2250
2251 gCPU.vr[vrD].sw[i] = (res >> 1);
2252 }
2253 }
2254
2255 /* vmaxub Vector Maximum Unsigned Byte
2256 * v.182
2257 */
2258 void ppc_opc_vmaxub()
2259 {
2260 VECTOR_DEBUG;
2261 int vrD, vrA, vrB;
2262 uint8 res;
2263 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2264
2265 for (int i=0; i<16; i++) {
2266 res = gCPU.vr[vrA].b[i];
2267
2268 if (res < gCPU.vr[vrB].b[i])
2269 res = gCPU.vr[vrB].b[i];
2270
2271 gCPU.vr[vrD].b[i] = res;
2272 }
2273 }
2274
2275 /* vmaxuh Vector Maximum Unsigned Half Word
2276 * v.183
2277 */
2278 void ppc_opc_vmaxuh()
2279 {
2280 VECTOR_DEBUG;
2281 int vrD, vrA, vrB;
2282 uint16 res;
2283 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2284
2285 for (int i=0; i<8; i++) {
2286 res = gCPU.vr[vrA].h[i];
2287
2288 if (res < gCPU.vr[vrB].h[i])
2289 res = gCPU.vr[vrB].h[i];
2290
2291 gCPU.vr[vrD].h[i] = res;
2292 }
2293 }
2294
2295 /* vmaxuw Vector Maximum Unsigned Word
2296 * v.184
2297 */
2298 void ppc_opc_vmaxuw()
2299 {
2300 VECTOR_DEBUG;
2301 int vrD, vrA, vrB;
2302 uint32 res;
2303 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2304
2305 for (int i=0; i<4; i++) {
2306 res = gCPU.vr[vrA].w[i];
2307
2308 if (res < gCPU.vr[vrB].w[i])
2309 res = gCPU.vr[vrB].w[i];
2310
2311 gCPU.vr[vrD].w[i] = res;
2312 }
2313 }
2314
2315 /* vmaxsb Vector Maximum Signed Byte
2316 * v.179
2317 */
2318 void ppc_opc_vmaxsb()
2319 {
2320 VECTOR_DEBUG;
2321 int vrD, vrA, vrB;
2322 sint8 res;
2323 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2324
2325 for (int i=0; i<16; i++) {
2326 res = gCPU.vr[vrA].sb[i];
2327
2328 if (res < gCPU.vr[vrB].sb[i])
2329 res = gCPU.vr[vrB].sb[i];
2330
2331 gCPU.vr[vrD].sb[i] = res;
2332 }
2333 }
2334
2335 /* vmaxsh Vector Maximum Signed Half Word
2336 * v.180
2337 */
2338 void ppc_opc_vmaxsh()
2339 {
2340 VECTOR_DEBUG;
2341 int vrD, vrA, vrB;
2342 sint16 res;
2343 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2344
2345 for (int i=0; i<8; i++) {
2346 res = gCPU.vr[vrA].sh[i];
2347
2348 if (res < gCPU.vr[vrB].sh[i])
2349 res = gCPU.vr[vrB].sh[i];
2350
2351 gCPU.vr[vrD].sh[i] = res;
2352 }
2353 }
2354
2355 /* vmaxsw Vector Maximum Signed Word
2356 * v.181
2357 */
2358 void ppc_opc_vmaxsw()
2359 {
2360 VECTOR_DEBUG;
2361 int vrD, vrA, vrB;
2362 sint32 res;
2363 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2364
2365 for (int i=0; i<4; i++) {
2366 res = gCPU.vr[vrA].sw[i];
2367
2368 if (res < gCPU.vr[vrB].sw[i])
2369 res = gCPU.vr[vrB].sw[i];
2370
2371 gCPU.vr[vrD].sw[i] = res;
2372 }
2373 }
2374
2375 /* vmaxfp Vector Maximum Floating Point
2376 * v.178
2377 */
2378 void ppc_opc_vmaxfp()
2379 {
2380 VECTOR_DEBUG;
2381 int vrD, vrA, vrB;
2382 float res;
2383 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2384
2385 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2386 res = gCPU.vr[vrA].f[i];
2387
2388 if (res < gCPU.vr[vrB].f[i])
2389 res = gCPU.vr[vrB].f[i];
2390
2391 gCPU.vr[vrD].f[i] = res;
2392 }
2393 }
2394
2395 /* vminub Vector Minimum Unsigned Byte
2396 * v.191
2397 */
2398 void ppc_opc_vminub()
2399 {
2400 VECTOR_DEBUG;
2401 int vrD, vrA, vrB;
2402 uint8 res;
2403 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2404
2405 for (int i=0; i<16; i++) {
2406 res = gCPU.vr[vrA].b[i];
2407
2408 if (res > gCPU.vr[vrB].b[i])
2409 res = gCPU.vr[vrB].b[i];
2410
2411 gCPU.vr[vrD].b[i] = res;
2412 }
2413 }
2414
2415 /* vminuh Vector Minimum Unsigned Half Word
2416 * v.192
2417 */
2418 void ppc_opc_vminuh()
2419 {
2420 VECTOR_DEBUG;
2421 int vrD, vrA, vrB;
2422 uint16 res;
2423 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2424
2425 for (int i=0; i<8; i++) {
2426 res = gCPU.vr[vrA].h[i];
2427
2428 if (res > gCPU.vr[vrB].h[i])
2429 res = gCPU.vr[vrB].h[i];
2430
2431 gCPU.vr[vrD].h[i] = res;
2432 }
2433 }
2434
2435 /* vminuw Vector Minimum Unsigned Word
2436 * v.193
2437 */
2438 void ppc_opc_vminuw()
2439 {
2440 VECTOR_DEBUG;
2441 int vrD, vrA, vrB;
2442 uint32 res;
2443 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2444
2445 for (int i=0; i<4; i++) {
2446 res = gCPU.vr[vrA].w[i];
2447
2448 if (res > gCPU.vr[vrB].w[i])
2449 res = gCPU.vr[vrB].w[i];
2450
2451 gCPU.vr[vrD].w[i] = res;
2452 }
2453 }
2454
2455 /* vminsb Vector Minimum Signed Byte
2456 * v.188
2457 */
2458 void ppc_opc_vminsb()
2459 {
2460 VECTOR_DEBUG;
2461 int vrD, vrA, vrB;
2462 sint8 res;
2463 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2464
2465 for (int i=0; i<16; i++) {
2466 res = gCPU.vr[vrA].sb[i];
2467
2468 if (res > gCPU.vr[vrB].sb[i])
2469 res = gCPU.vr[vrB].sb[i];
2470
2471 gCPU.vr[vrD].sb[i] = res;
2472 }
2473 }
2474
2475 /* vminsh Vector Minimum Signed Half Word
2476 * v.189
2477 */
2478 void ppc_opc_vminsh()
2479 {
2480 VECTOR_DEBUG;
2481 int vrD, vrA, vrB;
2482 sint16 res;
2483 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2484
2485 for (int i=0; i<8; i++) {
2486 res = gCPU.vr[vrA].sh[i];
2487
2488 if (res > gCPU.vr[vrB].sh[i])
2489 res = gCPU.vr[vrB].sh[i];
2490
2491 gCPU.vr[vrD].sh[i] = res;
2492 }
2493 }
2494
2495 /* vminsw Vector Minimum Signed Word
2496 * v.190
2497 */
2498 void ppc_opc_vminsw()
2499 {
2500 VECTOR_DEBUG;
2501 int vrD, vrA, vrB;
2502 sint32 res;
2503 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2504
2505 for (int i=0; i<4; i++) {
2506 res = gCPU.vr[vrA].sw[i];
2507
2508 if (res > gCPU.vr[vrB].sw[i])
2509 res = gCPU.vr[vrB].sw[i];
2510
2511 gCPU.vr[vrD].sw[i] = res;
2512 }
2513 }
2514
2515 /* vminfp Vector Minimum Floating Point
2516 * v.187
2517 */
2518 void ppc_opc_vminfp()
2519 {
2520 VECTOR_DEBUG;
2521 int vrD, vrA, vrB;
2522 float res;
2523 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2524
2525 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2526 res = gCPU.vr[vrA].f[i];
2527
2528 if (res > gCPU.vr[vrB].f[i])
2529 res = gCPU.vr[vrB].f[i];
2530
2531 gCPU.vr[vrD].f[i] = res;
2532 }
2533 }
2534
2535 /* vrfin Vector Round to Floating-Point Integer Nearest
2536 * v.231
2537 */
2538 void ppc_opc_vrfin()
2539 {
2540 VECTOR_DEBUG;
2541 int vrD, vrA, vrB;
2542 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2543 PPC_OPC_ASSERT(vrA==0);
2544
2545 /* Documentation doesn't dictate how this instruction should
2546 * round from a middle point. With a test on a real G4, it was
2547 * found to be round to nearest, with bias to even if equidistant.
2548 *
2549 * This is covered by the function rint()
2550 */
2551 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2552 gCPU.vr[vrD].f[i] = rintf(gCPU.vr[vrB].f[i]);
2553 }
2554 }
2555
2556 /* vrfip Vector Round to Floating-Point Integer toward Plus Infinity
2557 * v.232
2558 */
2559 void ppc_opc_vrfip()
2560 {
2561 VECTOR_DEBUG;
2562 int vrD, vrA, vrB;
2563 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2564 PPC_OPC_ASSERT(vrA==0);
2565
2566 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2567 gCPU.vr[vrD].f[i] = ceilf(gCPU.vr[vrB].f[i]);
2568 }
2569 }
2570
2571 /* vrfim Vector Round to Floating-Point Integer toward Minus Infinity
2572 * v.230
2573 */
2574 void ppc_opc_vrfim()
2575 {
2576 VECTOR_DEBUG;
2577 int vrD, vrA, vrB;
2578 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2579 PPC_OPC_ASSERT(vrA==0);
2580
2581 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2582 gCPU.vr[vrD].f[i] = floorf(gCPU.vr[vrB].f[i]);
2583 }
2584 }
2585
2586 /* vrfiz Vector Round to Floating-Point Integer toward Zero
2587 * v.233
2588 */
2589 void ppc_opc_vrfiz()
2590 {
2591 VECTOR_DEBUG;
2592 int vrD, vrA, vrB;
2593 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2594 PPC_OPC_ASSERT(vrA==0);
2595
2596 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2597 gCPU.vr[vrD].f[i] = truncf(gCPU.vr[vrD].f[i]);
2598 }
2599 }
2600
2601 /* vrefp Vector Reciprocal Estimate Floating Point
2602 * v.228
2603 */
2604 void ppc_opc_vrefp()
2605 {
2606 VECTOR_DEBUG;
2607 int vrD, vrA, vrB;
2608 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2609 PPC_OPC_ASSERT(vrA==0);
2610
2611 /* This emulation generates an exact value, instead of an estimate.
2612 * This is technically within specs, but some test-suites expect the
2613 * exact estimate value returned by G4s. These anomolous failures
2614 * should be ignored.
2615 */
2616
2617 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2618 gCPU.vr[vrD].f[i] = 1 / gCPU.vr[vrB].f[i];
2619 }
2620 }
2621
2622 /* vrsqrtefp Vector Reciprocal Square Root Estimate Floating Point
2623 * v.237
2624 */
2625 void ppc_opc_vrsqrtefp()
2626 {
2627 VECTOR_DEBUG;
2628 int vrD, vrA, vrB;
2629 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2630 PPC_OPC_ASSERT(vrA==0);
2631
2632 /* This emulation generates an exact value, instead of an estimate.
2633 * This is technically within specs, but some test-suites expect the
2634 * exact estimate value returned by G4s. These anomolous failures
2635 * should be ignored.
2636 */
2637
2638 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2639 gCPU.vr[vrD].f[i] = 1 / sqrt(gCPU.vr[vrB].f[i]);
2640 }
2641 }
2642
2643 /* vlogefp Vector Log2 Estimate Floating Point
2644 * v.175
2645 */
2646 void ppc_opc_vlogefp()
2647 {
2648 VECTOR_DEBUG;
2649 int vrD, vrA, vrB;
2650 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2651 PPC_OPC_ASSERT(vrA==0);
2652
2653 /* This emulation generates an exact value, instead of an estimate.
2654 * This is technically within specs, but some test-suites expect the
2655 * exact estimate value returned by G4s. These anomolous failures
2656 * should be ignored.
2657 */
2658
2659 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2660 gCPU.vr[vrD].f[i] = log2(gCPU.vr[vrB].f[i]);
2661 }
2662 }
2663
2664 /* vexptefp Vector 2 Raised to the Exponent Estimate Floating Point
2665 * v.173
2666 */
2667 void ppc_opc_vexptefp()
2668 {
2669 VECTOR_DEBUG;
2670 int vrD, vrA, vrB;
2671 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2672 PPC_OPC_ASSERT(vrA==0);
2673
2674 /* This emulation generates an exact value, instead of an estimate.
2675 * This is technically within specs, but some test-suites expect the
2676 * exact estimate value returned by G4s. These anomolous failures
2677 * should be ignored.
2678 */
2679
2680 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2681 gCPU.vr[vrD].f[i] = exp2(gCPU.vr[vrB].f[i]);
2682 }
2683 }
2684
2685 /* vcfux Vector Convert from Unsigned Fixed-Point Word
2686 * v.156
2687 */
2688 void ppc_opc_vcfux()
2689 {
2690 VECTOR_DEBUG;
2691 int vrD, vrB;
2692 uint32 uimm;
2693 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
2694
2695 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2696 gCPU.vr[vrD].f[i] = ((float)gCPU.vr[vrB].w[i]) / (1 << uimm);
2697 }
2698 }
2699
2700 /* vcfsx Vector Convert from Signed Fixed-Point Word
2701 * v.155
2702 */
2703 void ppc_opc_vcfsx()
2704 {
2705 VECTOR_DEBUG;
2706 int vrD, vrB;
2707 uint32 uimm;
2708 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
2709
2710 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2711 gCPU.vr[vrD].f[i] = ((float)gCPU.vr[vrB].sw[i]) / (1 << uimm);
2712 }
2713 }
2714
2715 /* vctsxs Vector Convert To Signed Fixed-Point Word Saturate
2716 * v.171
2717 */
2718 void ppc_opc_vctsxs()
2719 {
2720 VECTOR_DEBUG;
2721 int vrD, vrB;
2722 uint32 uimm;
2723 float ftmp;
2724 sint32 tmp;
2725 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
2726
2727 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2728 ftmp = gCPU.vr[vrB].f[i] * (float)(1 << uimm);
2729 ftmp = truncf(ftmp);
2730
2731 tmp = (sint32)ftmp;
2732
2733 if (ftmp > 2147483647.0) {
2734 tmp = 2147483647; // 0x7fffffff
2735 gCPU.vscr |= VSCR_SAT;
2736 } else if (ftmp < -2147483648.0) {
2737 tmp = -2147483648LL; // 0x80000000
2738 gCPU.vscr |= VSCR_SAT;
2739 }
2740
2741 gCPU.vr[vrD].sw[i] = tmp;
2742 }
2743 }
2744
2745 /* vctuxs Vector Convert to Unsigned Fixed-Point Word Saturate
2746 * v.172
2747 */
2748 void ppc_opc_vctuxs()
2749 {
2750 VECTOR_DEBUG;
2751 int vrD, vrB;
2752 uint32 tmp, uimm;
2753 float ftmp;
2754 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
2755
2756 for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2757 ftmp = gCPU.vr[vrB].f[i] * (float)(1 << uimm);
2758 ftmp = truncf(ftmp);
2759
2760 tmp = (uint32)ftmp;
2761
2762 if (ftmp > 4294967295.0) {
2763 tmp = 0xffffffff;
2764 gCPU.vscr |= VSCR_SAT;
2765 } else if (ftmp < 0) {
2766 tmp = 0;
2767 gCPU.vscr |= VSCR_SAT;
2768 }
2769
2770 gCPU.vr[vrD].w[i] = tmp;
2771 }
2772 }
2773
2774 /* vand Vector Logical AND
2775 * v.147
2776 */
2777 void ppc_opc_vand()
2778 {
2779 VECTOR_DEBUG;
2780 int vrD, vrA, vrB;
2781 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2782
2783 gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] & gCPU.vr[vrB].d[0];
2784 gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] & gCPU.vr[vrB].d[1];
2785 }
2786
2787 /* vandc Vector Logical AND with Complement
2788 * v.148
2789 */
2790 void ppc_opc_vandc()
2791 {
2792 VECTOR_DEBUG;
2793 int vrD, vrA, vrB;
2794 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2795
2796 gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] & ~gCPU.vr[vrB].d[0];
2797 gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] & ~gCPU.vr[vrB].d[1];
2798 }
2799
2800 /* vor Vector Logical OR
2801 * v.217
2802 */
2803 void ppc_opc_vor()
2804 {
2805 VECTOR_DEBUG_COMMON;
2806 int vrD, vrA, vrB;
2807 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2808
2809 gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] | gCPU.vr[vrB].d[0];
2810 gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] | gCPU.vr[vrB].d[1];
2811 }
2812
2813 /* vnor Vector Logical NOR
2814 * v.216
2815 */
2816 void ppc_opc_vnor()
2817 {
2818 VECTOR_DEBUG;
2819 int vrD, vrA, vrB;
2820 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2821
2822 gCPU.vr[vrD].d[0] = ~(gCPU.vr[vrA].d[0] | gCPU.vr[vrB].d[0]);
2823 gCPU.vr[vrD].d[1] = ~(gCPU.vr[vrA].d[1] | gCPU.vr[vrB].d[1]);
2824 }
2825
2826 /* vxor Vector Logical XOR
2827 * v.282
2828 */
2829 void ppc_opc_vxor()
2830 {
2831 VECTOR_DEBUG_COMMON;
2832 int vrD, vrA, vrB;
2833 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2834
2835 gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] ^ gCPU.vr[vrB].d[0];
2836 gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] ^ gCPU.vr[vrB].d[1];
2837 }
2838
/* Masks for the CR6 field of gCPU.cr, as written by the vector compare
 * instructions when their record bit is set.
 *
 *	CR_CR6		all four bits of the field
 *	CR_CR6_EQ	every element compared true
 *	CR_CR6_NE_SOME	at least one element compared false
 *	CR_CR6_NE	no element compared true
 *	CR_CR6_EQ_SOME	at least one element compared true
 *
 * NOTE(review): the AltiVec manual describes CR6 as
 * "all true, 0, all false, 0"; the two *_SOME bits occupy the positions
 * documented as zero -- confirm this is intentional.
 */
#define CR_CR6		(0x00f0)
#define CR_CR6_EQ	(0x0080)
#define CR_CR6_NE_SOME	(0x0040)
#define CR_CR6_NE	(0x0020)
#define CR_CR6_EQ_SOME	(0x0010)
2844
2845 /* vcmpequbx Vector Compare Equal-to Unsigned Byte
2846 * v.160
2847 */
2848 void ppc_opc_vcmpequbx()
2849 {
2850 VECTOR_DEBUG;
2851 int vrD, vrA, vrB;
2852 int tf=CR_CR6_EQ | CR_CR6_NE;
2853 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2854
2855 for (int i=0; i<16; i++) {
2856 if (gCPU.vr[vrA].b[i] == gCPU.vr[vrB].b[i]) {
2857 gCPU.vr[vrD].b[i] = 0xff;
2858 tf &= ~CR_CR6_NE;
2859 tf |= CR_CR6_EQ_SOME;
2860 } else {
2861 gCPU.vr[vrD].b[i] = 0;
2862 tf &= ~CR_CR6_EQ;
2863 tf |= CR_CR6_NE_SOME;
2864 }
2865 }
2866
2867 if (PPC_OPC_VRc & gCPU.current_opc) {
2868 gCPU.cr &= ~CR_CR6;
2869 gCPU.cr |= tf;
2870 }
2871 }
2872
2873 /* vcmpequhx Vector Compare Equal-to Unsigned Half Word
2874 * v.161
2875 */
2876 void ppc_opc_vcmpequhx()
2877 {
2878 VECTOR_DEBUG;
2879 int vrD, vrA, vrB;
2880 int tf=CR_CR6_EQ | CR_CR6_NE;
2881 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2882
2883 for (int i=0; i<8; i++) {
2884 if (gCPU.vr[vrA].h[i] == gCPU.vr[vrB].h[i]) {
2885 gCPU.vr[vrD].h[i] = 0xffff;
2886 tf &= ~CR_CR6_NE;
2887 tf |= CR_CR6_EQ_SOME;
2888 } else {
2889 gCPU.vr[vrD].h[i] = 0;
2890 tf &= ~CR_CR6_EQ;
2891 tf |= CR_CR6_NE_SOME;
2892 }
2893 }
2894
2895 if (PPC_OPC_VRc & gCPU.current_opc) {
2896 gCPU.cr &= ~CR_CR6;
2897 gCPU.cr |= tf;
2898 }
2899 }
2900
2901 /* vcmpequwx Vector Compare Equal-to Unsigned Word
2902 * v.162
2903 */
2904 void ppc_opc_vcmpequwx()
2905 {
2906 VECTOR_DEBUG;
2907 int vrD, vrA, vrB;
2908 int tf=CR_CR6_EQ | CR_CR6_NE;
2909 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2910
2911 for (int i=0; i<4; i++) {
2912 if (gCPU.vr[vrA].w[i] == gCPU.vr[vrB].w[i]) {
2913 gCPU.vr[vrD].w[i] = 0xffffffff;
2914 tf &= ~CR_CR6_NE;
2915 tf |= CR_CR6_EQ_SOME;
2916 } else {
2917 gCPU.vr[vrD].w[i] = 0;
2918 tf &= ~CR_CR6_EQ;
2919 tf |= CR_CR6_NE_SOME;
2920 }
2921 }
2922
2923 if (PPC_OPC_VRc</