/[pearpc]/src/cpu/cpu_generic/ppc_vec.cc
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /src/cpu/cpu_generic/ppc_vec.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Wed Sep 5 17:11:21 2007 UTC (16 years, 7 months ago) by dpavlin
File size: 68507 byte(s)
import upstream CVS
1 dpavlin 1 /*
2     * PearPC
3     * ppc_vec.cc
4     *
5     * Copyright (C) 2004 Daniel Foesch (dfoesch@cs.nsmu.edu)
6     *
7     * This program is free software; you can redistribute it and/or modify
8     * it under the terms of the GNU General Public License version 2 as
9     * published by the Free Software Foundation.
10     *
11     * This program is distributed in the hope that it will be useful,
12     * but WITHOUT ANY WARRANTY; without even the implied warranty of
13     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14     * GNU General Public License for more details.
15     *
16     * You should have received a copy of the GNU General Public License
17     * along with this program; if not, write to the Free Software
18     * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19     */
20    
21     /* Pages marked: v.???
22     * From: IBM PowerPC MicroProcessor Family: Altivec(tm) Technology...
23     * Programming Environments Manual
24     */
25    
26     #include <math.h>
27    
28     /*
29     * FIXME: put somewhere appropriate
30     */
#ifndef HAS_LOG2
/* Fallback base-2 logarithm built from log().  The whole expansion is
 * parenthesized so it acts as a single operand inside a larger
 * expression (e.g. 1/log2(x)).
 */
#define log2(x) (log(x)/log(2))
#endif /* HAS_LOG2 */

#ifndef HAS_EXP2
/* Fallback base-2 exponential built from pow().  The argument is
 * parenthesized so compound arguments expand as one operand.
 */
#define exp2(x) (pow(2, (x)))
#endif /* HAS_EXP2 */
38    
39     #include "debug/tracers.h"
40     #include "ppc_cpu.h"
41     #include "ppc_dec.h"
42     #include "ppc_fpu.h"
43     #include "ppc_vec.h"
44    
45     #define SIGN32 0x80000000
46    
/* PACK_PIXEL	Packs a uint32 pixel to uint16 pixel
 * v.219
 *
 * Three fields of the 32-bit input are kept and squeezed together:
 *   input bits 24..19 -> result bits 15..10
 *   input bits 15..11 -> result bits  9..5
 *   input bits  7..3  -> result bits  4..0
 * All other input bits are discarded.
 */
static inline uint16 PACK_PIXEL(uint32 clr)
{
	uint32 top6 = (clr >> 9) & 0xfc00;
	uint32 mid5 = (clr >> 6) & 0x03e0;
	uint32 low5 = (clr >> 3) & 0x001f;

	return top6 | mid5 | low5;
}
56    
/* UNPACK_PIXEL	Unpacks a uint16 pixel to uint32 pixel
 * v.276 & v.279
 *
 * Bit 15 of the input selects 0xff or 0x00 for the top result byte; the
 * three 5-bit fields (bits 14..10, 9..5, 4..0) land in the low five bits
 * of result bytes 2, 1 and 0 respectively.
 */
static inline uint32 UNPACK_PIXEL(uint16 clr)
{
	uint32 px = clr;
	uint32 out = 0;

	if (px & 0x8000) {
		out = 0xff000000;
	}
	out |= (px & 0x7c00) << 6;
	out |= (px & 0x03e0) << 3;
	out |= (px & 0x001f);

	return out;
}
67    
68     static inline uint8 SATURATE_UB(uint16 val)
69     {
70     if (val & 0xff00) {
71     gCPU.vscr |= VSCR_SAT;
72     return 0xff;
73     }
74     return val;
75     }
76     static inline uint8 SATURATE_0B(uint16 val)
77     {
78     if (val & 0xff00) {
79     gCPU.vscr |= VSCR_SAT;
80     return 0;
81     }
82     return val;
83     }
84    
85     static inline uint16 SATURATE_UH(uint32 val)
86     {
87     if (val & 0xffff0000) {
88     gCPU.vscr |= VSCR_SAT;
89     return 0xffff;
90     }
91     return val;
92     }
93    
94     static inline uint16 SATURATE_0H(uint32 val)
95     {
96     if (val & 0xffff0000) {
97     gCPU.vscr |= VSCR_SAT;
98     return 0;
99     }
100     return val;
101     }
102    
103     static inline sint8 SATURATE_SB(sint16 val)
104     {
105     if (val > 127) { // 0x7F
106     gCPU.vscr |= VSCR_SAT;
107     return 127;
108     } else if (val < -128) { // 0x80
109     gCPU.vscr |= VSCR_SAT;
110     return -128;
111     }
112     return val;
113     }
114    
115     static inline uint8 SATURATE_USB(sint16 val)
116     {
117     if (val > 0xff) {
118     gCPU.vscr |= VSCR_SAT;
119     return 0xff;
120     } else if (val < 0) {
121     gCPU.vscr |= VSCR_SAT;
122     return 0;
123     }
124     return (uint8)val;
125     }
126    
127     static inline sint16 SATURATE_SH(sint32 val)
128     {
129     if (val > 32767) { // 0x7fff
130     gCPU.vscr |= VSCR_SAT;
131     return 32767;
132     } else if (val < -32768) { // 0x8000
133     gCPU.vscr |= VSCR_SAT;
134     return -32768;
135     }
136     return val;
137     }
138    
139     static inline uint16 SATURATE_USH(sint32 val)
140     {
141     if (val > 0xffff) {
142     gCPU.vscr |= VSCR_SAT;
143     return 0xffff;
144     } else if (val < 0) {
145     gCPU.vscr |= VSCR_SAT;
146     return 0;
147     }
148     return (uint16)val;
149     }
150    
151     static inline sint32 SATURATE_UW(sint64 val)
152     {
153     if (val > 0xffffffffLL) {
154     gCPU.vscr |= VSCR_SAT;
155     return 0xffffffffLL;
156     }
157     return val;
158     }
159    
160     static inline sint32 SATURATE_SW(sint64 val)
161     {
162     if (val > 2147483647LL) { // 0x7fffffff
163     gCPU.vscr |= VSCR_SAT;
164     return 2147483647LL;
165     } else if (val < -2147483648LL) { // 0x80000000
166     gCPU.vscr |= VSCR_SAT;
167     return -2147483648LL;
168     }
169     return val;
170     }
171    
172     /* vperm Vector Permutation
173     * v.218
174     */
175     void ppc_opc_vperm()
176     {
177     VECTOR_DEBUG_COMMON;
178     int vrD, vrA, vrB, vrC;
179     int sel;
180     Vector_t r;
181     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
182     for (int i=0; i<16; i++) {
183     sel = gCPU.vr[vrC].b[i];
184     if (sel & 0x10)
185     r.b[i] = VECT_B(gCPU.vr[vrB], sel & 0xf);
186     else
187     r.b[i] = VECT_B(gCPU.vr[vrA], sel & 0xf);
188     }
189    
190     gCPU.vr[vrD] = r;
191     }
192    
193     /* vsel Vector Select
194     * v.238
195     */
196     void ppc_opc_vsel()
197     {
198     VECTOR_DEBUG;
199     int vrD, vrA, vrB, vrC;
200     uint64 mask, val;
201     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
202    
203     mask = gCPU.vr[vrC].d[0];
204     val = gCPU.vr[vrB].d[0] & mask;
205     val |= gCPU.vr[vrA].d[0] & ~mask;
206     gCPU.vr[vrD].d[0] = val;
207    
208     mask = gCPU.vr[vrC].d[1];
209     val = gCPU.vr[vrB].d[1] & mask;
210     val |= gCPU.vr[vrA].d[1] & ~mask;
211     gCPU.vr[vrD].d[1] = val;
212     }
213    
214     /* vsrb Vector Shift Right Byte
215     * v.256
216     */
217     void ppc_opc_vsrb()
218     {
219     VECTOR_DEBUG;
220     int vrD, vrA, vrB;
221     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
222     for (int i=0; i<16; i++) {
223     gCPU.vr[vrD].b[i] = gCPU.vr[vrA].b[i] >> (gCPU.vr[vrB].b[i] & 0x7);
224     }
225     }
226    
227     /* vsrh Vector Shift Right Half Word
228     * v.257
229     */
230     void ppc_opc_vsrh()
231     {
232     VECTOR_DEBUG;
233     int vrD, vrA, vrB;
234     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
235     for (int i=0; i<8; i++) {
236     gCPU.vr[vrD].h[i] = gCPU.vr[vrA].h[i] >> (gCPU.vr[vrB].h[i] & 0xf);
237     }
238     }
239    
240     /* vsrw Vector Shift Right Word
241     * v.259
242     */
243     void ppc_opc_vsrw()
244     {
245     VECTOR_DEBUG;
246     int vrD, vrA, vrB;
247     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
248     for (int i=0; i<4; i++) {
249     gCPU.vr[vrD].w[i] = gCPU.vr[vrA].w[i] >> (gCPU.vr[vrB].w[i] & 0x1f);
250     }
251     }
252    
253     /* vsrab Vector Shift Right Arithmetic Byte
254     * v.253
255     */
256     void ppc_opc_vsrab()
257     {
258     VECTOR_DEBUG;
259     int vrD, vrA, vrB;
260     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
261     for (int i=0; i<16; i++) {
262     gCPU.vr[vrD].sb[i] = gCPU.vr[vrA].sb[i] >> (gCPU.vr[vrB].b[i] & 0x7);
263     }
264     }
265    
266     /* vsrah Vector Shift Right Arithmetic Half Word
267     * v.254
268     */
269     void ppc_opc_vsrah()
270     {
271     VECTOR_DEBUG;
272     int vrD, vrA, vrB;
273     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
274     for (int i=0; i<8; i++) {
275     gCPU.vr[vrD].sh[i] = gCPU.vr[vrA].sh[i] >> (gCPU.vr[vrB].h[i] & 0xf);
276     }
277     }
278    
279     /* vsraw Vector Shift Right Arithmetic Word
280     * v.255
281     */
282     void ppc_opc_vsraw()
283     {
284     VECTOR_DEBUG;
285     int vrD, vrA, vrB;
286     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
287     for (int i=0; i<4; i++) {
288     gCPU.vr[vrD].sw[i] = gCPU.vr[vrA].sw[i] >> (gCPU.vr[vrB].w[i] & 0x1f);
289     }
290     }
291    
292     /* vslb Vector Shift Left Byte
293     * v.240
294     */
295     void ppc_opc_vslb()
296     {
297     VECTOR_DEBUG;
298     int vrD, vrA, vrB;
299     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
300     for (int i=0; i<16; i++) {
301     gCPU.vr[vrD].b[i] = gCPU.vr[vrA].b[i] << (gCPU.vr[vrB].b[i] & 0x7);
302     }
303     }
304    
305     /* vslh Vector Shift Left Half Word
306     * v.242
307     */
308     void ppc_opc_vslh()
309     {
310     VECTOR_DEBUG;
311     int vrD, vrA, vrB;
312     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
313     for (int i=0; i<8; i++) {
314     gCPU.vr[vrD].h[i] = gCPU.vr[vrA].h[i] << (gCPU.vr[vrB].h[i] & 0xf);
315     }
316     }
317    
318     /* vslw Vector Shift Left Word
319     * v.244
320     */
321     void ppc_opc_vslw()
322     {
323     VECTOR_DEBUG;
324     int vrD, vrA, vrB;
325     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
326     for (int i=0; i<4; i++) {
327     gCPU.vr[vrD].w[i] = gCPU.vr[vrA].w[i] << (gCPU.vr[vrB].w[i] & 0x1f);
328     }
329     }
330    
331     /* vsr Vector Shift Right
332     * v.251
333     */
334     void ppc_opc_vsr()
335     {
336     VECTOR_DEBUG;
337     int vrD, vrA, vrB;
338     Vector_t r;
339     int shift;
340     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
341    
342     /* Specs say that the low-order 3 bits of all byte elements in vB
343     * must be the same, or the result is undefined. So we can just
344     * use the same low-order 3 bits for all of our shifts.
345     */
346     shift = gCPU.vr[vrB].w[0] & 0x7;
347    
348     r.d[0] = gCPU.vr[vrA].d[0] >> shift;
349     r.d[1] = gCPU.vr[vrA].d[1] >> shift;
350    
351     VECT_D(r, 1) |= VECT_D(gCPU.vr[vrA], 0) << (64 - shift);
352    
353     gCPU.vr[vrD] = r;
354     }
355    
356     /* vsro Vector Shift Right Octet
357     * v.258
358     */
359     void ppc_opc_vsro()
360     {
361     VECTOR_DEBUG;
362     int vrD, vrA, vrB;
363     Vector_t r;
364     int shift, i;
365     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
366    
367     shift = (gCPU.vr[vrB].w[0] >> 3) & 0xf;
368     #if HOST_ENDIANESS == HOST_ENDIANESS_LE
369     for (i=0; i<(16-shift); i++) {
370     r.b[i] = gCPU.vr[vrA].b[i+shift];
371     }
372    
373     for (; i<16; i++) {
374     r.b[i] = 0;
375     }
376     #elif HOST_ENDIANESS == HOST_ENDIANESS_BE
377     for (i=0; i<shift; i++) {
378     r.b[i] = 0;
379     }
380    
381     for (; i<16; i++) {
382     r.b[i] = gCPU.vr[vrA].b[i-shift];
383     }
384     #else
385     #error Endianess not supported!
386     #endif
387    
388     gCPU.vr[vrD] = r;
389     }
390    
391     /* vsl Vector Shift Left
392     * v.239
393     */
394     void ppc_opc_vsl()
395     {
396     VECTOR_DEBUG;
397     int vrD, vrA, vrB;
398     Vector_t r;
399     int shift;
400     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
401    
402     /* Specs say that the low-order 3 bits of all byte elements in vB
403     * must be the same, or the result is undefined. So we can just
404     * use the same low-order 3 bits for all of our shifts.
405     */
406     shift = gCPU.vr[vrB].w[0] & 0x7;
407    
408     r.d[0] = gCPU.vr[vrA].d[0] << shift;
409     r.d[1] = gCPU.vr[vrA].d[1] << shift;
410    
411     VECT_D(r, 0) |= VECT_D(gCPU.vr[vrA], 1) >> (64 - shift);
412    
413     gCPU.vr[vrD] = r;
414     }
415    
416     /* vslo Vector Shift Left Octet
417     * v.243
418     */
419     void ppc_opc_vslo()
420     {
421     VECTOR_DEBUG;
422     int vrD, vrA, vrB;
423     Vector_t r;
424     int shift, i;
425     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
426    
427     shift = (gCPU.vr[vrB].w[0] >> 3) & 0xf;
428     #if HOST_ENDIANESS == HOST_ENDIANESS_LE
429     for (i=0; i<shift; i++) {
430     r.b[i] = 0;
431     }
432    
433     for (; i<16; i++) {
434     r.b[i] = gCPU.vr[vrA].b[i-shift];
435     }
436     #elif HOST_ENDIANESS == HOST_ENDIANESS_BE
437     for (i=0; i<(16-shift); i++) {
438     r.b[i] = gCPU.vr[vrA].b[i+shift];
439     }
440    
441     for (; i<16; i++) {
442     r.b[i] = 0;
443     }
444     #else
445     #error Endianess not supported!
446     #endif
447    
448     gCPU.vr[vrD] = r;
449     }
450    
451     /* vsldoi Vector Shift Left Double by Octet Immediate
452     * v.241
453     */
454     void ppc_opc_vsldoi()
455     {
456     VECTOR_DEBUG_COMMON;
457     int vrD, vrA, vrB, shift, ashift;
458     int i;
459     Vector_t r;
460     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, shift);
461    
462     shift &= 0xf;
463     ashift = 16 - shift;
464    
465     #if HOST_ENDIANESS == HOST_ENDIANESS_LE
466     for (i=0; i<shift; i++) {
467     r.b[i] = gCPU.vr[vrB].b[i+ashift];
468     }
469    
470     for (; i<16; i++) {
471     r.b[i] = gCPU.vr[vrA].b[i-shift];
472     }
473     #elif HOST_ENDIANESS == HOST_ENDIANESS_BE
474     for (i=0; i<ashift; i++) {
475     r.b[i] = gCPU.vr[vrA].b[i+shift];
476     }
477    
478     for (; i<16; i++) {
479     r.b[i] = gCPU.vr[vrB].b[i-ashift];
480     }
481     #else
482     #error Endianess not supported!
483     #endif
484    
485     gCPU.vr[vrD] = r;
486     }
487    
488     /* vrlb Vector Rotate Left Byte
489     * v.234
490     */
491     void ppc_opc_vrlb()
492     {
493     VECTOR_DEBUG;
494     int vrD, vrA, vrB, shift;
495     Vector_t r;
496     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
497    
498     for (int i=0; i<16; i++) {
499     shift = (gCPU.vr[vrB].b[i] & 0x7);
500    
501     r.b[i] = gCPU.vr[vrA].b[i] << shift;
502     r.b[i] |= gCPU.vr[vrA].b[i] >> (8 - shift);
503     }
504    
505     gCPU.vr[vrD] = r;
506     }
507    
508     /* vrlh Vector Rotate Left Half Word
509     * v.235
510     */
511     void ppc_opc_vrlh()
512     {
513     VECTOR_DEBUG;
514     int vrD, vrA, vrB, shift;
515     Vector_t r;
516     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
517    
518     for (int i=0; i<8; i++) {
519     shift = (gCPU.vr[vrB].h[i] & 0xf);
520    
521     r.h[i] = gCPU.vr[vrA].h[i] << shift;
522     r.h[i] |= gCPU.vr[vrA].h[i] >> (16 - shift);
523     }
524    
525     gCPU.vr[vrD] = r;
526     }
527    
528     /* vrlw Vector Rotate Left Word
529     * v.236
530     */
531     void ppc_opc_vrlw()
532     {
533     VECTOR_DEBUG;
534     int vrD, vrA, vrB, shift;
535     Vector_t r;
536     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
537    
538     for (int i=0; i<4; i++) {
539     shift = (gCPU.vr[vrB].w[i] & 0x1F);
540    
541     r.w[i] = gCPU.vr[vrA].w[i] << shift;
542     r.w[i] |= gCPU.vr[vrA].w[i] >> (32 - shift);
543     }
544    
545     gCPU.vr[vrD] = r;
546     }
547    
548     /* With the merges, I just don't see any point in risking that a compiler
549     * might generate actual alu code to calculate anything when it's
550     * compile-time known. Plus, it's easier to validate it like this.
551     */
552    
553     /* vmrghb Vector Merge High Byte
554     * v.195
555     */
556     void ppc_opc_vmrghb()
557     {
558     VECTOR_DEBUG;
559     int vrD, vrA, vrB;
560     Vector_t r;
561     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
562    
563     VECT_B(r, 0) = VECT_B(gCPU.vr[vrA], 0);
564     VECT_B(r, 1) = VECT_B(gCPU.vr[vrB], 0);
565     VECT_B(r, 2) = VECT_B(gCPU.vr[vrA], 1);
566     VECT_B(r, 3) = VECT_B(gCPU.vr[vrB], 1);
567     VECT_B(r, 4) = VECT_B(gCPU.vr[vrA], 2);
568     VECT_B(r, 5) = VECT_B(gCPU.vr[vrB], 2);
569     VECT_B(r, 6) = VECT_B(gCPU.vr[vrA], 3);
570     VECT_B(r, 7) = VECT_B(gCPU.vr[vrB], 3);
571     VECT_B(r, 8) = VECT_B(gCPU.vr[vrA], 4);
572     VECT_B(r, 9) = VECT_B(gCPU.vr[vrB], 4);
573     VECT_B(r,10) = VECT_B(gCPU.vr[vrA], 5);
574     VECT_B(r,11) = VECT_B(gCPU.vr[vrB], 5);
575     VECT_B(r,12) = VECT_B(gCPU.vr[vrA], 6);
576     VECT_B(r,13) = VECT_B(gCPU.vr[vrB], 6);
577     VECT_B(r,14) = VECT_B(gCPU.vr[vrA], 7);
578     VECT_B(r,15) = VECT_B(gCPU.vr[vrB], 7);
579    
580     gCPU.vr[vrD] = r;
581     }
582    
583     /* vmrghh Vector Merge High Half Word
584     * v.196
585     */
586     void ppc_opc_vmrghh()
587     {
588     VECTOR_DEBUG;
589     int vrD, vrA, vrB;
590     Vector_t r;
591     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
592    
593     VECT_H(r, 0) = VECT_H(gCPU.vr[vrA], 0);
594     VECT_H(r, 1) = VECT_H(gCPU.vr[vrB], 0);
595     VECT_H(r, 2) = VECT_H(gCPU.vr[vrA], 1);
596     VECT_H(r, 3) = VECT_H(gCPU.vr[vrB], 1);
597     VECT_H(r, 4) = VECT_H(gCPU.vr[vrA], 2);
598     VECT_H(r, 5) = VECT_H(gCPU.vr[vrB], 2);
599     VECT_H(r, 6) = VECT_H(gCPU.vr[vrA], 3);
600     VECT_H(r, 7) = VECT_H(gCPU.vr[vrB], 3);
601    
602     gCPU.vr[vrD] = r;
603     }
604    
605     /* vmrghw Vector Merge High Word
606     * v.197
607     */
608     void ppc_opc_vmrghw()
609     {
610     VECTOR_DEBUG;
611     int vrD, vrA, vrB;
612     Vector_t r;
613     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
614    
615     VECT_W(r, 0) = VECT_W(gCPU.vr[vrA], 0);
616     VECT_W(r, 1) = VECT_W(gCPU.vr[vrB], 0);
617     VECT_W(r, 2) = VECT_W(gCPU.vr[vrA], 1);
618     VECT_W(r, 3) = VECT_W(gCPU.vr[vrB], 1);
619    
620     gCPU.vr[vrD] = r;
621     }
622    
623     /* vmrglb Vector Merge Low Byte
624     * v.198
625     */
626     void ppc_opc_vmrglb()
627     {
628     VECTOR_DEBUG;
629     int vrD, vrA, vrB;
630     Vector_t r;
631     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
632    
633     VECT_B(r, 0) = VECT_B(gCPU.vr[vrA], 8);
634     VECT_B(r, 1) = VECT_B(gCPU.vr[vrB], 8);
635     VECT_B(r, 2) = VECT_B(gCPU.vr[vrA], 9);
636     VECT_B(r, 3) = VECT_B(gCPU.vr[vrB], 9);
637     VECT_B(r, 4) = VECT_B(gCPU.vr[vrA],10);
638     VECT_B(r, 5) = VECT_B(gCPU.vr[vrB],10);
639     VECT_B(r, 6) = VECT_B(gCPU.vr[vrA],11);
640     VECT_B(r, 7) = VECT_B(gCPU.vr[vrB],11);
641     VECT_B(r, 8) = VECT_B(gCPU.vr[vrA],12);
642     VECT_B(r, 9) = VECT_B(gCPU.vr[vrB],12);
643     VECT_B(r,10) = VECT_B(gCPU.vr[vrA],13);
644     VECT_B(r,11) = VECT_B(gCPU.vr[vrB],13);
645     VECT_B(r,12) = VECT_B(gCPU.vr[vrA],14);
646     VECT_B(r,13) = VECT_B(gCPU.vr[vrB],14);
647     VECT_B(r,14) = VECT_B(gCPU.vr[vrA],15);
648     VECT_B(r,15) = VECT_B(gCPU.vr[vrB],15);
649    
650     gCPU.vr[vrD] = r;
651     }
652    
653     /* vmrglh Vector Merge Low Half Word
654     * v.199
655     */
656     void ppc_opc_vmrglh()
657     {
658     VECTOR_DEBUG;
659     int vrD, vrA, vrB;
660     Vector_t r;
661     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
662    
663     VECT_H(r, 0) = VECT_H(gCPU.vr[vrA], 4);
664     VECT_H(r, 1) = VECT_H(gCPU.vr[vrB], 4);
665     VECT_H(r, 2) = VECT_H(gCPU.vr[vrA], 5);
666     VECT_H(r, 3) = VECT_H(gCPU.vr[vrB], 5);
667     VECT_H(r, 4) = VECT_H(gCPU.vr[vrA], 6);
668     VECT_H(r, 5) = VECT_H(gCPU.vr[vrB], 6);
669     VECT_H(r, 6) = VECT_H(gCPU.vr[vrA], 7);
670     VECT_H(r, 7) = VECT_H(gCPU.vr[vrB], 7);
671    
672     gCPU.vr[vrD] = r;
673     }
674    
675     /* vmrglw Vector Merge Low Word
676     * v.200
677     */
678     void ppc_opc_vmrglw()
679     {
680     VECTOR_DEBUG;
681     int vrD, vrA, vrB;
682     Vector_t r;
683     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
684    
685     VECT_W(r, 0) = VECT_W(gCPU.vr[vrA], 2);
686     VECT_W(r, 1) = VECT_W(gCPU.vr[vrB], 2);
687     VECT_W(r, 2) = VECT_W(gCPU.vr[vrA], 3);
688     VECT_W(r, 3) = VECT_W(gCPU.vr[vrB], 3);
689    
690     gCPU.vr[vrD] = r;
691     }
692    
693     /* vspltb Vector Splat Byte
694     * v.245
695     */
696     void ppc_opc_vspltb()
697     {
698     VECTOR_DEBUG;
699     int vrD, vrB;
700     uint32 uimm;
701     uint64 val;
702     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
703    
704     /* The documentation doesn't stipulate what a value higher than 0xf
705     * will do. Thus, this is by default an undefined value. We
706     * are thus doing this the fastest way that won't crash us.
707     */
708     val = VECT_B(gCPU.vr[vrB], uimm & 0xf);
709     val |= (val << 8);
710     val |= (val << 16);
711     val |= (val << 32);
712    
713     gCPU.vr[vrD].d[0] = val;
714     gCPU.vr[vrD].d[1] = val;
715     }
716    
717     /* vsplth Vector Splat Half Word
718     * v.246
719     */
720     void ppc_opc_vsplth()
721     {
722     VECTOR_DEBUG;
723     int vrD, vrB;
724     uint32 uimm;
725     uint64 val;
726     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
727    
728     /* The documentation doesn't stipulate what a value higher than 0x7
729     * will do. Thus, this is by default an undefined value. We
730     * are thus doing this the fastest way that won't crash us.
731     */
732     val = VECT_H(gCPU.vr[vrB], uimm & 0x7);
733     val |= (val << 16);
734     val |= (val << 32);
735    
736     gCPU.vr[vrD].d[0] = val;
737     gCPU.vr[vrD].d[1] = val;
738     }
739    
740     /* vspltw Vector Splat Word
741     * v.250
742     */
743     void ppc_opc_vspltw()
744     {
745     VECTOR_DEBUG;
746     int vrD, vrB;
747     uint32 uimm;
748     uint64 val;
749     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
750    
751     /* The documentation doesn't stipulate what a value higher than 0x3
752     * will do. Thus, this is by default an undefined value. We
753     * are thus doing this the fastest way that won't crash us.
754     */
755     val = VECT_W(gCPU.vr[vrB], uimm & 0x3);
756     val |= (val << 32);
757    
758     gCPU.vr[vrD].d[0] = val;
759     gCPU.vr[vrD].d[1] = val;
760     }
761    
762     /* vspltisb Vector Splat Immediate Signed Byte
763     * v.247
764     */
765     void ppc_opc_vspltisb()
766     {
767     VECTOR_DEBUG_COMMON;
768     int vrD, vrB;
769     uint32 simm;
770     uint64 val;
771     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, simm, vrB);
772     PPC_OPC_ASSERT(vrB==0);
773    
774     val = (simm & 0x10) ? (simm | 0xE0) : simm;
775     val |= (val << 8);
776     val |= (val << 16);
777     val |= (val << 32);
778    
779     gCPU.vr[vrD].d[0] = val;
780     gCPU.vr[vrD].d[1] = val;
781     }
782    
783     /* vspltish Vector Splat Immediate Signed Half Word
784     * v.248
785     */
786     void ppc_opc_vspltish()
787     {
788     VECTOR_DEBUG_COMMON;
789     int vrD, vrB;
790     uint32 simm;
791     uint64 val;
792     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, simm, vrB);
793     PPC_OPC_ASSERT(vrB==0);
794    
795     val = (simm & 0x10) ? (simm | 0xFFE0) : simm;
796     val |= (val << 16);
797     val |= (val << 32);
798    
799     gCPU.vr[vrD].d[0] = val;
800     gCPU.vr[vrD].d[1] = val;
801     }
802    
803     /* vspltisw Vector Splat Immediate Signed Word
804     * v.249
805     */
806     void ppc_opc_vspltisw()
807     {
808     VECTOR_DEBUG_COMMON;
809     int vrD, vrB;
810     uint32 simm;
811     uint64 val;
812     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, simm, vrB);
813     PPC_OPC_ASSERT(vrB==0);
814    
815     val = (simm & 0x10) ? (simm | 0xFFFFFFE0) : simm;
816     val |= (val << 32);
817    
818     gCPU.vr[vrD].d[0] = val;
819     gCPU.vr[vrD].d[1] = val;
820     }
821    
822     /* mfvscr Move from Vector Status and Control Register
823     * v.129
824     */
825     void ppc_opc_mfvscr()
826     {
827     VECTOR_DEBUG_COMMON;
828     int vrD, vrA, vrB;
829     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
830     PPC_OPC_ASSERT(vrA==0);
831     PPC_OPC_ASSERT(vrB==0);
832    
833     VECT_W(gCPU.vr[vrD], 3) = gCPU.vscr;
834     VECT_W(gCPU.vr[vrD], 2) = 0;
835     VECT_D(gCPU.vr[vrD], 0) = 0;
836     }
837    
838     /* mtvscr Move to Vector Status and Control Register
839     * v.130
840     */
841     void ppc_opc_mtvscr()
842     {
843     VECTOR_DEBUG_COMMON;
844     int vrD, vrA, vrB;
845     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
846     PPC_OPC_ASSERT(vrA==0);
847     PPC_OPC_ASSERT(vrD==0);
848    
849     gCPU.vscr = VECT_W(gCPU.vr[vrB], 3);
850     }
851    
852     /* vpkuhum Vector Pack Unsigned Half Word Unsigned Modulo
853     * v.224
854     */
855     void ppc_opc_vpkuhum()
856     {
857     VECTOR_DEBUG;
858     int vrD, vrA, vrB;
859     Vector_t r;
860     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
861    
862     VECT_B(r, 0) = VECT_B(gCPU.vr[vrA], 1);
863     VECT_B(r, 1) = VECT_B(gCPU.vr[vrA], 3);
864     VECT_B(r, 2) = VECT_B(gCPU.vr[vrA], 5);
865     VECT_B(r, 3) = VECT_B(gCPU.vr[vrA], 7);
866     VECT_B(r, 4) = VECT_B(gCPU.vr[vrA], 9);
867     VECT_B(r, 5) = VECT_B(gCPU.vr[vrA],11);
868     VECT_B(r, 6) = VECT_B(gCPU.vr[vrA],13);
869     VECT_B(r, 7) = VECT_B(gCPU.vr[vrA],15);
870    
871     VECT_B(r, 8) = VECT_B(gCPU.vr[vrB], 1);
872     VECT_B(r, 9) = VECT_B(gCPU.vr[vrB], 3);
873     VECT_B(r,10) = VECT_B(gCPU.vr[vrB], 5);
874     VECT_B(r,11) = VECT_B(gCPU.vr[vrB], 7);
875     VECT_B(r,12) = VECT_B(gCPU.vr[vrB], 9);
876     VECT_B(r,13) = VECT_B(gCPU.vr[vrB],11);
877     VECT_B(r,14) = VECT_B(gCPU.vr[vrB],13);
878     VECT_B(r,15) = VECT_B(gCPU.vr[vrB],15);
879    
880     gCPU.vr[vrD] = r;
881     }
882    
883     /* vpkuwum Vector Pack Unsigned Word Unsigned Modulo
884     * v.226
885     */
886     void ppc_opc_vpkuwum()
887     {
888     VECTOR_DEBUG;
889     int vrD, vrA, vrB;
890     Vector_t r;
891     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
892    
893     VECT_H(r, 0) = VECT_H(gCPU.vr[vrA], 1);
894     VECT_H(r, 1) = VECT_H(gCPU.vr[vrA], 3);
895     VECT_H(r, 2) = VECT_H(gCPU.vr[vrA], 5);
896     VECT_H(r, 3) = VECT_H(gCPU.vr[vrA], 7);
897    
898     VECT_H(r, 4) = VECT_H(gCPU.vr[vrB], 1);
899     VECT_H(r, 5) = VECT_H(gCPU.vr[vrB], 3);
900     VECT_H(r, 6) = VECT_H(gCPU.vr[vrB], 5);
901     VECT_H(r, 7) = VECT_H(gCPU.vr[vrB], 7);
902    
903     gCPU.vr[vrD] = r;
904     }
905    
906     /* vpkpx Vector Pack Pixel32
907     * v.219
908     */
909     void ppc_opc_vpkpx()
910     {
911     VECTOR_DEBUG;
912     int vrD, vrA, vrB;
913     Vector_t r;
914     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
915    
916     VECT_H(r, 0) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 0));
917     VECT_H(r, 1) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 1));
918     VECT_H(r, 2) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 2));
919     VECT_H(r, 3) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 3));
920    
921     VECT_H(r, 4) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 0));
922     VECT_H(r, 5) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 1));
923     VECT_H(r, 6) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 2));
924     VECT_H(r, 7) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 3));
925    
926     gCPU.vr[vrD] = r;
927     }
928    
929    
930     /* vpkuhus Vector Pack Unsigned Half Word Unsigned Saturate
931     * v.225
932     */
933     void ppc_opc_vpkuhus()
934     {
935     VECTOR_DEBUG;
936     int vrD, vrA, vrB;
937     Vector_t r;
938     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
939    
940     VECT_B(r, 0) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 0));
941     VECT_B(r, 1) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 1));
942     VECT_B(r, 2) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 2));
943     VECT_B(r, 3) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 3));
944     VECT_B(r, 4) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 4));
945     VECT_B(r, 5) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 5));
946     VECT_B(r, 6) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 6));
947     VECT_B(r, 7) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 7));
948    
949     VECT_B(r, 8) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 0));
950     VECT_B(r, 9) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 1));
951     VECT_B(r,10) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 2));
952     VECT_B(r,11) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 3));
953     VECT_B(r,12) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 4));
954     VECT_B(r,13) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 5));
955     VECT_B(r,14) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 6));
956     VECT_B(r,15) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 7));
957    
958     gCPU.vr[vrD] = r;
959     }
960    
961     /* vpkshss Vector Pack Signed Half Word Signed Saturate
962     * v.220
963     */
964     void ppc_opc_vpkshss()
965     {
966     VECTOR_DEBUG;
967     int vrD, vrA, vrB;
968     Vector_t r;
969     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
970    
971     VECT_B(r, 0) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 0));
972     VECT_B(r, 1) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 1));
973     VECT_B(r, 2) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 2));
974     VECT_B(r, 3) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 3));
975     VECT_B(r, 4) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 4));
976     VECT_B(r, 5) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 5));
977     VECT_B(r, 6) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 6));
978     VECT_B(r, 7) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 7));
979    
980     VECT_B(r, 8) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 0));
981     VECT_B(r, 9) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 1));
982     VECT_B(r,10) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 2));
983     VECT_B(r,11) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 3));
984     VECT_B(r,12) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 4));
985     VECT_B(r,13) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 5));
986     VECT_B(r,14) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 6));
987     VECT_B(r,15) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 7));
988    
989     gCPU.vr[vrD] = r;
990     }
991    
992     /* vpkuwus Vector Pack Unsigned Word Unsigned Saturate
993     * v.227
994     */
995     void ppc_opc_vpkuwus()
996     {
997     VECTOR_DEBUG;
998     int vrD, vrA, vrB;
999     Vector_t r;
1000     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1001    
1002     VECT_H(r, 0) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 0));
1003     VECT_H(r, 1) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 1));
1004     VECT_H(r, 2) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 2));
1005     VECT_H(r, 3) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 3));
1006    
1007     VECT_H(r, 4) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 0));
1008     VECT_H(r, 5) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 1));
1009     VECT_H(r, 6) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 2));
1010     VECT_H(r, 7) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 3));
1011    
1012     gCPU.vr[vrD] = r;
1013     }
1014    
1015     /* vpkswss Vector Pack Signed Word Signed Saturate
1016     * v.222
1017     */
1018     void ppc_opc_vpkswss()
1019     {
1020     VECTOR_DEBUG;
1021     int vrD, vrA, vrB;
1022     Vector_t r;
1023     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1024    
1025     VECT_H(r, 0) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 0));
1026     VECT_H(r, 1) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 1));
1027     VECT_H(r, 2) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 2));
1028     VECT_H(r, 3) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 3));
1029    
1030     VECT_H(r, 4) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 0));
1031     VECT_H(r, 5) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 1));
1032     VECT_H(r, 6) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 2));
1033     VECT_H(r, 7) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 3));
1034    
1035     gCPU.vr[vrD] = r;
1036     }
1037    
1038     /* vpkshus Vector Pack Signed Half Word Unsigned Saturate
1039     * v.221
1040     */
1041     void ppc_opc_vpkshus()
1042     {
1043     VECTOR_DEBUG;
1044     int vrD, vrA, vrB;
1045     Vector_t r;
1046     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1047    
1048     VECT_B(r, 0) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 0));
1049     VECT_B(r, 1) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 1));
1050     VECT_B(r, 2) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 2));
1051     VECT_B(r, 3) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 3));
1052     VECT_B(r, 4) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 4));
1053     VECT_B(r, 5) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 5));
1054     VECT_B(r, 6) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 6));
1055     VECT_B(r, 7) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 7));
1056    
1057     VECT_B(r, 8) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 0));
1058     VECT_B(r, 9) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 1));
1059     VECT_B(r,10) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 2));
1060     VECT_B(r,11) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 3));
1061     VECT_B(r,12) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 4));
1062     VECT_B(r,13) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 5));
1063     VECT_B(r,14) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 6));
1064     VECT_B(r,15) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 7));
1065    
1066     gCPU.vr[vrD] = r;
1067     }
1068    
1069     /* vpkswus Vector Pack Signed Word Unsigned Saturate
1070     * v.223
1071     */
1072     void ppc_opc_vpkswus()
1073     {
1074     VECTOR_DEBUG;
1075     int vrD, vrA, vrB;
1076     Vector_t r;
1077     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1078    
1079     VECT_H(r, 0) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 0));
1080     VECT_H(r, 1) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 1));
1081     VECT_H(r, 2) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 2));
1082     VECT_H(r, 3) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 3));
1083    
1084     VECT_H(r, 4) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 0));
1085     VECT_H(r, 5) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 1));
1086     VECT_H(r, 6) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 2));
1087     VECT_H(r, 7) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 3));
1088    
1089     gCPU.vr[vrD] = r;
1090     }
1091    
1092     /* vupkhsb Vector Unpack High Signed Byte
1093     * v.277
1094     */
1095     void ppc_opc_vupkhsb()
1096     {
1097     VECTOR_DEBUG;
1098     int vrD, vrA, vrB;
1099     Vector_t r;
1100     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1101     PPC_OPC_ASSERT(vrA==0);
1102    
1103     VECT_SH(r, 0) = VECT_SB(gCPU.vr[vrB], 0);
1104     VECT_SH(r, 1) = VECT_SB(gCPU.vr[vrB], 1);
1105     VECT_SH(r, 2) = VECT_SB(gCPU.vr[vrB], 2);
1106     VECT_SH(r, 3) = VECT_SB(gCPU.vr[vrB], 3);
1107     VECT_SH(r, 4) = VECT_SB(gCPU.vr[vrB], 4);
1108     VECT_SH(r, 5) = VECT_SB(gCPU.vr[vrB], 5);
1109     VECT_SH(r, 6) = VECT_SB(gCPU.vr[vrB], 6);
1110     VECT_SH(r, 7) = VECT_SB(gCPU.vr[vrB], 7);
1111    
1112     gCPU.vr[vrD] = r;
1113     }
1114    
1115     /* vupkhpx Vector Unpack High Pixel32
1116     * v.279
1117     */
1118     void ppc_opc_vupkhpx()
1119     {
1120     VECTOR_DEBUG;
1121     int vrD, vrA, vrB;
1122     Vector_t r;
1123     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1124     PPC_OPC_ASSERT(vrA==0);
1125    
1126     VECT_W(r, 0) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 0));
1127     VECT_W(r, 1) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 1));
1128     VECT_W(r, 2) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 2));
1129     VECT_W(r, 3) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 3));
1130    
1131     gCPU.vr[vrD] = r;
1132     }
1133    
1134     /* vupkhsh Vector Unpack High Signed Half Word
1135     * v.278
1136     */
1137     void ppc_opc_vupkhsh()
1138     {
1139     VECTOR_DEBUG;
1140     int vrD, vrA, vrB;
1141     Vector_t r;
1142     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1143     PPC_OPC_ASSERT(vrA==0);
1144    
1145     VECT_SW(r, 0) = VECT_SH(gCPU.vr[vrB], 0);
1146     VECT_SW(r, 1) = VECT_SH(gCPU.vr[vrB], 1);
1147     VECT_SW(r, 2) = VECT_SH(gCPU.vr[vrB], 2);
1148     VECT_SW(r, 3) = VECT_SH(gCPU.vr[vrB], 3);
1149    
1150     gCPU.vr[vrD] = r;
1151     }
1152    
1153     /* vupklsb Vector Unpack Low Signed Byte
1154     * v.280
1155     */
1156     void ppc_opc_vupklsb()
1157     {
1158     VECTOR_DEBUG;
1159     int vrD, vrA, vrB;
1160     Vector_t r;
1161     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1162     PPC_OPC_ASSERT(vrA==0);
1163    
1164     VECT_SH(r, 0) = VECT_SB(gCPU.vr[vrB], 8);
1165     VECT_SH(r, 1) = VECT_SB(gCPU.vr[vrB], 9);
1166     VECT_SH(r, 2) = VECT_SB(gCPU.vr[vrB],10);
1167     VECT_SH(r, 3) = VECT_SB(gCPU.vr[vrB],11);
1168     VECT_SH(r, 4) = VECT_SB(gCPU.vr[vrB],12);
1169     VECT_SH(r, 5) = VECT_SB(gCPU.vr[vrB],13);
1170     VECT_SH(r, 6) = VECT_SB(gCPU.vr[vrB],14);
1171     VECT_SH(r, 7) = VECT_SB(gCPU.vr[vrB],15);
1172    
1173     gCPU.vr[vrD] = r;
1174     }
1175    
1176     /* vupklpx Vector Unpack Low Pixel32
1177     * v.279
1178     */
1179     void ppc_opc_vupklpx()
1180     {
1181     VECTOR_DEBUG;
1182     int vrD, vrA, vrB;
1183     Vector_t r;
1184     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1185     PPC_OPC_ASSERT(vrA==0);
1186    
1187     VECT_W(r, 0) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 4));
1188     VECT_W(r, 1) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 5));
1189     VECT_W(r, 2) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 6));
1190     VECT_W(r, 3) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 7));
1191    
1192     gCPU.vr[vrD] = r;
1193     }
1194    
1195     /* vupklsh Vector Unpack Low Signed Half Word
1196     * v.281
1197     */
1198     void ppc_opc_vupklsh()
1199     {
1200     VECTOR_DEBUG;
1201     int vrD, vrA, vrB;
1202     Vector_t r;
1203     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1204     PPC_OPC_ASSERT(vrA==0);
1205    
1206     VECT_SW(r, 0) = VECT_SH(gCPU.vr[vrB], 4);
1207     VECT_SW(r, 1) = VECT_SH(gCPU.vr[vrB], 5);
1208     VECT_SW(r, 2) = VECT_SH(gCPU.vr[vrB], 6);
1209     VECT_SW(r, 3) = VECT_SH(gCPU.vr[vrB], 7);
1210    
1211     gCPU.vr[vrD] = r;
1212     }
1213    
1214     /* vaddubm Vector Add Unsigned Byte Modulo
1215     * v.141
1216     */
1217     void ppc_opc_vaddubm()
1218     {
1219     VECTOR_DEBUG;
1220     int vrD, vrA, vrB;
1221     uint8 res;
1222     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1223    
1224     for (int i=0; i<16; i++) {
1225     res = gCPU.vr[vrA].b[i] + gCPU.vr[vrB].b[i];
1226     gCPU.vr[vrD].b[i] = res;
1227     }
1228     }
1229    
1230     /* vadduhm Vector Add Unsigned Half Word Modulo
1231     * v.143
1232     */
1233     void ppc_opc_vadduhm()
1234     {
1235     VECTOR_DEBUG;
1236     int vrD, vrA, vrB;
1237     uint16 res;
1238     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1239    
1240     for (int i=0; i<8; i++) {
1241     res = gCPU.vr[vrA].h[i] + gCPU.vr[vrB].h[i];
1242     gCPU.vr[vrD].h[i] = res;
1243     }
1244     }
1245    
1246     /* vadduwm Vector Add Unsigned Word Modulo
1247     * v.145
1248     */
1249     void ppc_opc_vadduwm()
1250     {
1251     VECTOR_DEBUG;
1252     int vrD, vrA, vrB;
1253     uint32 res;
1254     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1255    
1256     for (int i=0; i<4; i++) {
1257     res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i];
1258     gCPU.vr[vrD].w[i] = res;
1259     }
1260     }
1261    
1262     /* vaddfp Vector Add Float Point
1263     * v.137
1264     */
1265     void ppc_opc_vaddfp()
1266     {
1267     VECTOR_DEBUG;
1268     int vrD, vrA, vrB;
1269     float res;
1270     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1271    
1272     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
1273     res = gCPU.vr[vrA].f[i] + gCPU.vr[vrB].f[i];
1274     gCPU.vr[vrD].f[i] = res;
1275     }
1276     }
1277    
1278     /* vaddcuw Vector Add Carryout Unsigned Word
1279     * v.136
1280     */
1281     void ppc_opc_vaddcuw()
1282     {
1283     VECTOR_DEBUG;
1284     int vrD, vrA, vrB;
1285     uint32 res;
1286     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1287    
1288     for (int i=0; i<4; i++) {
1289     res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i];
1290     gCPU.vr[vrD].w[i] = (res < gCPU.vr[vrA].w[i]) ? 1 : 0;
1291     }
1292     }
1293    
1294     /* vaddubs Vector Add Unsigned Byte Saturate
1295     * v.142
1296     */
1297     void ppc_opc_vaddubs()
1298     {
1299     VECTOR_DEBUG;
1300     int vrD, vrA, vrB;
1301     uint16 res;
1302     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1303    
1304     for (int i=0; i<16; i++) {
1305     res = (uint16)gCPU.vr[vrA].b[i] + (uint16)gCPU.vr[vrB].b[i];
1306     gCPU.vr[vrD].b[i] = SATURATE_UB(res);
1307     }
1308     }
1309    
1310     /* vaddsbs Vector Add Signed Byte Saturate
1311     * v.138
1312     */
1313     void ppc_opc_vaddsbs()
1314     {
1315     VECTOR_DEBUG;
1316     int vrD, vrA, vrB;
1317     sint16 res;
1318     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1319    
1320     for (int i=0; i<16; i++) {
1321     res = (sint16)gCPU.vr[vrA].sb[i] + (sint16)gCPU.vr[vrB].sb[i];
1322     gCPU.vr[vrD].b[i] = SATURATE_SB(res);
1323     }
1324     }
1325    
1326     /* vadduhs Vector Add Unsigned Half Word Saturate
1327     * v.144
1328     */
1329     void ppc_opc_vadduhs()
1330     {
1331     VECTOR_DEBUG;
1332     int vrD, vrA, vrB;
1333     uint32 res;
1334     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1335    
1336     for (int i=0; i<8; i++) {
1337     res = (uint32)gCPU.vr[vrA].h[i] + (uint32)gCPU.vr[vrB].h[i];
1338     gCPU.vr[vrD].h[i] = SATURATE_UH(res);
1339     }
1340     }
1341    
1342     /* vaddshs Vector Add Signed Half Word Saturate
1343     * v.139
1344     */
1345     void ppc_opc_vaddshs()
1346     {
1347     VECTOR_DEBUG;
1348     int vrD, vrA, vrB;
1349     sint32 res;
1350     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1351    
1352     for (int i=0; i<8; i++) {
1353     res = (sint32)gCPU.vr[vrA].sh[i] + (sint32)gCPU.vr[vrB].sh[i];
1354     gCPU.vr[vrD].h[i] = SATURATE_SH(res);
1355     }
1356     }
1357    
1358     /* vadduws Vector Add Unsigned Word Saturate
1359     * v.146
1360     */
1361     void ppc_opc_vadduws()
1362     {
1363     VECTOR_DEBUG;
1364     int vrD, vrA, vrB;
1365     uint32 res;
1366     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1367    
1368     for (int i=0; i<4; i++) {
1369     res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i];
1370    
1371     // We do this to prevent us from having to do 64-bit math
1372     if (res < gCPU.vr[vrA].w[i]) {
1373     res = 0xFFFFFFFF;
1374     gCPU.vscr |= VSCR_SAT;
1375     }
1376    
1377     /* 64-bit math | 32-bit hack
1378     * ------------------------+-------------------------------------
1379     * add, addc (a+b) | add (a+b)
1380     * sub, subb (r>ub) | sub (r<a)
1381     */
1382    
1383     gCPU.vr[vrD].w[i] = res;
1384     }
1385     }
1386    
1387     /* vaddsws Vector Add Signed Word Saturate
1388     * v.140
1389     */
1390     void ppc_opc_vaddsws()
1391     {
1392     VECTOR_DEBUG;
1393     int vrD, vrA, vrB;
1394     uint32 res;
1395     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1396    
1397     for (int i=0; i<4; i++) {
1398     res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i];
1399    
1400     // We do this to prevent us from having to do 64-bit math
1401     if (((gCPU.vr[vrA].w[i] ^ gCPU.vr[vrB].w[i]) & SIGN32) == 0) {
1402     // the signs of both operands are the same
1403    
1404     if (((res ^ gCPU.vr[vrA].w[i]) & SIGN32) != 0) {
1405     // sign of result != sign of operands
1406    
1407     // if res is negative, should have been positive
1408     res = (res & SIGN32) ? (SIGN32 - 1) : SIGN32;
1409     gCPU.vscr |= VSCR_SAT;
1410     }
1411     }
1412    
1413     /* 64-bit math | 32-bit hack
1414     * ------------------------+-------------------------------------
1415     * add, addc (a+b) | add (a+b)
1416     * sub, subb (r>ub) | xor, and (sign == sign)
1417     * sub, subb (r<lb) | xor, and (sign != sign)
1418     * | and (which)
1419     */
1420    
1421     gCPU.vr[vrD].w[i] = res;
1422     }
1423     }
1424    
1425     /* vsububm Vector Subtract Unsigned Byte Modulo
1426     * v.265
1427     */
1428     void ppc_opc_vsububm()
1429     {
1430     VECTOR_DEBUG;
1431     int vrD, vrA, vrB;
1432     uint8 res;
1433     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1434    
1435     for (int i=0; i<16; i++) {
1436     res = gCPU.vr[vrA].b[i] - gCPU.vr[vrB].b[i];
1437     gCPU.vr[vrD].b[i] = res;
1438     }
1439     }
1440    
1441     /* vsubuhm Vector Subtract Unsigned Half Word Modulo
1442     * v.267
1443     */
1444     void ppc_opc_vsubuhm()
1445     {
1446     VECTOR_DEBUG;
1447     int vrD, vrA, vrB;
1448     uint16 res;
1449     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1450    
1451     for (int i=0; i<8; i++) {
1452     res = gCPU.vr[vrA].h[i] - gCPU.vr[vrB].h[i];
1453     gCPU.vr[vrD].h[i] = res;
1454     }
1455     }
1456    
1457     /* vsubuwm Vector Subtract Unsigned Word Modulo
1458     * v.269
1459     */
1460     void ppc_opc_vsubuwm()
1461     {
1462     VECTOR_DEBUG;
1463     int vrD, vrA, vrB;
1464     uint32 res;
1465     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1466    
1467     for (int i=0; i<4; i++) {
1468     res = gCPU.vr[vrA].w[i] - gCPU.vr[vrB].w[i];
1469     gCPU.vr[vrD].w[i] = res;
1470     }
1471     }
1472    
1473     /* vsubfp Vector Subtract Float Point
1474     * v.261
1475     */
1476     void ppc_opc_vsubfp()
1477     {
1478     VECTOR_DEBUG;
1479     int vrD, vrA, vrB;
1480     float res;
1481     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1482    
1483     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
1484     res = gCPU.vr[vrA].f[i] - gCPU.vr[vrB].f[i];
1485     gCPU.vr[vrD].f[i] = res;
1486     }
1487     }
1488    
1489     /* vsubcuw Vector Subtract Carryout Unsigned Word
1490     * v.260
1491     */
1492     void ppc_opc_vsubcuw()
1493     {
1494     VECTOR_DEBUG;
1495     int vrD, vrA, vrB;
1496     uint32 res;
1497     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1498    
1499     for (int i=0; i<4; i++) {
1500     res = gCPU.vr[vrA].w[i] - gCPU.vr[vrB].w[i];
1501     gCPU.vr[vrD].w[i] = (res <= gCPU.vr[vrA].w[i]) ? 1 : 0;
1502     }
1503     }
1504    
1505     /* vsububs Vector Subtract Unsigned Byte Saturate
1506     * v.266
1507     */
1508     void ppc_opc_vsububs()
1509     {
1510     VECTOR_DEBUG;
1511     int vrD, vrA, vrB;
1512     uint16 res;
1513     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1514    
1515     for (int i=0; i<16; i++) {
1516     res = (uint16)gCPU.vr[vrA].b[i] - (uint16)gCPU.vr[vrB].b[i];
1517    
1518     gCPU.vr[vrD].b[i] = SATURATE_0B(res);
1519     }
1520     }
1521    
1522     /* vsubsbs Vector Subtract Signed Byte Saturate
1523     * v.262
1524     */
1525     void ppc_opc_vsubsbs()
1526     {
1527     VECTOR_DEBUG;
1528     int vrD, vrA, vrB;
1529     sint16 res;
1530     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1531    
1532     for (int i=0; i<16; i++) {
1533     res = (sint16)gCPU.vr[vrA].sb[i] - (sint16)gCPU.vr[vrB].sb[i];
1534    
1535     gCPU.vr[vrD].sb[i] = SATURATE_SB(res);
1536     }
1537     }
1538    
1539     /* vsubuhs Vector Subtract Unsigned Half Word Saturate
1540     * v.268
1541     */
1542     void ppc_opc_vsubuhs()
1543     {
1544     VECTOR_DEBUG;
1545     int vrD, vrA, vrB;
1546     uint32 res;
1547     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1548    
1549     for (int i=0; i<8; i++) {
1550     res = (uint32)gCPU.vr[vrA].h[i] - (uint32)gCPU.vr[vrB].h[i];
1551    
1552     gCPU.vr[vrD].h[i] = SATURATE_0H(res);
1553     }
1554     }
1555    
1556     /* vsubshs Vector Subtract Signed Half Word Saturate
1557     * v.263
1558     */
1559     void ppc_opc_vsubshs()
1560     {
1561     VECTOR_DEBUG;
1562     int vrD, vrA, vrB;
1563     sint32 res;
1564     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1565    
1566     for (int i=0; i<8; i++) {
1567     res = (sint32)gCPU.vr[vrA].sh[i] - (sint32)gCPU.vr[vrB].sh[i];
1568    
1569     gCPU.vr[vrD].sh[i] = SATURATE_SH(res);
1570     }
1571     }
1572    
1573     /* vsubuws Vector Subtract Unsigned Word Saturate
1574     * v.270
1575     */
1576     void ppc_opc_vsubuws()
1577     {
1578     VECTOR_DEBUG;
1579     int vrD, vrA, vrB;
1580     uint32 res;
1581     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1582    
1583     for (int i=0; i<4; i++) {
1584     res = gCPU.vr[vrA].w[i] - gCPU.vr[vrB].w[i];
1585    
1586     // We do this to prevent us from having to do 64-bit math
1587     if (res > gCPU.vr[vrA].w[i]) {
1588     res = 0;
1589     gCPU.vscr |= VSCR_SAT;
1590     }
1591    
1592     /* 64-bit math | 32-bit hack
1593     * ------------------------+-------------------------------------
1594     * sub, subb (a+b) | sub (a+b)
1595     * sub, subb (r>ub) | sub (r<a)
1596     */
1597    
1598     gCPU.vr[vrD].w[i] = res;
1599     }
1600     }
1601    
1602     /* vsubsws Vector Subtract Signed Word Saturate
1603     * v.264
1604     */
1605     void ppc_opc_vsubsws()
1606     {
1607     VECTOR_DEBUG;
1608     int vrD, vrA, vrB;
1609     uint32 res, tmp;
1610     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1611    
1612     for (int i=0; i<4; i++) {
1613     tmp = -gCPU.vr[vrB].w[i];
1614     res = gCPU.vr[vrA].w[i] + tmp;
1615    
1616     // We do this to prevent us from having to do 64-bit math
1617     if (((gCPU.vr[vrA].w[i] ^ tmp) & SIGN32) == 0) {
1618     // the signs of both operands are the same
1619    
1620     if (((res ^ tmp) & SIGN32) != 0) {
1621     // sign of result != sign of operands
1622    
1623     // if res is negative, should have been positive
1624     res = (res & SIGN32) ? (SIGN32 - 1) : SIGN32;
1625     gCPU.vscr |= VSCR_SAT;
1626     }
1627     }
1628    
1629     /* 64-bit math | 32-bit hack
1630     * ------------------------+-------------------------------------
1631     * sub, subc (a+b) | neg, add (a-b)
1632     * sub, subb (r>ub) | xor, and (sign == sign)
1633     * sub, subb (r<lb) | xor, and (sign != sign)
1634     * | and (which)
1635     */
1636    
1637     gCPU.vr[vrD].w[i] = res;
1638     }
1639     }
1640    
1641     /* vmuleub Vector Multiply Even Unsigned Byte
1642     * v.209
1643     */
1644     void ppc_opc_vmuleub()
1645     {
1646     VECTOR_DEBUG;
1647     int vrD, vrA, vrB;
1648     uint16 res;
1649     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1650    
1651     for (int i=0; i<8; i++) {
1652     res = (uint16)gCPU.vr[vrA].b[VECT_EVEN(i)] *
1653     (uint16)gCPU.vr[vrB].b[VECT_EVEN(i)];
1654    
1655     gCPU.vr[vrD].h[i] = res;
1656     }
1657     }
1658    
1659     /* vmulesb Vector Multiply Even Signed Byte
1660     * v.207
1661     */
1662     void ppc_opc_vmulesb()
1663     {
1664     VECTOR_DEBUG;
1665     int vrD, vrA, vrB;
1666     sint16 res;
1667     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1668    
1669     for (int i=0; i<8; i++) {
1670     res = (sint16)gCPU.vr[vrA].sb[VECT_EVEN(i)] *
1671     (sint16)gCPU.vr[vrB].sb[VECT_EVEN(i)];
1672    
1673     gCPU.vr[vrD].sh[i] = res;
1674     }
1675     }
1676    
1677     /* vmuleuh Vector Multiply Even Unsigned Half Word
1678     * v.210
1679     */
1680     void ppc_opc_vmuleuh()
1681     {
1682     VECTOR_DEBUG;
1683     int vrD, vrA, vrB;
1684     uint32 res;
1685     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1686    
1687     for (int i=0; i<4; i++) {
1688     res = (uint32)gCPU.vr[vrA].h[VECT_EVEN(i)] *
1689     (uint32)gCPU.vr[vrB].h[VECT_EVEN(i)];
1690    
1691     gCPU.vr[vrD].w[i] = res;
1692     }
1693     }
1694    
1695     /* vmulesh Vector Multiply Even Signed Half Word
1696     * v.208
1697     */
1698     void ppc_opc_vmulesh()
1699     {
1700     VECTOR_DEBUG;
1701     int vrD, vrA, vrB;
1702     sint32 res;
1703     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1704    
1705     for (int i=0; i<4; i++) {
1706     res = (sint32)gCPU.vr[vrA].sh[VECT_EVEN(i)] *
1707     (sint32)gCPU.vr[vrB].sh[VECT_EVEN(i)];
1708    
1709     gCPU.vr[vrD].sw[i] = res;
1710     }
1711     }
1712    
1713     /* vmuloub Vector Multiply Odd Unsigned Byte
1714     * v.213
1715     */
1716     void ppc_opc_vmuloub()
1717     {
1718     VECTOR_DEBUG;
1719     int vrD, vrA, vrB;
1720     uint16 res;
1721     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1722    
1723     for (int i=0; i<8; i++) {
1724     res = (uint16)gCPU.vr[vrA].b[VECT_ODD(i)] *
1725     (uint16)gCPU.vr[vrB].b[VECT_ODD(i)];
1726    
1727     gCPU.vr[vrD].h[i] = res;
1728     }
1729     }
1730    
1731     /* vmulosb Vector Multiply Odd Signed Byte
1732     * v.211
1733     */
1734     void ppc_opc_vmulosb()
1735     {
1736     VECTOR_DEBUG;
1737     int vrD, vrA, vrB;
1738     sint16 res;
1739     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1740    
1741     for (int i=0; i<8; i++) {
1742     res = (sint16)gCPU.vr[vrA].sb[VECT_ODD(i)] *
1743     (sint16)gCPU.vr[vrB].sb[VECT_ODD(i)];
1744    
1745     gCPU.vr[vrD].sh[i] = res;
1746     }
1747     }
1748    
1749     /* vmulouh Vector Multiply Odd Unsigned Half Word
1750     * v.214
1751     */
1752     void ppc_opc_vmulouh()
1753     {
1754     VECTOR_DEBUG;
1755     int vrD, vrA, vrB;
1756     uint32 res;
1757     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1758    
1759     for (int i=0; i<4; i++) {
1760     res = (uint32)gCPU.vr[vrA].h[VECT_ODD(i)] *
1761     (uint32)gCPU.vr[vrB].h[VECT_ODD(i)];
1762    
1763     gCPU.vr[vrD].w[i] = res;
1764     }
1765     }
1766    
1767     /* vmulosh Vector Multiply Odd Signed Half Word
1768     * v.212
1769     */
1770     void ppc_opc_vmulosh()
1771     {
1772     VECTOR_DEBUG;
1773     int vrD, vrA, vrB;
1774     sint32 res;
1775     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
1776    
1777     for (int i=0; i<4; i++) {
1778     res = (sint32)gCPU.vr[vrA].sh[VECT_ODD(i)] *
1779     (sint32)gCPU.vr[vrB].sh[VECT_ODD(i)];
1780    
1781     gCPU.vr[vrD].sw[i] = res;
1782     }
1783     }
1784    
1785     /* vmaddfp Vector Multiply Add Floating Point
1786     * v.177
1787     */
1788     void ppc_opc_vmaddfp()
1789     {
1790     VECTOR_DEBUG;
1791     int vrD, vrA, vrB, vrC;
1792     double res;
1793     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1794    
1795     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
1796     res = (double)gCPU.vr[vrA].f[i] * (double)gCPU.vr[vrC].f[i];
1797    
1798     res = (double)gCPU.vr[vrB].f[i] + res;
1799    
1800     gCPU.vr[vrD].f[i] = (float)res;
1801     }
1802     }
1803    
1804     /* vmhaddshs Vector Multiply High and Add Signed Half Word Saturate
1805     * v.185
1806     */
1807     void ppc_opc_vmhaddshs()
1808     {
1809     VECTOR_DEBUG;
1810     int vrD, vrA, vrB, vrC;
1811     sint32 prod;
1812     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1813    
1814     for (int i=0; i<8; i++) {
1815     prod = (sint32)gCPU.vr[vrA].sh[i] * (sint32)gCPU.vr[vrB].sh[i];
1816    
1817     prod = (prod >> 15) + (sint32)gCPU.vr[vrC].sh[i];
1818    
1819     gCPU.vr[vrD].sh[i] = SATURATE_SH(prod);
1820     }
1821     }
1822    
1823     /* vmladduhm Vector Multiply Low and Add Unsigned Half Word Modulo
1824     * v.194
1825     */
1826     void ppc_opc_vmladduhm()
1827     {
1828     VECTOR_DEBUG;
1829     int vrD, vrA, vrB, vrC;
1830     uint32 prod;
1831     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1832    
1833     for (int i=0; i<8; i++) {
1834     prod = (uint32)gCPU.vr[vrA].h[i] * (uint32)gCPU.vr[vrB].h[i];
1835    
1836     prod = prod + (uint32)gCPU.vr[vrC].h[i];
1837    
1838     gCPU.vr[vrD].h[i] = prod;
1839     }
1840     }
1841    
1842     /* vmhraddshs Vector Multiply High Round and Add Signed Half Word Saturate
1843     * v.186
1844     */
1845     void ppc_opc_vmhraddshs()
1846     {
1847     VECTOR_DEBUG;
1848     int vrD, vrA, vrB, vrC;
1849     sint32 prod;
1850     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1851    
1852     for (int i=0; i<8; i++) {
1853     prod = (sint32)gCPU.vr[vrA].sh[i] * (sint32)gCPU.vr[vrB].sh[i];
1854    
1855     prod += 0x4000;
1856     prod = (prod >> 15) + (sint32)gCPU.vr[vrC].sh[i];
1857    
1858     gCPU.vr[vrD].sh[i] = SATURATE_SH(prod);
1859     }
1860     }
1861    
1862     /* vmsumubm Vector Multiply Sum Unsigned Byte Modulo
1863     * v.204
1864     */
1865     void ppc_opc_vmsumubm()
1866     {
1867     VECTOR_DEBUG;
1868     int vrD, vrA, vrB, vrC;
1869     uint32 temp;
1870     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1871    
1872     for (int i=0; i<4; i++) {
1873     temp = gCPU.vr[vrC].w[i];
1874    
1875     temp += (uint16)gCPU.vr[vrA].b[i<<2] *
1876     (uint16)gCPU.vr[vrB].b[i<<2];
1877    
1878     temp += (uint16)gCPU.vr[vrA].b[(i<<2)+1] *
1879     (uint16)gCPU.vr[vrB].b[(i<<2)+1];
1880    
1881     temp += (uint16)gCPU.vr[vrA].b[(i<<2)+2] *
1882     (uint16)gCPU.vr[vrB].b[(i<<2)+2];
1883    
1884     temp += (uint16)gCPU.vr[vrA].b[(i<<2)+3] *
1885     (uint16)gCPU.vr[vrB].b[(i<<2)+3];
1886    
1887     gCPU.vr[vrD].w[i] = temp;
1888     }
1889     }
1890    
1891     /* vmsumuhm Vector Multiply Sum Unsigned Half Word Modulo
1892     * v.205
1893     */
1894     void ppc_opc_vmsumuhm()
1895     {
1896     VECTOR_DEBUG;
1897     int vrD, vrA, vrB, vrC;
1898     uint32 temp;
1899     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1900    
1901     for (int i=0; i<4; i++) {
1902     temp = gCPU.vr[vrC].w[i];
1903    
1904     temp += (uint32)gCPU.vr[vrA].h[i<<1] *
1905     (uint32)gCPU.vr[vrB].h[i<<1];
1906     temp += (uint32)gCPU.vr[vrA].h[(i<<1)+1] *
1907     (uint32)gCPU.vr[vrB].h[(i<<1)+1];
1908    
1909     gCPU.vr[vrD].w[i] = temp;
1910     }
1911     }
1912    
1913     /* vmsummbm Vector Multiply Sum Mixed-Sign Byte Modulo
1914     * v.201
1915     */
1916     void ppc_opc_vmsummbm()
1917     {
1918     VECTOR_DEBUG;
1919     int vrD, vrA, vrB, vrC;
1920     sint32 temp;
1921     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1922    
1923     for (int i=0; i<4; i++) {
1924     temp = gCPU.vr[vrC].sw[i];
1925    
1926     temp += (sint16)gCPU.vr[vrA].sb[i<<2] *
1927     (uint16)gCPU.vr[vrB].b[i<<2];
1928     temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+1] *
1929     (uint16)gCPU.vr[vrB].b[(i<<2)+1];
1930     temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+2] *
1931     (uint16)gCPU.vr[vrB].b[(i<<2)+2];
1932     temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+3] *
1933     (uint16)gCPU.vr[vrB].b[(i<<2)+3];
1934    
1935     gCPU.vr[vrD].sw[i] = temp;
1936     }
1937     }
1938    
1939     /* vmsumshm Vector Multiply Sum Signed Half Word Modulo
1940     * v.202
1941     */
1942     void ppc_opc_vmsumshm()
1943     {
1944     VECTOR_DEBUG;
1945     int vrD, vrA, vrB, vrC;
1946     sint32 temp;
1947     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1948    
1949     for (int i=0; i<4; i++) {
1950     temp = gCPU.vr[vrC].sw[i];
1951    
1952     temp += (sint32)gCPU.vr[vrA].sh[i<<1] *
1953     (sint32)gCPU.vr[vrB].sh[i<<1];
1954     temp += (sint32)gCPU.vr[vrA].sh[(i<<1)+1] *
1955     (sint32)gCPU.vr[vrB].sh[(i<<1)+1];
1956    
1957     gCPU.vr[vrD].sw[i] = temp;
1958     }
1959     }
1960    
1961     /* vmsumuhs Vector Multiply Sum Unsigned Half Word Saturate
1962     * v.206
1963     */
1964     void ppc_opc_vmsumuhs()
1965     {
1966     VECTOR_DEBUG;
1967     int vrD, vrA, vrB, vrC;
1968     uint64 temp;
1969     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1970    
1971     /* For this, there's no way to get around 64-bit math. If we use
1972     * the hacks used before, then we have to do it so often, that
1973     * we'll outpace the 64-bit math in execution time.
1974     */
1975     for (int i=0; i<4; i++) {
1976     temp = gCPU.vr[vrC].w[i];
1977    
1978     temp += (uint32)gCPU.vr[vrA].h[i<<1] *
1979     (uint32)gCPU.vr[vrB].h[i<<1];
1980    
1981     temp += (uint32)gCPU.vr[vrA].h[(i<<1)+1] *
1982     (uint32)gCPU.vr[vrB].h[(i<<1)+1];
1983    
1984     gCPU.vr[vrD].w[i] = SATURATE_UW(temp);
1985     }
1986     }
1987    
1988     /* vmsumshs Vector Multiply Sum Signed Half Word Saturate
1989     * v.203
1990     */
1991     void ppc_opc_vmsumshs()
1992     {
1993     VECTOR_DEBUG;
1994     int vrD, vrA, vrB, vrC;
1995     sint64 temp;
1996     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
1997    
1998     /* For this, there's no way to get around 64-bit math. If we use
1999     * the hacks used before, then we have to do it so often, that
2000     * we'll outpace the 64-bit math in execution time.
2001     */
2002    
2003     for (int i=0; i<4; i++) {
2004     temp = gCPU.vr[vrC].sw[i];
2005    
2006     temp += (sint32)gCPU.vr[vrA].sh[i<<1] *
2007     (sint32)gCPU.vr[vrB].sh[i<<1];
2008     temp += (sint32)gCPU.vr[vrA].sh[(i<<1)+1] *
2009     (sint32)gCPU.vr[vrB].sh[(i<<1)+1];
2010    
2011     gCPU.vr[vrD].sw[i] = SATURATE_SW(temp);
2012     }
2013     }
2014    
2015     /* vsum4ubs Vector Sum Across Partial (1/4) Unsigned Byte Saturate
2016     * v.275
2017     */
2018     void ppc_opc_vsum4ubs()
2019     {
2020     VECTOR_DEBUG;
2021     int vrD, vrA, vrB;
2022     uint64 res;
2023     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2024    
2025     /* For this, there's no way to get around 64-bit math. If we use
2026     * the hacks used before, then we have to do it so often, that
2027     * we'll outpace the 64-bit math in execution time.
2028     */
2029    
2030     for (int i=0; i<4; i++) {
2031     res = (uint64)gCPU.vr[vrB].w[i];
2032    
2033     res += (uint64)gCPU.vr[vrA].b[(i<<2)];
2034     res += (uint64)gCPU.vr[vrA].b[(i<<2)+1];
2035     res += (uint64)gCPU.vr[vrA].b[(i<<2)+2];
2036     res += (uint64)gCPU.vr[vrA].b[(i<<2)+3];
2037    
2038     gCPU.vr[vrD].w[i] = SATURATE_UW(res);
2039     }
2040     }
2041    
2042     /* vsum4sbs Vector Sum Across Partial (1/4) Signed Byte Saturate
2043     * v.273
2044     */
2045     void ppc_opc_vsum4sbs()
2046     {
2047     VECTOR_DEBUG;
2048     int vrD, vrA, vrB;
2049     sint64 res;
2050     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2051    
2052     for (int i=0; i<4; i++) {
2053     res = (sint64)gCPU.vr[vrB].sw[i];
2054    
2055     res += (sint64)gCPU.vr[vrA].sb[(i<<2)];
2056     res += (sint64)gCPU.vr[vrA].sb[(i<<2)+1];
2057     res += (sint64)gCPU.vr[vrA].sb[(i<<2)+2];
2058     res += (sint64)gCPU.vr[vrA].sb[(i<<2)+3];
2059    
2060     gCPU.vr[vrD].sw[i] = SATURATE_SW(res);
2061     }
2062     }
2063    
2064     /* vsum4shs Vector Sum Across Partial (1/4) Signed Half Word Saturate
2065     * v.274
2066     */
2067     void ppc_opc_vsum4shs()
2068     {
2069     VECTOR_DEBUG;
2070     int vrD, vrA, vrB;
2071     sint64 res;
2072     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2073    
2074     for (int i=0; i<4; i++) {
2075     res = (sint64)gCPU.vr[vrB].sw[i];
2076    
2077     res += (sint64)gCPU.vr[vrA].sh[(i<<1)];
2078     res += (sint64)gCPU.vr[vrA].sh[(i<<1)+1];
2079    
2080     gCPU.vr[vrD].sw[i] = SATURATE_SW(res);
2081     }
2082     }
2083    
2084     /* vsum2sws Vector Sum Across Partial (1/2) Signed Word Saturate
2085     * v.272
2086     */
2087     void ppc_opc_vsum2sws()
2088     {
2089     VECTOR_DEBUG;
2090     int vrD, vrA, vrB;
2091     sint64 res;
2092     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2093    
2094     res = (sint64)gCPU.vr[vrA].sw[0] + (sint64)gCPU.vr[vrA].sw[1];
2095     res += (sint64)gCPU.vr[vrB].sw[VECT_ODD(0)];
2096    
2097     gCPU.vr[vrD].w[VECT_ODD(0)] = SATURATE_SW(res);
2098     gCPU.vr[vrD].w[VECT_EVEN(0)] = 0;
2099    
2100     res = (sint64)gCPU.vr[vrA].sw[2] + (sint64)gCPU.vr[vrA].sw[3];
2101     res += (sint64)gCPU.vr[vrB].sw[VECT_ODD(1)];
2102    
2103     gCPU.vr[vrD].w[VECT_ODD(1)] = SATURATE_SW(res);
2104     gCPU.vr[vrD].w[VECT_EVEN(1)] = 0;
2105     }
2106    
2107     /* vsumsws Vector Sum Across Signed Word Saturate
2108     * v.271
2109     */
2110     void ppc_opc_vsumsws()
2111     {
2112     VECTOR_DEBUG;
2113     int vrD, vrA, vrB;
2114     sint64 res;
2115     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2116    
2117     res = (sint64)gCPU.vr[vrA].sw[0] + (sint64)gCPU.vr[vrA].sw[1];
2118     res += (sint64)gCPU.vr[vrA].sw[2] + (sint64)gCPU.vr[vrA].sw[3];
2119    
2120     res += (sint64)VECT_W(gCPU.vr[vrB], 3);
2121    
2122     VECT_W(gCPU.vr[vrD], 3) = SATURATE_SW(res);
2123     VECT_W(gCPU.vr[vrD], 2) = 0;
2124     VECT_W(gCPU.vr[vrD], 1) = 0;
2125     VECT_W(gCPU.vr[vrD], 0) = 0;
2126     }
2127    
2128     /* vnmsubfp Vector Negative Multiply-Subtract Floating Point
2129     * v.215
2130     */
2131     void ppc_opc_vnmsubfp()
2132     {
2133     VECTOR_DEBUG;
2134     int vrD, vrA, vrB, vrC;
2135     double res;
2136     PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);
2137    
2138     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2139     res = (double)gCPU.vr[vrA].f[i] * (double)gCPU.vr[vrC].f[i];
2140    
2141     res = (double)gCPU.vr[vrB].f[i] - res;
2142    
2143     gCPU.vr[vrD].f[i] = (float)res;
2144     }
2145     }
2146    
2147     /* vavgub Vector Average Unsigned Byte
2148     * v.152
2149     */
2150     void ppc_opc_vavgub()
2151     {
2152     VECTOR_DEBUG;
2153     int vrD, vrA, vrB;
2154     uint16 res;
2155     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2156    
2157     for (int i=0; i<16; i++) {
2158     res = (uint16)gCPU.vr[vrA].b[i] +
2159     (uint16)gCPU.vr[vrB].b[i] + 1;
2160    
2161     gCPU.vr[vrD].b[i] = (res >> 1);
2162     }
2163     }
2164    
2165     /* vavguh Vector Average Unsigned Half Word
2166     * v.153
2167     */
2168     void ppc_opc_vavguh()
2169     {
2170     VECTOR_DEBUG;
2171     int vrD, vrA, vrB;
2172     uint32 res;
2173     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2174    
2175     for (int i=0; i<8; i++) {
2176     res = (uint32)gCPU.vr[vrA].h[i] +
2177     (uint32)gCPU.vr[vrB].h[i] + 1;
2178    
2179     gCPU.vr[vrD].h[i] = (res >> 1);
2180     }
2181     }
2182    
2183     /* vavguw Vector Average Unsigned Word
2184     * v.154
2185     */
2186     void ppc_opc_vavguw()
2187     {
2188     VECTOR_DEBUG;
2189     int vrD, vrA, vrB;
2190     uint64 res;
2191     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2192    
2193     for (int i=0; i<4; i++) {
2194     res = (uint64)gCPU.vr[vrA].w[i] +
2195     (uint64)gCPU.vr[vrB].w[i] + 1;
2196    
2197     gCPU.vr[vrD].w[i] = (res >> 1);
2198     }
2199     }
2200    
2201     /* vavgsb Vector Average Signed Byte
2202     * v.149
2203     */
2204     void ppc_opc_vavgsb()
2205     {
2206     VECTOR_DEBUG;
2207     int vrD, vrA, vrB;
2208     sint16 res;
2209     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2210    
2211     for (int i=0; i<16; i++) {
2212     res = (sint16)gCPU.vr[vrA].sb[i] +
2213     (sint16)gCPU.vr[vrB].sb[i] + 1;
2214    
2215     gCPU.vr[vrD].sb[i] = (res >> 1);
2216     }
2217     }
2218    
2219     /* vavgsh Vector Average Signed Half Word
2220     * v.150
2221     */
2222     void ppc_opc_vavgsh()
2223     {
2224     VECTOR_DEBUG;
2225     int vrD, vrA, vrB;
2226     sint32 res;
2227     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2228    
2229     for (int i=0; i<8; i++) {
2230     res = (sint32)gCPU.vr[vrA].sh[i] +
2231     (sint32)gCPU.vr[vrB].sh[i] + 1;
2232    
2233     gCPU.vr[vrD].sh[i] = (res >> 1);
2234     }
2235     }
2236    
2237     /* vavgsw Vector Average Signed Word
2238     * v.151
2239     */
2240     void ppc_opc_vavgsw()
2241     {
2242     VECTOR_DEBUG;
2243     int vrD, vrA, vrB;
2244     sint64 res;
2245     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2246    
2247     for (int i=0; i<4; i++) {
2248     res = (sint64)gCPU.vr[vrA].sw[i] +
2249     (sint64)gCPU.vr[vrB].sw[i] + 1;
2250    
2251     gCPU.vr[vrD].sw[i] = (res >> 1);
2252     }
2253     }
2254    
2255     /* vmaxub Vector Maximum Unsigned Byte
2256     * v.182
2257     */
2258     void ppc_opc_vmaxub()
2259     {
2260     VECTOR_DEBUG;
2261     int vrD, vrA, vrB;
2262     uint8 res;
2263     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2264    
2265     for (int i=0; i<16; i++) {
2266     res = gCPU.vr[vrA].b[i];
2267    
2268     if (res < gCPU.vr[vrB].b[i])
2269     res = gCPU.vr[vrB].b[i];
2270    
2271     gCPU.vr[vrD].b[i] = res;
2272     }
2273     }
2274    
2275     /* vmaxuh Vector Maximum Unsigned Half Word
2276     * v.183
2277     */
2278     void ppc_opc_vmaxuh()
2279     {
2280     VECTOR_DEBUG;
2281     int vrD, vrA, vrB;
2282     uint16 res;
2283     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2284    
2285     for (int i=0; i<8; i++) {
2286     res = gCPU.vr[vrA].h[i];
2287    
2288     if (res < gCPU.vr[vrB].h[i])
2289     res = gCPU.vr[vrB].h[i];
2290    
2291     gCPU.vr[vrD].h[i] = res;
2292     }
2293     }
2294    
2295     /* vmaxuw Vector Maximum Unsigned Word
2296     * v.184
2297     */
2298     void ppc_opc_vmaxuw()
2299     {
2300     VECTOR_DEBUG;
2301     int vrD, vrA, vrB;
2302     uint32 res;
2303     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2304    
2305     for (int i=0; i<4; i++) {
2306     res = gCPU.vr[vrA].w[i];
2307    
2308     if (res < gCPU.vr[vrB].w[i])
2309     res = gCPU.vr[vrB].w[i];
2310    
2311     gCPU.vr[vrD].w[i] = res;
2312     }
2313     }
2314    
2315     /* vmaxsb Vector Maximum Signed Byte
2316     * v.179
2317     */
2318     void ppc_opc_vmaxsb()
2319     {
2320     VECTOR_DEBUG;
2321     int vrD, vrA, vrB;
2322     sint8 res;
2323     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2324    
2325     for (int i=0; i<16; i++) {
2326     res = gCPU.vr[vrA].sb[i];
2327    
2328     if (res < gCPU.vr[vrB].sb[i])
2329     res = gCPU.vr[vrB].sb[i];
2330    
2331     gCPU.vr[vrD].sb[i] = res;
2332     }
2333     }
2334    
2335     /* vmaxsh Vector Maximum Signed Half Word
2336     * v.180
2337     */
2338     void ppc_opc_vmaxsh()
2339     {
2340     VECTOR_DEBUG;
2341     int vrD, vrA, vrB;
2342     sint16 res;
2343     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2344    
2345     for (int i=0; i<8; i++) {
2346     res = gCPU.vr[vrA].sh[i];
2347    
2348     if (res < gCPU.vr[vrB].sh[i])
2349     res = gCPU.vr[vrB].sh[i];
2350    
2351     gCPU.vr[vrD].sh[i] = res;
2352     }
2353     }
2354    
2355     /* vmaxsw Vector Maximum Signed Word
2356     * v.181
2357     */
2358     void ppc_opc_vmaxsw()
2359     {
2360     VECTOR_DEBUG;
2361     int vrD, vrA, vrB;
2362     sint32 res;
2363     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2364    
2365     for (int i=0; i<4; i++) {
2366     res = gCPU.vr[vrA].sw[i];
2367    
2368     if (res < gCPU.vr[vrB].sw[i])
2369     res = gCPU.vr[vrB].sw[i];
2370    
2371     gCPU.vr[vrD].sw[i] = res;
2372     }
2373     }
2374    
2375     /* vmaxfp Vector Maximum Floating Point
2376     * v.178
2377     */
2378     void ppc_opc_vmaxfp()
2379     {
2380     VECTOR_DEBUG;
2381     int vrD, vrA, vrB;
2382     float res;
2383     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2384    
2385     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2386     res = gCPU.vr[vrA].f[i];
2387    
2388     if (res < gCPU.vr[vrB].f[i])
2389     res = gCPU.vr[vrB].f[i];
2390    
2391     gCPU.vr[vrD].f[i] = res;
2392     }
2393     }
2394    
2395     /* vminub Vector Minimum Unsigned Byte
2396     * v.191
2397     */
2398     void ppc_opc_vminub()
2399     {
2400     VECTOR_DEBUG;
2401     int vrD, vrA, vrB;
2402     uint8 res;
2403     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2404    
2405     for (int i=0; i<16; i++) {
2406     res = gCPU.vr[vrA].b[i];
2407    
2408     if (res > gCPU.vr[vrB].b[i])
2409     res = gCPU.vr[vrB].b[i];
2410    
2411     gCPU.vr[vrD].b[i] = res;
2412     }
2413     }
2414    
2415     /* vminuh Vector Minimum Unsigned Half Word
2416     * v.192
2417     */
2418     void ppc_opc_vminuh()
2419     {
2420     VECTOR_DEBUG;
2421     int vrD, vrA, vrB;
2422     uint16 res;
2423     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2424    
2425     for (int i=0; i<8; i++) {
2426     res = gCPU.vr[vrA].h[i];
2427    
2428     if (res > gCPU.vr[vrB].h[i])
2429     res = gCPU.vr[vrB].h[i];
2430    
2431     gCPU.vr[vrD].h[i] = res;
2432     }
2433     }
2434    
2435     /* vminuw Vector Minimum Unsigned Word
2436     * v.193
2437     */
2438     void ppc_opc_vminuw()
2439     {
2440     VECTOR_DEBUG;
2441     int vrD, vrA, vrB;
2442     uint32 res;
2443     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2444    
2445     for (int i=0; i<4; i++) {
2446     res = gCPU.vr[vrA].w[i];
2447    
2448     if (res > gCPU.vr[vrB].w[i])
2449     res = gCPU.vr[vrB].w[i];
2450    
2451     gCPU.vr[vrD].w[i] = res;
2452     }
2453     }
2454    
2455     /* vminsb Vector Minimum Signed Byte
2456     * v.188
2457     */
2458     void ppc_opc_vminsb()
2459     {
2460     VECTOR_DEBUG;
2461     int vrD, vrA, vrB;
2462     sint8 res;
2463     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2464    
2465     for (int i=0; i<16; i++) {
2466     res = gCPU.vr[vrA].sb[i];
2467    
2468     if (res > gCPU.vr[vrB].sb[i])
2469     res = gCPU.vr[vrB].sb[i];
2470    
2471     gCPU.vr[vrD].sb[i] = res;
2472     }
2473     }
2474    
2475     /* vminsh Vector Minimum Signed Half Word
2476     * v.189
2477     */
2478     void ppc_opc_vminsh()
2479     {
2480     VECTOR_DEBUG;
2481     int vrD, vrA, vrB;
2482     sint16 res;
2483     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2484    
2485     for (int i=0; i<8; i++) {
2486     res = gCPU.vr[vrA].sh[i];
2487    
2488     if (res > gCPU.vr[vrB].sh[i])
2489     res = gCPU.vr[vrB].sh[i];
2490    
2491     gCPU.vr[vrD].sh[i] = res;
2492     }
2493     }
2494    
2495     /* vminsw Vector Minimum Signed Word
2496     * v.190
2497     */
2498     void ppc_opc_vminsw()
2499     {
2500     VECTOR_DEBUG;
2501     int vrD, vrA, vrB;
2502     sint32 res;
2503     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2504    
2505     for (int i=0; i<4; i++) {
2506     res = gCPU.vr[vrA].sw[i];
2507    
2508     if (res > gCPU.vr[vrB].sw[i])
2509     res = gCPU.vr[vrB].sw[i];
2510    
2511     gCPU.vr[vrD].sw[i] = res;
2512     }
2513     }
2514    
2515     /* vminfp Vector Minimum Floating Point
2516     * v.187
2517     */
2518     void ppc_opc_vminfp()
2519     {
2520     VECTOR_DEBUG;
2521     int vrD, vrA, vrB;
2522     float res;
2523     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2524    
2525     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2526     res = gCPU.vr[vrA].f[i];
2527    
2528     if (res > gCPU.vr[vrB].f[i])
2529     res = gCPU.vr[vrB].f[i];
2530    
2531     gCPU.vr[vrD].f[i] = res;
2532     }
2533     }
2534    
2535     /* vrfin Vector Round to Floating-Point Integer Nearest
2536     * v.231
2537     */
2538     void ppc_opc_vrfin()
2539     {
2540     VECTOR_DEBUG;
2541     int vrD, vrA, vrB;
2542     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2543     PPC_OPC_ASSERT(vrA==0);
2544    
2545     /* Documentation doesn't dictate how this instruction should
2546     * round from a middle point. With a test on a real G4, it was
2547     * found to be round to nearest, with bias to even if equidistant.
2548     *
2549     * This is covered by the function rint()
2550     */
2551     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2552     gCPU.vr[vrD].f[i] = rintf(gCPU.vr[vrB].f[i]);
2553     }
2554     }
2555    
2556     /* vrfip Vector Round to Floating-Point Integer toward Plus Infinity
2557     * v.232
2558     */
2559     void ppc_opc_vrfip()
2560     {
2561     VECTOR_DEBUG;
2562     int vrD, vrA, vrB;
2563     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2564     PPC_OPC_ASSERT(vrA==0);
2565    
2566     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2567     gCPU.vr[vrD].f[i] = ceilf(gCPU.vr[vrB].f[i]);
2568     }
2569     }
2570    
2571     /* vrfim Vector Round to Floating-Point Integer toward Minus Infinity
2572     * v.230
2573     */
2574     void ppc_opc_vrfim()
2575     {
2576     VECTOR_DEBUG;
2577     int vrD, vrA, vrB;
2578     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2579     PPC_OPC_ASSERT(vrA==0);
2580    
2581     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2582     gCPU.vr[vrD].f[i] = floorf(gCPU.vr[vrB].f[i]);
2583     }
2584     }
2585    
2586     /* vrfiz Vector Round to Floating-Point Integer toward Zero
2587     * v.233
2588     */
2589     void ppc_opc_vrfiz()
2590     {
2591     VECTOR_DEBUG;
2592     int vrD, vrA, vrB;
2593     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2594     PPC_OPC_ASSERT(vrA==0);
2595    
2596     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2597     gCPU.vr[vrD].f[i] = truncf(gCPU.vr[vrD].f[i]);
2598     }
2599     }
2600    
2601     /* vrefp Vector Reciprocal Estimate Floating Point
2602     * v.228
2603     */
2604     void ppc_opc_vrefp()
2605     {
2606     VECTOR_DEBUG;
2607     int vrD, vrA, vrB;
2608     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2609     PPC_OPC_ASSERT(vrA==0);
2610    
2611     /* This emulation generates an exact value, instead of an estimate.
2612     * This is technically within specs, but some test-suites expect the
2613     * exact estimate value returned by G4s. These anomolous failures
2614     * should be ignored.
2615     */
2616    
2617     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2618     gCPU.vr[vrD].f[i] = 1 / gCPU.vr[vrB].f[i];
2619     }
2620     }
2621    
2622     /* vrsqrtefp Vector Reciprocal Square Root Estimate Floating Point
2623     * v.237
2624     */
2625     void ppc_opc_vrsqrtefp()
2626     {
2627     VECTOR_DEBUG;
2628     int vrD, vrA, vrB;
2629     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2630     PPC_OPC_ASSERT(vrA==0);
2631    
2632     /* This emulation generates an exact value, instead of an estimate.
2633     * This is technically within specs, but some test-suites expect the
2634     * exact estimate value returned by G4s. These anomolous failures
2635     * should be ignored.
2636     */
2637    
2638     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2639     gCPU.vr[vrD].f[i] = 1 / sqrt(gCPU.vr[vrB].f[i]);
2640     }
2641     }
2642    
2643     /* vlogefp Vector Log2 Estimate Floating Point
2644     * v.175
2645     */
2646     void ppc_opc_vlogefp()
2647     {
2648     VECTOR_DEBUG;
2649     int vrD, vrA, vrB;
2650     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2651     PPC_OPC_ASSERT(vrA==0);
2652    
2653     /* This emulation generates an exact value, instead of an estimate.
2654     * This is technically within specs, but some test-suites expect the
2655     * exact estimate value returned by G4s. These anomolous failures
2656     * should be ignored.
2657     */
2658    
2659     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2660     gCPU.vr[vrD].f[i] = log2(gCPU.vr[vrB].f[i]);
2661     }
2662     }
2663    
2664     /* vexptefp Vector 2 Raised to the Exponent Estimate Floating Point
2665     * v.173
2666     */
2667     void ppc_opc_vexptefp()
2668     {
2669     VECTOR_DEBUG;
2670     int vrD, vrA, vrB;
2671     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2672     PPC_OPC_ASSERT(vrA==0);
2673    
2674     /* This emulation generates an exact value, instead of an estimate.
2675     * This is technically within specs, but some test-suites expect the
2676     * exact estimate value returned by G4s. These anomolous failures
2677     * should be ignored.
2678     */
2679    
2680     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2681     gCPU.vr[vrD].f[i] = exp2(gCPU.vr[vrB].f[i]);
2682     }
2683     }
2684    
2685     /* vcfux Vector Convert from Unsigned Fixed-Point Word
2686     * v.156
2687     */
2688     void ppc_opc_vcfux()
2689     {
2690     VECTOR_DEBUG;
2691     int vrD, vrB;
2692     uint32 uimm;
2693     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
2694    
2695     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2696     gCPU.vr[vrD].f[i] = ((float)gCPU.vr[vrB].w[i]) / (1 << uimm);
2697     }
2698     }
2699    
2700     /* vcfsx Vector Convert from Signed Fixed-Point Word
2701     * v.155
2702     */
2703     void ppc_opc_vcfsx()
2704     {
2705     VECTOR_DEBUG;
2706     int vrD, vrB;
2707     uint32 uimm;
2708     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
2709    
2710     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2711     gCPU.vr[vrD].f[i] = ((float)gCPU.vr[vrB].sw[i]) / (1 << uimm);
2712     }
2713     }
2714    
2715     /* vctsxs Vector Convert To Signed Fixed-Point Word Saturate
2716     * v.171
2717     */
2718     void ppc_opc_vctsxs()
2719     {
2720     VECTOR_DEBUG;
2721     int vrD, vrB;
2722     uint32 uimm;
2723     float ftmp;
2724     sint32 tmp;
2725     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
2726    
2727     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2728     ftmp = gCPU.vr[vrB].f[i] * (float)(1 << uimm);
2729     ftmp = truncf(ftmp);
2730    
2731     tmp = (sint32)ftmp;
2732    
2733     if (ftmp > 2147483647.0) {
2734     tmp = 2147483647; // 0x7fffffff
2735     gCPU.vscr |= VSCR_SAT;
2736     } else if (ftmp < -2147483648.0) {
2737     tmp = -2147483648LL; // 0x80000000
2738     gCPU.vscr |= VSCR_SAT;
2739     }
2740    
2741     gCPU.vr[vrD].sw[i] = tmp;
2742     }
2743     }
2744    
2745     /* vctuxs Vector Convert to Unsigned Fixed-Point Word Saturate
2746     * v.172
2747     */
2748     void ppc_opc_vctuxs()
2749     {
2750     VECTOR_DEBUG;
2751     int vrD, vrB;
2752     uint32 tmp, uimm;
2753     float ftmp;
2754     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB);
2755    
2756     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2757     ftmp = gCPU.vr[vrB].f[i] * (float)(1 << uimm);
2758     ftmp = truncf(ftmp);
2759    
2760     tmp = (uint32)ftmp;
2761    
2762     if (ftmp > 4294967295.0) {
2763     tmp = 0xffffffff;
2764     gCPU.vscr |= VSCR_SAT;
2765     } else if (ftmp < 0) {
2766     tmp = 0;
2767     gCPU.vscr |= VSCR_SAT;
2768     }
2769    
2770     gCPU.vr[vrD].w[i] = tmp;
2771     }
2772     }
2773    
2774     /* vand Vector Logical AND
2775     * v.147
2776     */
2777     void ppc_opc_vand()
2778     {
2779     VECTOR_DEBUG;
2780     int vrD, vrA, vrB;
2781     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2782    
2783     gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] & gCPU.vr[vrB].d[0];
2784     gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] & gCPU.vr[vrB].d[1];
2785     }
2786    
2787     /* vandc Vector Logical AND with Complement
2788     * v.148
2789     */
2790     void ppc_opc_vandc()
2791     {
2792     VECTOR_DEBUG;
2793     int vrD, vrA, vrB;
2794     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2795    
2796     gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] & ~gCPU.vr[vrB].d[0];
2797     gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] & ~gCPU.vr[vrB].d[1];
2798     }
2799    
2800     /* vor Vector Logical OR
2801     * v.217
2802     */
2803     void ppc_opc_vor()
2804     {
2805     VECTOR_DEBUG_COMMON;
2806     int vrD, vrA, vrB;
2807     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2808    
2809     gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] | gCPU.vr[vrB].d[0];
2810     gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] | gCPU.vr[vrB].d[1];
2811     }
2812    
2813     /* vnor Vector Logical NOR
2814     * v.216
2815     */
2816     void ppc_opc_vnor()
2817     {
2818     VECTOR_DEBUG;
2819     int vrD, vrA, vrB;
2820     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2821    
2822     gCPU.vr[vrD].d[0] = ~(gCPU.vr[vrA].d[0] | gCPU.vr[vrB].d[0]);
2823     gCPU.vr[vrD].d[1] = ~(gCPU.vr[vrA].d[1] | gCPU.vr[vrB].d[1]);
2824     }
2825    
2826     /* vxor Vector Logical XOR
2827     * v.282
2828     */
2829     void ppc_opc_vxor()
2830     {
2831     VECTOR_DEBUG_COMMON;
2832     int vrD, vrA, vrB;
2833     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2834    
2835     gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] ^ gCPU.vr[vrB].d[0];
2836     gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] ^ gCPU.vr[vrB].d[1];
2837     }
2838    
/* Bits of gCPU.cr that the vector compare handlers in this file update
 * when the opcode's record bit (PPC_OPC_VRc) is set.
 * NOTE(review): the *_SOME bits appear to be an emulator addition; the
 * AltiVec manual presumably defines those two CR6 bits as zero - confirm.
 */
#define CR_CR6		(0x00f0)	/* mask covering the whole CR6 field */
#define CR_CR6_EQ	(0x0080)	/* survives only if every element compared true */
#define CR_CR6_NE_SOME	(0x0040)	/* set once any element compared false */
#define CR_CR6_NE	(0x0020)	/* survives only if no element compared true */
#define CR_CR6_EQ_SOME	(0x0010)	/* set once any element compared true */
2844    
2845     /* vcmpequbx Vector Compare Equal-to Unsigned Byte
2846     * v.160
2847     */
2848     void ppc_opc_vcmpequbx()
2849     {
2850     VECTOR_DEBUG;
2851     int vrD, vrA, vrB;
2852     int tf=CR_CR6_EQ | CR_CR6_NE;
2853     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2854    
2855     for (int i=0; i<16; i++) {
2856     if (gCPU.vr[vrA].b[i] == gCPU.vr[vrB].b[i]) {
2857     gCPU.vr[vrD].b[i] = 0xff;
2858     tf &= ~CR_CR6_NE;
2859     tf |= CR_CR6_EQ_SOME;
2860     } else {
2861     gCPU.vr[vrD].b[i] = 0;
2862     tf &= ~CR_CR6_EQ;
2863     tf |= CR_CR6_NE_SOME;
2864     }
2865     }
2866    
2867     if (PPC_OPC_VRc & gCPU.current_opc) {
2868     gCPU.cr &= ~CR_CR6;
2869     gCPU.cr |= tf;
2870     }
2871     }
2872    
2873     /* vcmpequhx Vector Compare Equal-to Unsigned Half Word
2874     * v.161
2875     */
2876     void ppc_opc_vcmpequhx()
2877     {
2878     VECTOR_DEBUG;
2879     int vrD, vrA, vrB;
2880     int tf=CR_CR6_EQ | CR_CR6_NE;
2881     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2882    
2883     for (int i=0; i<8; i++) {
2884     if (gCPU.vr[vrA].h[i] == gCPU.vr[vrB].h[i]) {
2885     gCPU.vr[vrD].h[i] = 0xffff;
2886     tf &= ~CR_CR6_NE;
2887     tf |= CR_CR6_EQ_SOME;
2888     } else {
2889     gCPU.vr[vrD].h[i] = 0;
2890     tf &= ~CR_CR6_EQ;
2891     tf |= CR_CR6_NE_SOME;
2892     }
2893     }
2894    
2895     if (PPC_OPC_VRc & gCPU.current_opc) {
2896     gCPU.cr &= ~CR_CR6;
2897     gCPU.cr |= tf;
2898     }
2899     }
2900    
2901     /* vcmpequwx Vector Compare Equal-to Unsigned Word
2902     * v.162
2903     */
2904     void ppc_opc_vcmpequwx()
2905     {
2906     VECTOR_DEBUG;
2907     int vrD, vrA, vrB;
2908     int tf=CR_CR6_EQ | CR_CR6_NE;
2909     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2910    
2911     for (int i=0; i<4; i++) {
2912     if (gCPU.vr[vrA].w[i] == gCPU.vr[vrB].w[i]) {
2913     gCPU.vr[vrD].w[i] = 0xffffffff;
2914     tf &= ~CR_CR6_NE;
2915     tf |= CR_CR6_EQ_SOME;
2916     } else {
2917     gCPU.vr[vrD].w[i] = 0;
2918     tf &= ~CR_CR6_EQ;
2919     tf |= CR_CR6_NE_SOME;
2920     }
2921     }
2922    
2923     if (PPC_OPC_VRc & gCPU.current_opc) {
2924     gCPU.cr &= ~CR_CR6;
2925     gCPU.cr |= tf;
2926     }
2927     }
2928    
2929     /* vcmpeqfpx Vector Compare Equal-to-Floating Point
2930     * v.159
2931     */
2932     void ppc_opc_vcmpeqfpx()
2933     {
2934     VECTOR_DEBUG;
2935     int vrD, vrA, vrB;
2936     int tf=CR_CR6_EQ | CR_CR6_NE;
2937     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2938    
2939     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
2940     if (gCPU.vr[vrA].f[i] == gCPU.vr[vrB].f[i]) {
2941     gCPU.vr[vrD].w[i] = 0xffffffff;
2942     tf &= ~CR_CR6_NE;
2943     tf |= CR_CR6_EQ_SOME;
2944     } else {
2945     gCPU.vr[vrD].w[i] = 0;
2946     tf &= ~CR_CR6_EQ;
2947     tf |= CR_CR6_NE_SOME;
2948     }
2949     }
2950    
2951     if (PPC_OPC_VRc & gCPU.current_opc) {
2952     gCPU.cr &= ~CR_CR6;
2953     gCPU.cr |= tf;
2954     }
2955     }
2956    
2957     /* vcmpgtubx Vector Compare Greater-Than Unsigned Byte
2958     * v.168
2959     */
2960     void ppc_opc_vcmpgtubx()
2961     {
2962     VECTOR_DEBUG;
2963     int vrD, vrA, vrB;
2964     int tf=CR_CR6_EQ | CR_CR6_NE;
2965     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2966    
2967     for (int i=0; i<16; i++) {
2968     if (gCPU.vr[vrA].b[i] > gCPU.vr[vrB].b[i]) {
2969     gCPU.vr[vrD].b[i] = 0xff;
2970     tf &= ~CR_CR6_NE;
2971     tf |= CR_CR6_EQ_SOME;
2972     } else {
2973     gCPU.vr[vrD].b[i] = 0;
2974     tf &= ~CR_CR6_EQ;
2975     tf |= CR_CR6_NE_SOME;
2976     }
2977     }
2978    
2979     if (PPC_OPC_VRc & gCPU.current_opc) {
2980     gCPU.cr &= ~CR_CR6;
2981     gCPU.cr |= tf;
2982     }
2983     }
2984    
2985     /* vcmpgtsbx Vector Compare Greater-Than Signed Byte
2986     * v.165
2987     */
2988     void ppc_opc_vcmpgtsbx()
2989     {
2990     VECTOR_DEBUG;
2991     int vrD, vrA, vrB;
2992     int tf=CR_CR6_EQ | CR_CR6_NE;
2993     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
2994    
2995     for (int i=0; i<16; i++) {
2996     if (gCPU.vr[vrA].sb[i] > gCPU.vr[vrB].sb[i]) {
2997     gCPU.vr[vrD].b[i] = 0xff;
2998     tf &= ~CR_CR6_NE;
2999     tf |= CR_CR6_EQ_SOME;
3000     } else {
3001     gCPU.vr[vrD].b[i] = 0;
3002     tf &= ~CR_CR6_EQ;
3003     tf |= CR_CR6_NE_SOME;
3004     }
3005     }
3006    
3007     if (PPC_OPC_VRc & gCPU.current_opc) {
3008     gCPU.cr &= ~CR_CR6;
3009     gCPU.cr |= tf;
3010     }
3011     }
3012    
3013     /* vcmpgtuhx Vector Compare Greater-Than Unsigned Half Word
3014     * v.169
3015     */
3016     void ppc_opc_vcmpgtuhx()
3017     {
3018     VECTOR_DEBUG;
3019     int vrD, vrA, vrB;
3020     int tf=CR_CR6_EQ | CR_CR6_NE;
3021     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
3022    
3023     for (int i=0; i<8; i++) {
3024     if (gCPU.vr[vrA].h[i] > gCPU.vr[vrB].h[i]) {
3025     gCPU.vr[vrD].h[i] = 0xffff;
3026     tf &= ~CR_CR6_NE;
3027     tf |= CR_CR6_EQ_SOME;
3028     } else {
3029     gCPU.vr[vrD].h[i] = 0;
3030     tf &= ~CR_CR6_EQ;
3031     tf |= CR_CR6_NE_SOME;
3032     }
3033     }
3034    
3035     if (PPC_OPC_VRc & gCPU.current_opc) {
3036     gCPU.cr &= ~CR_CR6;
3037     gCPU.cr |= tf;
3038     }
3039     }
3040    
3041     /* vcmpgtshx Vector Compare Greater-Than Signed Half Word
3042     * v.166
3043     */
3044     void ppc_opc_vcmpgtshx()
3045     {
3046     VECTOR_DEBUG;
3047     int vrD, vrA, vrB;
3048     int tf=CR_CR6_EQ | CR_CR6_NE;
3049     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
3050    
3051     for (int i=0; i<8; i++) {
3052     if (gCPU.vr[vrA].sh[i] > gCPU.vr[vrB].sh[i]) {
3053     gCPU.vr[vrD].h[i] = 0xffff;
3054     tf &= ~CR_CR6_NE;
3055     tf |= CR_CR6_EQ_SOME;
3056     } else {
3057     gCPU.vr[vrD].h[i] = 0;
3058     tf &= ~CR_CR6_EQ;
3059     tf |= CR_CR6_NE_SOME;
3060     }
3061     }
3062    
3063     if (PPC_OPC_VRc & gCPU.current_opc) {
3064     gCPU.cr &= ~CR_CR6;
3065     gCPU.cr |= tf;
3066     }
3067     }
3068    
3069     /* vcmpgtuwx Vector Compare Greater-Than Unsigned Word
3070     * v.170
3071     */
3072     void ppc_opc_vcmpgtuwx()
3073     {
3074     VECTOR_DEBUG;
3075     int vrD, vrA, vrB;
3076     int tf=CR_CR6_EQ | CR_CR6_NE;
3077     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
3078    
3079     for (int i=0; i<4; i++) {
3080     if (gCPU.vr[vrA].w[i] > gCPU.vr[vrB].w[i]) {
3081     gCPU.vr[vrD].w[i] = 0xffffffff;
3082     tf &= ~CR_CR6_NE;
3083     tf |= CR_CR6_EQ_SOME;
3084     } else {
3085     gCPU.vr[vrD].w[i] = 0;
3086     tf &= ~CR_CR6_EQ;
3087     tf |= CR_CR6_NE_SOME;
3088     }
3089     }
3090    
3091     if (PPC_OPC_VRc & gCPU.current_opc) {
3092     gCPU.cr &= ~CR_CR6;
3093     gCPU.cr |= tf;
3094     }
3095     }
3096    
3097     /* vcmpgtswx Vector Compare Greater-Than Signed Word
3098     * v.167
3099     */
3100     void ppc_opc_vcmpgtswx()
3101     {
3102     VECTOR_DEBUG;
3103     int vrD, vrA, vrB;
3104     int tf=CR_CR6_EQ | CR_CR6_NE;
3105     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
3106    
3107     for (int i=0; i<4; i++) {
3108     if (gCPU.vr[vrA].sw[i] > gCPU.vr[vrB].sw[i]) {
3109     gCPU.vr[vrD].w[i] = 0xffffffff;
3110     tf &= ~CR_CR6_NE;
3111     tf |= CR_CR6_EQ_SOME;
3112     } else {
3113     gCPU.vr[vrD].w[i] = 0;
3114     tf &= ~CR_CR6_EQ;
3115     tf |= CR_CR6_NE_SOME;
3116     }
3117     }
3118    
3119     if (PPC_OPC_VRc & gCPU.current_opc) {
3120     gCPU.cr &= ~CR_CR6;
3121     gCPU.cr |= tf;
3122     }
3123     }
3124    
3125     /* vcmpgtfpx Vector Compare Greater-Than Floating-Point
3126     * v.164
3127     */
3128     void ppc_opc_vcmpgtfpx()
3129     {
3130     VECTOR_DEBUG;
3131     int vrD, vrA, vrB;
3132     int tf=CR_CR6_EQ | CR_CR6_NE;
3133     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
3134    
3135     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
3136     if (gCPU.vr[vrA].f[i] > gCPU.vr[vrB].f[i]) {
3137     gCPU.vr[vrD].w[i] = 0xffffffff;
3138     tf &= ~CR_CR6_NE;
3139     tf |= CR_CR6_EQ_SOME;
3140     } else {
3141     gCPU.vr[vrD].w[i] = 0;
3142     tf &= ~CR_CR6_EQ;
3143     tf |= CR_CR6_NE_SOME;
3144     }
3145     }
3146    
3147     if (PPC_OPC_VRc & gCPU.current_opc) {
3148     gCPU.cr &= ~CR_CR6;
3149     gCPU.cr |= tf;
3150     }
3151     }
3152    
3153     /* vcmpgefpx Vector Compare Greater-Than-or-Equal-to Floating Point
3154     * v.163
3155     */
3156     void ppc_opc_vcmpgefpx()
3157     {
3158     VECTOR_DEBUG;
3159     int vrD, vrA, vrB;
3160     int tf=CR_CR6_EQ | CR_CR6_NE;
3161     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
3162    
3163     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
3164     if (gCPU.vr[vrA].f[i] >= gCPU.vr[vrB].f[i]) {
3165     gCPU.vr[vrD].w[i] = 0xffffffff;
3166     tf &= ~CR_CR6_NE;
3167     tf |= CR_CR6_EQ_SOME;
3168     } else {
3169     gCPU.vr[vrD].w[i] = 0;
3170     tf &= ~CR_CR6_EQ;
3171     tf |= CR_CR6_NE_SOME;
3172     }
3173     }
3174    
3175     if (PPC_OPC_VRc & gCPU.current_opc) {
3176     gCPU.cr &= ~CR_CR6;
3177     gCPU.cr |= tf;
3178     }
3179     }
3180    
3181     /* vcmpbfpx Vector Compare Bounds Floating Point
3182     * v.157
3183     */
3184     void ppc_opc_vcmpbfpx()
3185     {
3186     VECTOR_DEBUG;
3187     int vrD, vrA, vrB;
3188     int le, ge;
3189     int ib=CR_CR6_NE;
3190     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);
3191    
3192     for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP
3193     le = (gCPU.vr[vrA].f[i] <= gCPU.vr[vrB].f[i]) ? 0 : 0x80000000;
3194     ge = (gCPU.vr[vrA].f[i] >= -gCPU.vr[vrB].f[i]) ? 0 : 0x40000000;
3195    
3196     gCPU.vr[vrD].w[i] = le | ge;
3197     if (le | ge) {
3198     ib = 0;
3199     }
3200     }
3201    
3202     if (PPC_OPC_VRc & gCPU.current_opc) {
3203     gCPU.cr &= ~CR_CR6;
3204     gCPU.cr |= ib;
3205     }
3206     }

  ViewVC Help
Powered by ViewVC 1.1.26