/[pearpc]/src/cpu/cpu_jitc_x86/ppc_mmu.cc
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /src/cpu/cpu_jitc_x86/ppc_mmu.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Wed Sep 5 17:11:21 2007 UTC (16 years, 7 months ago) by dpavlin
File size: 104660 byte(s)
import upstream CVS
1 dpavlin 1 /*
2     * PearPC
3     * ppc_mmu.cc
4     *
5     * Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net)
6     * Copyright (C) 2004 Daniel Foesch (dfoesch@cs.nmsu.edu)
7     *
8     * This program is free software; you can redistribute it and/or modify
9     * it under the terms of the GNU General Public License version 2 as
10     * published by the Free Software Foundation.
11     *
12     * This program is distributed in the hope that it will be useful,
13     * but WITHOUT ANY WARRANTY; without even the implied warranty of
14     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15     * GNU General Public License for more details.
16     *
17     * You should have received a copy of the GNU General Public License
18     * along with this program; if not, write to the Free Software
19     * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20     */
21    
22     #include <cstdlib>
23     #include <cstring>
24     #include "tools/snprintf.h"
25     #include "debug/tracers.h"
26     #include "io/prom/prom.h"
27     #include "io/io.h"
28     #include "ppc_cpu.h"
29     #include "ppc_fpu.h"
30     #include "ppc_vec.h"
31     #include "ppc_mmu.h"
32     #include "ppc_exc.h"
33     #include "ppc_tools.h"
34    
35     #include "x86asm.h"
36     #include "jitc_asm.h"
37    
byte *gMemory = NULL;	// guest physical RAM (aligned to 16 bytes by ppc_init_physical_memory)
uint32 gMemorySize;	// size of gMemory in bytes
40    
41     #undef TLB
42    
/*
 *	PTE protection lookup table (.346 of the PEM).
 *	Index = access*8 + key*4 + pp, where access is 0 for loads /
 *	instruction fetches and 1 for stores, key is the Ks/Kp key bit
 *	selected from the segment register, and pp is the 2-bit PP field
 *	of the PTE. A value of 1 means the access is permitted.
 */
static int ppc_pte_protection[] = {
	// read(0)/write(1) key pp

	// read
	1, // r/w
	1, // r/w
	1, // r/w
	1, // r
	0, // -
	1, // r
	1, // r/w
	1, // r

	// write
	1, // r/w
	1, // r/w
	1, // r/w
	0, // r
	0, // -
	0, // r
	1, // r/w
	0, // r
};
66    
/*
 *	Translate an effective (virtual) address to a physical address.
 *
 *	addr   - effective address to translate
 *	flags  - PPC_MMU_CODE for instruction fetches (uses IBATs and honors
 *	         SR_N), PPC_MMU_WRITE for stores (selects the write half of
 *	         the protection table and sets the changed bit),
 *	         PPC_MMU_NO_EXC to suppress raising guest exceptions
 *	result - receives the physical address on success
 *
 *	Returns PPC_MMU_OK on success, PPC_MMU_EXC if a guest exception was
 *	raised (ISI/DSI), PPC_MMU_FATAL on unrecoverable failure.
 *
 *	Order of translation: real mode check, BAT match, then the hashed
 *	page table (primary hash, then secondary hash). ".329"/".360" refer
 *	to pages of the PowerPC Programming Environments Manual.
 */
int FASTCALL ppc_effective_to_physical(uint32 addr, int flags, uint32 &result)
{
	if (flags & PPC_MMU_CODE) {
		// instruction relocation off -> real addressing
		if (!(gCPU.msr & MSR_IR)) {
			result = addr;
			return PPC_MMU_OK;
		}
		/*
		 * BAT translation .329
		 */
		for (int i=0; i<4; i++) {
			if ((addr & gCPU.ibat_bl[i]) == gCPU.ibat_bepi[i]) {
				// bat applies to this address
				if (((gCPU.ibatu[i] & BATU_Vs) && !(gCPU.msr & MSR_PR))
				|| ((gCPU.ibatu[i] & BATU_Vp) && (gCPU.msr & MSR_PR))) {
					// bat entry valid
					addr &= gCPU.ibat_nbl[i];
					addr |= gCPU.ibat_brpn[i];
					result = addr;
					// FIXME: check access rights
					return PPC_MMU_OK;
				}
			}
		}
	} else {
		// data relocation off -> real addressing
		if (!(gCPU.msr & MSR_DR)) {
			result = addr;
			return PPC_MMU_OK;
		}
		/*
		 * BAT translation .329
		 */
		for (int i=0; i<4; i++) {
			if ((addr & gCPU.dbat_bl[i]) == gCPU.dbat_bepi[i]) {
				// bat applies to this address
				if (((gCPU.dbatu[i] & BATU_Vs) && !(gCPU.msr & MSR_PR))
				|| ((gCPU.dbatu[i] & BATU_Vp) && (gCPU.msr & MSR_PR))) {
					// bat entry valid
					addr &= gCPU.dbat_nbl[i];
					addr |= gCPU.dbat_brpn[i];
					result = addr;
					// FIXME: check access rights
					return PPC_MMU_OK;
				}
			}
		}
	}

	/*
	 * Address translation with segment register
	 */
	uint32 sr = gCPU.sr[EA_SR(addr)];

	if (sr & SR_T) {
		// direct-store segment (woea)
		// FIXME: implement me
		PPC_MMU_ERR("sr & T\n");
	} else {
#ifdef TLB
		// software TLB: 4-entry cache of recent page translations
		for (int i=0; i<4; i++) {
			if ((addr & ~0xfff) == (gCPU.tlb_va[i])) {
				gCPU.tlb_last = i;
//				ht_printf("TLB: %d: %08x -> %08x\n", i, addr, gCPU.tlb_pa[i] | (addr & 0xfff));
				result = gCPU.tlb_pa[i] | (addr & 0xfff);
				return PPC_MMU_OK;
			}
		}
#endif
		// page address translation
		if ((flags & PPC_MMU_CODE) && (sr & SR_N)) {
			// segment isnt executable
			if (!(flags & PPC_MMU_NO_EXC)) {
				ppc_exception(PPC_EXC_ISI, PPC_EXC_SRR1_GUARD);
				return PPC_MMU_EXC;
			}
			return PPC_MMU_FATAL;
		}
		uint32 offset = EA_Offset(addr);         // 12 bit
		uint32 page_index = EA_PageIndex(addr);  // 16 bit
		uint32 VSID = SR_VSID(sr);               // 24 bit
		uint32 api = EA_API(addr);               // 6 bit (part of page_index)
		// VSID.page_index = Virtual Page Number (VPN)

		// Hashfunction no 1 "xor" .360
		uint32 hash1 = (VSID ^ page_index);
		uint32 pteg_addr = ((hash1 & gCPU.pagetable_hashmask)<<6) | gCPU.pagetable_base;
		// scan the 8 PTEs of the primary PTEG
		for (int i=0; i<8; i++) {
			uint32 pte;
			if (ppc_read_physical_word(pteg_addr, pte)) {
				if (!(flags & PPC_MMU_NO_EXC)) {
					PPC_MMU_ERR("read physical in address translate failed\n");
					return PPC_MMU_EXC;
				}
				return PPC_MMU_FATAL;
			}
			// valid entry hashed with the primary function (H clear)?
			if ((pte & PTE1_V) && (!(pte & PTE1_H))) {
				if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte))) {
					// page found; load the second PTE word
					if (ppc_read_physical_word(pteg_addr+4, pte)) {
						if (!(flags & PPC_MMU_NO_EXC)) {
							PPC_MMU_ERR("read physical in address translate failed\n");
							return PPC_MMU_EXC;
						}
						return PPC_MMU_FATAL;
					}
					// check accessmode .346
					int key;
					if (gCPU.msr & MSR_PR) {
						key = (sr & SR_Kp) ? 4 : 0;
					} else {
						key = (sr & SR_Ks) ? 4 : 0;
					}
					if (!ppc_pte_protection[((flags&PPC_MMU_WRITE)?8:0) + key + PTE2_PP(pte)]) {
						// access not permitted -> protection fault
						if (!(flags & PPC_MMU_NO_EXC)) {
							if (flags & PPC_MMU_CODE) {
								PPC_MMU_WARN("correct impl? code + read protection\n");
								ppc_exception(PPC_EXC_ISI, PPC_EXC_SRR1_PROT, addr);
								return PPC_MMU_EXC;
							} else {
								if (flags & PPC_MMU_WRITE) {
									ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PROT | PPC_EXC_DSISR_STORE, addr);
								} else {
									ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PROT, addr);
								}
								return PPC_MMU_EXC;
							}
						}
						return PPC_MMU_FATAL;
					}
					// ok..
					uint32 pap = PTE2_RPN(pte);
					result = pap | offset;
#ifdef TLB
					// remember this translation (round-robin slot)
					gCPU.tlb_last++;
					gCPU.tlb_last &= 3;
					gCPU.tlb_pa[gCPU.tlb_last] = pap;
					gCPU.tlb_va[gCPU.tlb_last] = addr & ~0xfff;
//					ht_printf("TLB: STORE %d: %08x -> %08x\n", gCPU.tlb_last, addr, pap);
#endif
					// update access bits (R always, C on store)
					// FIXME: is someone actually using this?
					if (flags & PPC_MMU_WRITE) {
						pte |= PTE2_C | PTE2_R;
					} else {
						pte |= PTE2_R;
					}
					ppc_write_physical_word(pteg_addr+4, pte);
					return PPC_MMU_OK;
				}
			}
			pteg_addr+=8;
		}

		// Hashfunction no 2 "not" .360
		hash1 = ~hash1;
		pteg_addr = ((hash1 & gCPU.pagetable_hashmask)<<6) | gCPU.pagetable_base;
		// scan the 8 PTEs of the secondary PTEG
		for (int i=0; i<8; i++) {
			uint32 pte;
			if (ppc_read_physical_word(pteg_addr, pte)) {
				if (!(flags & PPC_MMU_NO_EXC)) {
					PPC_MMU_ERR("read physical in address translate failed\n");
					return PPC_MMU_EXC;
				}
				return PPC_MMU_FATAL;
			}
			// valid entry hashed with the secondary function (H set)?
			if ((pte & PTE1_V) && (pte & PTE1_H)) {
				if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte))) {
					// page found; load the second PTE word
					if (ppc_read_physical_word(pteg_addr+4, pte)) {
						if (!(flags & PPC_MMU_NO_EXC)) {
							PPC_MMU_ERR("read physical in address translate failed\n");
							return PPC_MMU_EXC;
						}
						return PPC_MMU_FATAL;
					}
					// check accessmode
					int key;
					if (gCPU.msr & MSR_PR) {
						key = (sr & SR_Kp) ? 4 : 0;
					} else {
						key = (sr & SR_Ks) ? 4 : 0;
					}
					if (!ppc_pte_protection[((flags&PPC_MMU_WRITE)?8:0) + key + PTE2_PP(pte)]) {
						// access not permitted -> protection fault
						if (!(flags & PPC_MMU_NO_EXC)) {
							if (flags & PPC_MMU_CODE) {
								PPC_MMU_WARN("correct impl? code + read protection\n");
								ppc_exception(PPC_EXC_ISI, PPC_EXC_SRR1_PROT, addr);
								return PPC_MMU_EXC;
							} else {
								if (flags & PPC_MMU_WRITE) {
									ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PROT | PPC_EXC_DSISR_STORE, addr);
								} else {
									ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PROT, addr);
								}
								return PPC_MMU_EXC;
							}
						}
						return PPC_MMU_FATAL;
					}
					// ok..
					result = PTE2_RPN(pte) | offset;

					// update access bits
					// FIXME: is someone actually using this?
					if (flags & PPC_MMU_WRITE) {
						pte |= PTE2_C | PTE2_R;
					} else {
						pte |= PTE2_R;
					}
					ppc_write_physical_word(pteg_addr+4, pte);
//					PPC_MMU_WARN("hash function 2 used!\n");
//					gSinglestep = true;
					return PPC_MMU_OK;
				}
			}
			pteg_addr+=8;
		}
	}
	// page fault: no matching PTE in either PTEG
	if (!(flags & PPC_MMU_NO_EXC)) {
		if (flags & PPC_MMU_CODE) {
			ppc_exception(PPC_EXC_ISI, PPC_EXC_SRR1_PAGE);
		} else {
			if (flags & PPC_MMU_WRITE) {
				ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PAGE | PPC_EXC_DSISR_STORE, addr);
			} else {
				ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PAGE, addr);
			}
		}
		return PPC_MMU_EXC;
	}
	return PPC_MMU_FATAL;
}
300    
/*
 *	Data-address translation variant used by the JITC virtual-memory
 *	interface. Unlike ppc_effective_to_physical() it does not raise
 *	exceptions itself; on a page-table hit it returns a bitmask of the
 *	permitted accesses (PPC_MMU_READ and/or PPC_MMU_WRITE) and only
 *	fills gCPU.dsisr on protection/page faults.
 *
 *	NOTE(review): the return conventions look inconsistent -- the
 *	real-mode path returns READ|WRITE, but the BAT-hit, no-read and
 *	page-fault paths return PPC_MMU_OK, and a failed read of the second
 *	PTE word returns 0. Presumably callers interpret "no access bits
 *	set" as failure; confirm against the JITC callers before changing.
 */
int FASTCALL ppc_effective_to_physical_vm(uint32 addr, int flags, uint32 &result)
{
	// data relocation off -> real addressing, full access
	if (!(gCPU.msr & MSR_DR)) {
		result = addr;
		return PPC_MMU_READ | PPC_MMU_WRITE;
	}
	/*
	 * BAT translation .329
	 */
	for (int i=0; i<4; i++) {
		if ((addr & gCPU.dbat_bl[i]) == gCPU.dbat_bepi[i]) {
			// bat applies to this address
			if (((gCPU.dbatu[i] & BATU_Vs) && !(gCPU.msr & MSR_PR))
			|| ((gCPU.dbatu[i] & BATU_Vp) && (gCPU.msr & MSR_PR))) {
				// bat entry valid
				addr &= gCPU.dbat_nbl[i];
				addr |= gCPU.dbat_brpn[i];
				result = addr;
				// FIXME: check access rights
				return PPC_MMU_OK;
			}
		}
	}

	/*
	 * Address translation with segment register
	 */
	uint32 sr = gCPU.sr[EA_SR(addr)];

	if (sr & SR_T) {
		// direct-store segment (woea)
		// FIXME: implement me
		PPC_MMU_ERR("sr & T\n");
	} else {
		// page address translation
		uint32 offset = EA_Offset(addr);         // 12 bit
		uint32 page_index = EA_PageIndex(addr);  // 16 bit
		uint32 VSID = SR_VSID(sr);               // 24 bit
		uint32 api = EA_API(addr);               // 6 bit (part of page_index)
		// VSID.page_index = Virtual Page Number (VPN)

		// Hashfunction no 1 "xor" .360
		uint32 hash1 = (VSID ^ page_index);
		uint32 pteg_addr = ((hash1 & gCPU.pagetable_hashmask)<<6) | gCPU.pagetable_base;
		// scan the 8 PTEs of the primary PTEG
		for (int i=0; i<8; i++) {
			uint32 pte;
			if (ppc_read_physical_word(pteg_addr, pte)) {
				return PPC_MMU_FATAL;
			}
			if ((pte & PTE1_V) && (!(pte & PTE1_H))) {
				if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte))) {
					// page found; load the second PTE word
					if (ppc_read_physical_word(pteg_addr+4, pte)) {
						return 0;
					}
					// check accessmode .346
					int key;
					if (gCPU.msr & MSR_PR) {
						key = (sr & SR_Kp) ? 4 : 0;
					} else {
						key = (sr & SR_Ks) ? 4 : 0;
					}
					// start with full access; drop bits the PTE forbids
					int ret = PPC_MMU_WRITE | PPC_MMU_READ;
					if (!ppc_pte_protection[8 + key + PTE2_PP(pte)]) {
						// write not permitted
						if (!(flags & PPC_MMU_NO_EXC)) {
							if (flags & PPC_MMU_WRITE) {
								gCPU.dsisr = PPC_EXC_DSISR_PROT | PPC_EXC_DSISR_STORE;
							}
						}
						ret &= ~PPC_MMU_WRITE;
					}
					if (!ppc_pte_protection[key + PTE2_PP(pte)]) {
						// read not permitted -> no usable access at all
						if (!(flags & PPC_MMU_NO_EXC)) {
							if (!(flags & PPC_MMU_WRITE)) {
								gCPU.dsisr = PPC_EXC_DSISR_PROT;
							}
						}
						return PPC_MMU_OK;
					}
					// ok..
					uint32 pap = PTE2_RPN(pte);
					result = pap | offset;
					// update access bits (R always, C if writable)
					if (ret & PPC_MMU_WRITE) {
						pte |= PTE2_C | PTE2_R;
					} else {
						pte |= PTE2_R;
					}
					ppc_write_physical_word(pteg_addr+4, pte);
					return ret;
				}
			}
			pteg_addr+=8;
		}

		// Hashfunction no 2 "not" .360
		hash1 = ~hash1;
		pteg_addr = ((hash1 & gCPU.pagetable_hashmask)<<6) | gCPU.pagetable_base;
		// scan the 8 PTEs of the secondary PTEG
		for (int i=0; i<8; i++) {
			uint32 pte;
			if (ppc_read_physical_word(pteg_addr, pte)) {
				return PPC_MMU_FATAL;
			}
			if ((pte & PTE1_V) && (pte & PTE1_H)) {
				if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte))) {
					// page found; load the second PTE word
					if (ppc_read_physical_word(pteg_addr+4, pte)) {
						return 0;
					}
					// check accessmode
					int key;
					if (gCPU.msr & MSR_PR) {
						key = (sr & SR_Kp) ? 4 : 0;
					} else {
						key = (sr & SR_Ks) ? 4 : 0;
					}
					// start with full access; drop bits the PTE forbids
					int ret = PPC_MMU_WRITE | PPC_MMU_READ;
					if (!ppc_pte_protection[8 + key + PTE2_PP(pte)]) {
						// write not permitted
						if (!(flags & PPC_MMU_NO_EXC)) {
							if (flags & PPC_MMU_WRITE) {
								gCPU.dsisr = PPC_EXC_DSISR_PROT | PPC_EXC_DSISR_STORE;
							}
						}
						ret &= ~PPC_MMU_WRITE;
					}
					if (!ppc_pte_protection[key + PTE2_PP(pte)]) {
						// read not permitted -> no usable access at all
						if (!(flags & PPC_MMU_NO_EXC)) {
							if (!(flags & PPC_MMU_WRITE)) {
								gCPU.dsisr = PPC_EXC_DSISR_PROT;
							}
						}
						return PPC_MMU_OK;
					}
					// ok..
					result = PTE2_RPN(pte) | offset;

					// update access bits (R always, C if writable)
					if (ret & PPC_MMU_WRITE) {
						pte |= PTE2_C | PTE2_R;
					} else {
						pte |= PTE2_R;
					}
					ppc_write_physical_word(pteg_addr+4, pte);
					return ret;
				}
			}
			pteg_addr+=8;
		}
	}
	// page fault: record the cause; caller inspects gCPU.dsisr
	if (!(flags & PPC_MMU_NO_EXC)) {
		if (flags & PPC_MMU_WRITE) {
			gCPU.dsisr = PPC_EXC_DSISR_PAGE | PPC_EXC_DSISR_STORE;
		} else {
			gCPU.dsisr = PPC_EXC_DSISR_PAGE;
		}
	}
	return PPC_MMU_OK;
}
460    
/*
 *	Invalidate all cached effective-to-physical translations:
 *	forgets the cached code page and flushes the JITC-side TLB.
 */
void ppc_mmu_tlb_invalidate()
{
	// 0xffffffff can never match a real page address, so the next
	// instruction fetch is forced through full translation
	gCPU.effective_code_page = 0xffffffff;
	ppc_mmu_tlb_invalidate_all_asm();
}
466    
467     /*
468     pagetable:
469     min. 2^10 (64k) PTEGs
470     PTEG = 64byte
471     The page table can be any size 2^n where 16 <= n <= 25.
472    
473     A PTEG contains eight
474     PTEs of eight bytes each; therefore, each PTEG is 64 bytes long.
475     */
476    
/*
 *	Install a new SDR1 (page table base/size) value.
 *
 *	Validates HTABMASK (must be a string of low-order 1 bits, max 9)
 *	and HTABORG (must be aligned to the table size), then derives
 *	pagetable_base and pagetable_hashmask from them.
 *
 *	Returns true if the value was accepted, false if it is malformed
 *	or the table would lie outside emulated RAM.
 */
bool FASTCALL ppc_mmu_set_sdr1(uint32 newval, bool quiesce)
{
	// NOTE(review): the condition is commented out, so quiesce is
	// always forced off and prom_quiesce() below is dead code --
	// apparently deliberate; confirm before re-enabling.
/*	if (newval == gCPU.sdr1)*/ quiesce = false;
	PPC_MMU_TRACE("new pagetable: sdr1 = 0x%08x\n", newval);
	uint32 htabmask = SDR1_HTABMASK(newval);
	// count the contiguous low-order 1 bits of htabmask into n,
	// collecting them in xx
	uint32 x = 1;
	uint32 xx = 0;
	int n = 0;
	while ((htabmask & x) && (n < 9)) {
		n++;
		xx|=x;
		x<<=1;
	}
	// any 1 bit above the contiguous run makes the mask invalid
	if (htabmask & ~xx) {
		PPC_MMU_WARN("new pagetable: broken htabmask (%05x)\n", htabmask);
		return false;
	}
	uint32 htaborg = SDR1_HTABORG(newval);
	// the table base must be aligned to the table size
	if (htaborg & xx) {
		PPC_MMU_WARN("new pagetable: broken htaborg (%05x)\n", htaborg);
		return false;
	}
	gCPU.pagetable_base = htaborg<<16;
	gCPU.sdr1 = newval;
	gCPU.pagetable_hashmask = ((xx<<10)|0x3ff);
	// NOTE(review): '0xffffffff &' is a no-op, so this checks
	// base|hashmask rather than the table's true end address --
	// looks suspicious; verify against the intended table size.
	uint a = (0xffffffff & gCPU.pagetable_hashmask) | gCPU.pagetable_base;
	if (a > gMemorySize) {
		PPC_MMU_WARN("new pagetable: not in memory (%08x)\n", a);
		return false;
	}
	PPC_MMU_TRACE("new pagetable: sdr1 accepted\n");
	PPC_MMU_TRACE("number of pages: 2^%d pagetable_start: 0x%08x size: 2^%d\n", n+13, gCPU.pagetable_base, n+16);
	if (quiesce) {
		prom_quiesce();
	}
	return true;
}
514    
515     int FASTCALL ppc_direct_physical_memory_handle(uint32 addr, byte *&ptr)
516     {
517     if (addr < gMemorySize) {
518     ptr = &gMemory[addr];
519     return PPC_MMU_OK;
520     }
521     return PPC_MMU_FATAL;
522     }
523    
524     int FASTCALL ppc_direct_effective_memory_handle(uint32 addr, byte *&ptr)
525     {
526     uint32 ea;
527     int r;
528     if (!((r = ppc_effective_to_physical(addr, PPC_MMU_READ, ea)))) {
529     return ppc_direct_physical_memory_handle(ea, ptr);
530     }
531     return r;
532     }
533    
534     int FASTCALL ppc_direct_effective_memory_handle_code(uint32 addr, byte *&ptr)
535     {
536     uint32 ea;
537     int r;
538     if (!((r = ppc_effective_to_physical(addr, PPC_MMU_READ | PPC_MMU_CODE, ea)))) {
539     return ppc_direct_physical_memory_handle(ea, ptr);
540     }
541     return r;
542     }
543    
544     int FASTCALL ppc_read_physical_qword(uint32 addr, Vector_t &result)
545     {
546     if (addr < gMemorySize) {
547     // big endian
548     VECT_D(result,0) = ppc_dword_from_BE(*((uint64*)(gMemory+addr)));
549     VECT_D(result,1) = ppc_dword_from_BE(*((uint64*)(gMemory+addr+8)));
550     return PPC_MMU_OK;
551     }
552     return io_mem_read128(addr, (uint128 *)&result);
553     }
554    
555     int FASTCALL ppc_read_physical_dword(uint32 addr, uint64 &result)
556     {
557     if (addr < gMemorySize) {
558     // big endian
559     result = ppc_dword_from_BE(*((uint64*)(gMemory+addr)));
560     return PPC_MMU_OK;
561     }
562     int ret = io_mem_read64(addr, result);
563     result = ppc_bswap_dword(result);
564     return ret;
565     }
566    
567     int FASTCALL ppc_read_physical_word(uint32 addr, uint32 &result)
568     {
569     if (addr < gMemorySize) {
570     // big endian
571     result = ppc_word_from_BE(*((uint32*)(gMemory+addr)));
572     return PPC_MMU_OK;
573     }
574     int ret = io_mem_read(addr, result, 4);
575     result = ppc_bswap_word(result);
576     return ret;
577     }
578    
579     int FASTCALL ppc_read_physical_half(uint32 addr, uint16 &result)
580     {
581     if (addr < gMemorySize) {
582     // big endian
583     result = ppc_half_from_BE(*((uint16*)(gMemory+addr)));
584     return PPC_MMU_OK;
585     }
586     uint32 r;
587     int ret = io_mem_read(addr, r, 2);
588     result = ppc_bswap_half(r);
589     return ret;
590     }
591    
592     int FASTCALL ppc_read_physical_byte(uint32 addr, uint8 &result)
593     {
594     if (addr < gMemorySize) {
595     // big endian
596     result = gMemory[addr];
597     return PPC_MMU_OK;
598     }
599     uint32 r;
600     int ret = io_mem_read(addr, r, 1);
601     result = r;
602     return ret;
603     }
604    
605     int FASTCALL ppc_read_effective_code(uint32 addr, uint32 &result)
606     {
607     if (addr & 3) {
608     // EXC..bla
609     return PPC_MMU_FATAL;
610     }
611     uint32 p;
612     int r;
613     if (!((r=ppc_effective_to_physical(addr, PPC_MMU_READ | PPC_MMU_CODE, p)))) {
614     return ppc_read_physical_word(p, result);
615     }
616     return r;
617     }
618    
619     int FASTCALL ppc_read_effective_qword(uint32 addr, Vector_t &result)
620     {
621     uint32 p;
622     int r;
623    
624     addr &= ~0x0f;
625    
626     if (!(r = ppc_effective_to_physical(addr, PPC_MMU_READ, p))) {
627     return ppc_read_physical_qword(p, result);
628     }
629    
630     return r;
631     }
632    
633     int FASTCALL ppc_read_effective_dword(uint32 addr, uint64 &result)
634     {
635     uint32 p;
636     int r;
637     if (!(r = ppc_effective_to_physical(addr, PPC_MMU_READ, p))) {
638     if (EA_Offset(addr) > 4088) {
639     // read overlaps two pages.. tricky
640     byte *r1, *r2;
641     byte b[14];
642     ppc_effective_to_physical((addr & ~0xfff)+4089, PPC_MMU_READ, p);
643     if ((r = ppc_direct_physical_memory_handle(p, r1))) return r;
644     if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_READ, p))) return r;
645     if ((r = ppc_direct_physical_memory_handle(p, r2))) return r;
646     memmove(&b[0], r1, 7);
647     memmove(&b[7], r2, 7);
648     memmove(&result, &b[EA_Offset(addr)-4089], 8);
649     result = ppc_dword_from_BE(result);
650     return PPC_MMU_OK;
651     } else {
652     return ppc_read_physical_dword(p, result);
653     }
654     }
655     return r;
656     }
657    
658     int FASTCALL ppc_read_effective_word(uint32 addr, uint32 &result)
659     {
660     uint32 p;
661     int r;
662     if (!(r = ppc_effective_to_physical(addr, PPC_MMU_READ, p))) {
663     if (EA_Offset(addr) > 4092) {
664     // read overlaps two pages.. tricky
665     byte *r1, *r2;
666     byte b[6];
667     ppc_effective_to_physical((addr & ~0xfff)+4093, PPC_MMU_READ, p);
668     if ((r = ppc_direct_physical_memory_handle(p, r1))) return r;
669     if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_READ, p))) return r;
670     if ((r = ppc_direct_physical_memory_handle(p, r2))) return r;
671     memmove(&b[0], r1, 3);
672     memmove(&b[3], r2, 3);
673     memmove(&result, &b[EA_Offset(addr)-4093], 4);
674     result = ppc_word_from_BE(result);
675     return PPC_MMU_OK;
676     } else {
677     return ppc_read_physical_word(p, result);
678     }
679     }
680     return r;
681     }
682    
683     int FASTCALL ppc_read_effective_half(uint32 addr, uint16 &result)
684     {
685     uint32 p;
686     int r;
687     if (!((r = ppc_effective_to_physical(addr, PPC_MMU_READ, p)))) {
688     if (EA_Offset(addr) > 4094) {
689     // read overlaps two pages.. tricky
690     byte b1, b2;
691     ppc_effective_to_physical((addr & ~0xfff)+4095, PPC_MMU_READ, p);
692     if ((r = ppc_read_physical_byte(p, b1))) return r;
693     if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_READ, p))) return r;
694     if ((r = ppc_read_physical_byte(p, b2))) return r;
695     result = (b1<<8)|b2;
696     return PPC_MMU_OK;
697     } else {
698     return ppc_read_physical_half(p, result);
699     }
700     }
701     return r;
702     }
703    
704     int FASTCALL ppc_read_effective_byte(uint32 addr, uint8 &result)
705     {
706     uint32 p;
707     int r;
708     if (!((r = ppc_effective_to_physical(addr, PPC_MMU_READ, p)))) {
709     return ppc_read_physical_byte(p, result);
710     }
711     return r;
712     }
713    
714     int FASTCALL ppc_write_physical_qword(uint32 addr, Vector_t data)
715     {
716     if (addr < gMemorySize) {
717     // big endian
718     *((uint64*)(gMemory+addr)) = ppc_dword_to_BE(VECT_D(data,0));
719     *((uint64*)(gMemory+addr+8)) = ppc_dword_to_BE(VECT_D(data,1));
720     return PPC_MMU_OK;
721     }
722     if (io_mem_write128(addr, (uint128 *)&data) == IO_MEM_ACCESS_OK) {
723     return PPC_MMU_OK;
724     } else {
725     return PPC_MMU_FATAL;
726     }
727     }
728    
729     int FASTCALL ppc_write_physical_dword(uint32 addr, uint64 data)
730     {
731     if (addr < gMemorySize) {
732     // big endian
733     *((uint64*)(gMemory+addr)) = ppc_dword_to_BE(data);
734     return PPC_MMU_OK;
735     }
736     if (io_mem_write64(addr, ppc_bswap_dword(data)) == IO_MEM_ACCESS_OK) {
737     return PPC_MMU_OK;
738     } else {
739     return PPC_MMU_FATAL;
740     }
741     }
742    
743     int FASTCALL ppc_write_physical_word(uint32 addr, uint32 data)
744     {
745     if (addr < gMemorySize) {
746     // big endian
747     *((uint32*)(gMemory+addr)) = ppc_word_to_BE(data);
748     return PPC_MMU_OK;
749     }
750     return io_mem_write(addr, ppc_bswap_word(data), 4);
751     }
752    
753     int FASTCALL ppc_write_physical_half(uint32 addr, uint16 data)
754     {
755     if (addr < gMemorySize) {
756     // big endian
757     *((uint16*)(gMemory+addr)) = ppc_half_to_BE(data);
758     return PPC_MMU_OK;
759     }
760     return io_mem_write(addr, ppc_bswap_half(data), 2);
761     }
762    
763     int FASTCALL ppc_write_physical_byte(uint32 addr, uint8 data)
764     {
765     if (addr < gMemorySize) {
766     // big endian
767     gMemory[addr] = data;
768     return PPC_MMU_OK;
769     }
770     return io_mem_write(addr, data, 1);
771     }
772    
773     int FASTCALL ppc_write_effective_qword(uint32 addr, Vector_t data)
774     {
775     uint32 p;
776     int r;
777    
778     addr &= ~0x0f;
779    
780     if (!((r=ppc_effective_to_physical(addr, PPC_MMU_WRITE, p)))) {
781     return ppc_write_physical_qword(p, data);
782     }
783     return r;
784     }
785    
786     int FASTCALL ppc_write_effective_dword(uint32 addr, uint64 data)
787     {
788     uint32 p;
789     int r;
790     if (!((r=ppc_effective_to_physical(addr, PPC_MMU_WRITE, p)))) {
791     if (EA_Offset(addr) > 4088) {
792     // write overlaps two pages.. tricky
793     byte *r1, *r2;
794     byte b[14];
795     ppc_effective_to_physical((addr & ~0xfff)+4089, PPC_MMU_WRITE, p);
796     if ((r = ppc_direct_physical_memory_handle(p, r1))) return r;
797     if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_WRITE, p))) return r;
798     if ((r = ppc_direct_physical_memory_handle(p, r2))) return r;
799     data = ppc_dword_to_BE(data);
800     memmove(&b[0], r1, 7);
801     memmove(&b[7], r2, 7);
802     memmove(&b[EA_Offset(addr)-4089], &data, 8);
803     memmove(r1, &b[0], 7);
804     memmove(r2, &b[7], 7);
805     return PPC_MMU_OK;
806     } else {
807     return ppc_write_physical_dword(p, data);
808     }
809     }
810     return r;
811     }
812    
813     int FASTCALL ppc_write_effective_word(uint32 addr, uint32 data)
814     {
815     uint32 p;
816     int r;
817     if (!((r=ppc_effective_to_physical(addr, PPC_MMU_WRITE, p)))) {
818     if (EA_Offset(addr) > 4092) {
819     // write overlaps two pages.. tricky
820     byte *r1, *r2;
821     byte b[6];
822     ppc_effective_to_physical((addr & ~0xfff)+4093, PPC_MMU_WRITE, p);
823     if ((r = ppc_direct_physical_memory_handle(p, r1))) return r;
824     if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_WRITE, p))) return r;
825     if ((r = ppc_direct_physical_memory_handle(p, r2))) return r;
826     data = ppc_word_to_BE(data);
827     memmove(&b[0], r1, 3);
828     memmove(&b[3], r2, 3);
829     memmove(&b[EA_Offset(addr)-4093], &data, 4);
830     memmove(r1, &b[0], 3);
831     memmove(r2, &b[3], 3);
832     return PPC_MMU_OK;
833     } else {
834     return ppc_write_physical_word(p, data);
835     }
836     }
837     return r;
838     }
839    
840     int FASTCALL ppc_write_effective_half(uint32 addr, uint16 data)
841     {
842     uint32 p;
843     int r;
844     if (!((r=ppc_effective_to_physical(addr, PPC_MMU_WRITE, p)))) {
845     if (EA_Offset(addr) > 4094) {
846     // write overlaps two pages.. tricky
847     ppc_effective_to_physical((addr & ~0xfff)+4095, PPC_MMU_WRITE, p);
848     if ((r = ppc_write_physical_byte(p, data>>8))) return r;
849     if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_WRITE, p))) return r;
850     if ((r = ppc_write_physical_byte(p, data))) return r;
851     return PPC_MMU_OK;
852     } else {
853     return ppc_write_physical_half(p, data);
854     }
855     }
856     return r;
857     }
858    
859     int FASTCALL ppc_write_effective_byte(uint32 addr, uint8 data)
860     {
861     uint32 p;
862     int r;
863     if (!((r=ppc_effective_to_physical(addr, PPC_MMU_WRITE, p)))) {
864     return ppc_write_physical_byte(p, data);
865     }
866     return r;
867     }
868    
869     bool FASTCALL ppc_init_physical_memory(uint size)
870     {
871     if (size < 64*1024*1024) {
872     PPC_MMU_ERR("Main memory size must >= 64MB!\n");
873     }
874     gMemory = (byte*)malloc(size+16);
875     if ((uint32)gMemory & 0x0f) {
876     gMemory += 16 - ((uint32)gMemory & 0x0f);
877     }
878     gMemorySize = size;
879     return gMemory != NULL;
880     }
881    
882     uint32 ppc_get_memory_size()
883     {
884     return gMemorySize;
885     }
886    
887     /***************************************************************************
888     * DMA Interface
889     */
890    
891     bool ppc_dma_write(uint32 dest, const void *src, uint32 size)
892     {
893     if (dest > gMemorySize || (dest+size) > gMemorySize) return false;
894    
895     byte *ptr;
896     ppc_direct_physical_memory_handle(dest, ptr);
897    
898     memcpy(ptr, src, size);
899     return true;
900     }
901    
902     bool ppc_dma_read(void *dest, uint32 src, uint32 size)
903     {
904     if (src > gMemorySize || (src+size) > gMemorySize) return false;
905    
906     byte *ptr;
907     ppc_direct_physical_memory_handle(src, ptr);
908    
909     memcpy(dest, ptr, size);
910     return true;
911     }
912    
913     bool ppc_dma_set(uint32 dest, int c, uint32 size)
914     {
915     if (dest > gMemorySize || (dest+size) > gMemorySize) return false;
916    
917     byte *ptr;
918     ppc_direct_physical_memory_handle(dest, ptr);
919    
920     memset(ptr, c, size);
921     return true;
922     }
923    
924    
925     /***************************************************************************
926     * DEPRECATED prom interface
927     */
928     bool ppc_prom_set_sdr1(uint32 newval, bool quiesce)
929     {
930     return ppc_mmu_set_sdr1(newval, quiesce);
931     }
932    
933     bool ppc_prom_effective_to_physical(uint32 &result, uint32 ea)
934     {
935     return ppc_effective_to_physical(ea, PPC_MMU_READ|PPC_MMU_SV|PPC_MMU_NO_EXC, result) == PPC_MMU_OK;
936     }
937    
/*
 *	Create a page-table mapping ea -> pa for the prom/boot loader.
 *	Searches the primary PTEG (then the secondary one with PTE1_H
 *	set) for a free slot and writes a new PTE there.
 *	Returns false if both PTEGs are full or a physical access fails.
 */
bool ppc_prom_page_create(uint32 ea, uint32 pa)
{
	uint32 sr = gCPU.sr[EA_SR(ea)];
	uint32 page_index = EA_PageIndex(ea);  // 16 bit
	uint32 VSID = SR_VSID(sr);             // 24 bit
	uint32 api = EA_API(ea);               // 6 bit (part of page_index)
	uint32 hash1 = (VSID ^ page_index);
	uint32 pte, pte2;
	uint32 h = 0;	// PTE1_H on the second (negated-hash) pass
	for (int j=0; j<2; j++) {
		uint32 pteg_addr = ((hash1 & gCPU.pagetable_hashmask)<<6) | gCPU.pagetable_base;
		for (int i=0; i<8; i++) {
			if (ppc_read_physical_word(pteg_addr, pte)) {
				PPC_MMU_ERR("read physical in address translate failed\n");
				return false;
			}
			if (!(pte & PTE1_V)) {
				// free pagetable entry found
				// word 1: valid + VSID + hash-function flag + API
				pte = PTE1_V | (VSID << 7) | h | api;
				// word 2: physical page number, all other bits clear
				pte2 = (PA_RPN(pa) << 12) | 0;
				if (ppc_write_physical_word(pteg_addr, pte)
				 || ppc_write_physical_word(pteg_addr+4, pte2)) {
					return false;
				} else {
					// ok
					return true;
				}
			}
			pteg_addr+=8;
		}
		// retry with the secondary hash function
		hash1 = ~hash1;
		h = PTE1_H;
	}
	return false;
}
973    
974     bool ppc_prom_page_free(uint32 ea)
975     {
976     return true;
977     }
978    
979     /***************************************************************************
980     * MMU Opcodes
981     */
982    
983     #include "ppc_dec.h"
984    
985     /*
986     * puts the sum of cr1 and cr2 into EAX
987     * (in the most clever way)
988     */
static void getEAXRsum(PPC_Register cr1, PPC_Register cr2)
{
	NativeReg r1 = jitcGetClientRegisterMapping(cr1);
	NativeReg r2 = jitcGetClientRegisterMapping(cr2);
	if (r1 == EAX) {
		// cr1 is already in EAX; cr2 is added below
		/* intentional left empty */
	} else if (r2 == EAX) {
		// cr2 is already in EAX; add cr1 from its register or
		// directly from the gCPU structure in memory
		if (r1 == REG_NO) {
			byte modrm[6];
			asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
		} else {
			asmALURegReg(X86_ADD, EAX, r1);
		}
		return;
	} else {
		/*
		 * We load cr1 into EAX but have to clobber it since
		 * we're going to modify EAX.
		 */
		jitcGetClientRegister(cr1, NATIVE_REG | EAX);
	}
	// EAX is about to be modified: detach it from any client register
	jitcClobberRegister(NATIVE_REG | EAX);
	// re-query cr2's mapping: the loads above may have moved it
	r2 = jitcGetClientRegisterMapping(cr2);
	if (r2 == REG_NO) {
		byte modrm[6];
		asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
	} else {
		asmALURegReg(X86_ADD, EAX, r2);
	}
}
1019    
/*
 * Like getEAXRsum, but honors the PowerPC "(rA|0)" addressing
 * convention: if cr1 is GPR0 it counts as the literal value 0,
 * so only cr2 is loaded into EAX.
 */
static void getEAX_0_Rsum(PPC_Register cr1, PPC_Register cr2)
{
	if (cr1 == PPC_GPR(0)) {
		jitcGetClientRegister(cr2, NATIVE_REG | EAX);
	} else {
		getEAXRsum(cr1, cr2);
	}
}
1028    
/*
 * Emits code so that EAX = gCPU[cr1] + gCPU[cr2] (effective address)
 * and EDX = gCPU[cr3] (e.g. the value to store), reusing current
 * client->native mappings where possible. EDX is clobbered before it
 * is overwritten; EAX ends up holding a non-client value (the sum).
 */
static void getEAXRsumAndEDX(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
{
	NativeReg r1 = jitcGetClientRegisterMapping(cr1);
	NativeReg r2 = jitcGetClientRegisterMapping(cr2);
	if (r1 == EAX) {
		// cr1 already in EAX: keep it, load cr3 into EDX, then add cr2
		jitcTouchRegister(EAX);
		jitcClobberRegister(NATIVE_REG | EDX);
		if (cr1 == cr3) {
			// cr3 is the same client register: copy instead of reloading
			asmALURegReg(X86_MOV, EDX, EAX);
		} else {
			jitcGetClientRegister(cr3, NATIVE_REG | EDX);
		}
		r2 = jitcGetClientRegisterMapping(cr2);
		if (r2 == REG_NO) {
			byte modrm[6];
			asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
			return;
		} else {
			asmALURegReg(X86_ADD, EAX, r2);
			return;
		}
	} else if (r2 == EAX) {
		// symmetric case: cr2 already in EAX, add cr1 instead
		jitcTouchRegister(EAX);
		jitcClobberRegister(NATIVE_REG | EDX);
		if (cr2 == cr3) {
			asmALURegReg(X86_MOV, EDX, EAX);
		} else {
			jitcGetClientRegister(cr3, NATIVE_REG | EDX);
		}
		r1 = jitcGetClientRegisterMapping(cr1);
		if (r1 == REG_NO) {
			byte modrm[6];
			asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
			return;
		}
		asmALURegReg(X86_ADD, EAX, r1);
		return;
	} else {
		// neither operand in EAX yet: load cr1 there, then cr3 into EDX
		jitcGetClientRegister(cr1, NATIVE_REG | EAX);
		jitcClobberRegister(NATIVE_REG | EDX);
		if (cr1 == cr3) {
			asmALURegReg(X86_MOV, EDX, EAX);
		} else {
			jitcGetClientRegister(cr3, NATIVE_REG | EDX);
		}
	}
	// FIXME: what if mapping of cr3==EDX?
	r2 = jitcGetClientRegisterMapping(cr2);
	if (r2 == REG_NO) {
		byte modrm[6];
		asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
	} else {
		asmALURegReg(X86_ADD, EAX, r2);
	}
}
1084    
/*
 * Like getEAXRsumAndEDX, but treats cr1 == GPR0 as the literal 0
 * ("(rA|0)" convention), so only cr2 forms the address in EAX.
 * cr3 still ends up in EDX.
 */
static void getEAX_0_RsumAndEDX(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
{
	if (cr1 == PPC_GPR(0)) {
		// if cr2 currently lives in EDX, keep it from being evicted early
		if (jitcGetClientRegisterMapping(cr2) == EDX) jitcTouchRegister(EDX);
		jitcGetClientRegister(cr2, NATIVE_REG | EAX);
		if (cr2 == cr3) {
			asmALURegReg(X86_MOV, EDX, EAX);
		} else {
			jitcGetClientRegister(cr3, NATIVE_REG | EDX);
		}
	} else {
		getEAXRsumAndEDX(cr1, cr2, cr3);
	}
}
1099    
/*
 * puts the sum of cr1 and imm into EAX
 * (in the most clever way)
 *
 * NOTE(review): after the ADD, EAX's mapping still claims to hold cr1
 * although it now holds cr1+imm. All current callers immediately
 * jitcClobberAll() afterwards, which makes this safe -- verify before
 * reusing this helper elsewhere (getEAXIsumAndEDX clobbers explicitly).
 */
static void getEAXIsum(PPC_Register cr1, uint32 imm)
{
	jitcGetClientRegister(cr1, NATIVE_REG | EAX);
	if (imm) {
		asmALURegImm(X86_ADD, EAX, imm);
	}
}
1111    
/*
 * Like getEAXIsum, with the "(rA|0)" convention: cr1 == GPR0 counts
 * as 0, so EAX is simply set to the immediate.
 */
static void getEAX_0_Isum(PPC_Register cr1, uint32 imm)
{
	if (cr1 == PPC_GPR(0)) {
		asmALURegImm(X86_MOV, EAX, imm);
	} else {
		getEAXIsum(cr1, imm);
	}
}
1120    
/*
 * Emits code so that EAX = gCPU[cr1] + imm and EDX = gCPU[cr2].
 * EAX's mapping is clobbered before the ADD modifies it.
 *
 * NOTE(review): in the cr1 == cr2 path EDX is overwritten by a raw MOV
 * without a prior jitcClobberRegister(NATIVE_REG | EDX); the sibling
 * getEAXRsumAndEDX clobbers EDX first. Looks like a potential stale/
 * dirty-mapping hazard -- confirm against the jitc allocator semantics.
 */
static void getEAXIsumAndEDX(PPC_Register cr1, uint32 imm, PPC_Register cr2)
{
	// protect cr2's EDX residency from eviction while loading cr1
	if (jitcGetClientRegisterMapping(cr2) == EDX) jitcTouchRegister(EDX);
	jitcGetClientRegister(cr1, NATIVE_REG | EAX);
	if (cr1 == cr2) {
		asmALURegReg(X86_MOV, EDX, EAX);
	} else {
		jitcGetClientRegister(cr2, NATIVE_REG | EDX);
	}
	jitcClobberRegister(NATIVE_REG | EAX);
	if (imm) {
		asmALURegImm(X86_ADD, EAX, imm);
	}
}
1135    
/*
 * Like getEAXIsumAndEDX, with the "(rA|0)" convention: cr1 == GPR0
 * counts as 0, so EAX is set to the immediate and only cr2 is loaded.
 */
static void getEAX_0_IsumAndEDX(PPC_Register cr1, uint32 imm, PPC_Register cr2)
{
	if (cr1 == PPC_GPR(0)) {
		jitcGetClientRegister(cr2, NATIVE_REG | EDX);
		jitcClobberRegister(NATIVE_REG | EAX);
		asmALURegImm(X86_MOV, EAX, imm);
	} else {
		getEAXIsumAndEDX(cr1, imm, cr2);
	}
}
1146    
1147    
/*
 * Common prologue for D-form loads: leaves the effective address
 * (cr1|0)+imm in EAX and the current client PC in ESI, with flags and
 * all client-register mappings flushed/clobbered, ready for a call to
 * a ppc_read_effective_*_asm helper.
 */
void ppc_opc_gen_helper_l(PPC_Register cr1, uint32 imm)
{
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	getEAX_0_Isum(cr1, imm);
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);	// PC for exception reporting
}
1156    
/*
 * Prologue for D-form loads with update: like ppc_opc_gen_helper_l,
 * but rA is never treated as 0 (EAX = cr1 + imm unconditionally).
 */
void ppc_opc_gen_helper_lu(PPC_Register cr1, uint32 imm)
{
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	getEAXIsum(cr1, imm);
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);	// PC for exception reporting
}
1165    
/*
 * Prologue for X-form loads with update: EAX = cr1 + cr2 (rA never
 * treated as 0), ESI = client PC, JIT state flushed for the asm call.
 */
void ppc_opc_gen_helper_lux(PPC_Register cr1, PPC_Register cr2)
{
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	getEAXRsum(cr1, cr2);
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);	// PC for exception reporting
}
1174    
/*
 * Prologue for X-form loads: EAX = (cr1|0) + cr2, ESI = client PC,
 * JIT state flushed for the asm call.
 */
void ppc_opc_gen_helper_lx(PPC_Register cr1, PPC_Register cr2)
{
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	getEAX_0_Rsum(cr1, cr2);
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);	// PC for exception reporting
}
1183    
/*
 * Prologue for D-form stores: EAX = (cr1|0)+imm (effective address),
 * EDX = cr2 (value to store), ESI = client PC; JIT state flushed for
 * a call to a ppc_write_effective_*_asm helper.
 */
void ppc_opc_gen_helper_st(PPC_Register cr1, uint32 imm, PPC_Register cr2)
{
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	getEAX_0_IsumAndEDX(cr1, imm, cr2);
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);	// PC for exception reporting
}
1192    
/*
 * Prologue for D-form stores with update: EAX = cr1 + imm (rA never 0),
 * EDX = cr2 (value to store), ESI = client PC; JIT state flushed.
 */
void ppc_opc_gen_helper_stu(PPC_Register cr1, uint32 imm, PPC_Register cr2)
{
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	getEAXIsumAndEDX(cr1, imm, cr2);
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);	// PC for exception reporting
}
1201    
/*
 * Prologue for X-form stores with update: EAX = cr1 + cr2 (rA never 0),
 * EDX = cr3 (value to store), ESI = client PC; JIT state flushed.
 */
void ppc_opc_gen_helper_stux(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
{
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	getEAXRsumAndEDX(cr1, cr2, cr3);
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);	// PC for exception reporting
}
1210    
/*
 * Prologue for X-form stores: EAX = (cr1|0) + cr2, EDX = cr3 (value to
 * store), ESI = client PC; JIT state flushed.
 */
void ppc_opc_gen_helper_stx(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
{
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	getEAX_0_RsumAndEDX(cr1, cr2, cr3);
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);	// PC for exception reporting
}
1219     /*
1220    
1221     void ppc_opc_gen_helper_l(PPC_Register cr1, uint32 imm)
1222     {
1223     jitcClobberAll();
1224     byte modrm[6];
1225     if (cr1 == PPC_GPR(0)) {
1226     asmALURegImm(X86_MOV, EAX, imm);
1227     } else {
1228     asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1229     asmALURegImm(X86_ADD, EAX, imm);
1230     }
1231     asmALURegImm(X86_MOV, ESI, gJITC.pc);
1232     }
1233    
1234     void ppc_opc_gen_helper_lu(PPC_Register cr1, uint32 imm)
1235     {
1236     jitcClobberAll();
1237     byte modrm[6];
1238     asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1239     asmALURegImm(X86_ADD, EAX, imm);
1240     asmALURegImm(X86_MOV, ESI, gJITC.pc);
1241     }
1242    
1243     void ppc_opc_gen_helper_lux(PPC_Register cr1, PPC_Register cr2)
1244     {
1245     jitcClobberAll();
1246     byte modrm[6];
1247     asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1248     asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1249     asmALURegImm(X86_MOV, ESI, gJITC.pc);
1250     }
1251    
1252     void ppc_opc_gen_helper_lx(PPC_Register cr1, PPC_Register cr2)
1253     {
1254     jitcClobberAll();
1255     byte modrm[6];
1256     if (cr1 == PPC_GPR(0)) {
1257     asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1258     } else {
1259     asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1260     asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1261     }
1262     asmALURegImm(X86_MOV, ESI, gJITC.pc);
1263     }
1264    
1265     void ppc_opc_gen_helper_st(PPC_Register cr1, uint32 imm, PPC_Register cr2)
1266     {
1267     jitcClobberAll();
1268     byte modrm[6];
1269     if (cr1 == PPC_GPR(0)) {
1270     asmALURegImm(X86_MOV, EAX, imm);
1271     asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1272     } else {
1273     asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1274     asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1275     asmALURegImm(X86_ADD, EAX, imm);
1276     }
1277     asmALURegImm(X86_MOV, ESI, gJITC.pc);
1278     }
1279    
1280     void ppc_opc_gen_helper_stu(PPC_Register cr1, uint32 imm, PPC_Register cr2)
1281     {
1282     jitcClobberAll();
1283     byte modrm[6];
1284     asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1285     asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1286     asmALURegImm(X86_ADD, EAX, imm);
1287     asmALURegImm(X86_MOV, ESI, gJITC.pc);
1288     }
1289    
1290     void ppc_opc_gen_helper_stux(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
1291     {
1292     jitcClobberAll();
1293     byte modrm[6];
1294     asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1295     asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr3));
1296     asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1297     asmALURegImm(X86_MOV, ESI, gJITC.pc);
1298     }
1299    
1300     void ppc_opc_gen_helper_stx(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
1301     {
1302     jitcClobberAll();
1303     byte modrm[6];
1304     if (cr1 == PPC_GPR(0)) {
1305     asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr3));
1306     asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1307     } else {
1308     asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1309     asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr3));
1310     asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1311     }
1312     asmALURegImm(X86_MOV, ESI, gJITC.pc);
1313     }
1314     */
1315    
1316     uint64 FASTCALL ppc_opc_single_to_double(uint32 r)
1317     {
1318     ppc_single s;
1319     ppc_double d;
1320     uint64 ret;
1321     ppc_fpu_unpack_single(s, r);
1322     ppc_fpu_single_to_double(s, d);
1323     ppc_fpu_pack_double(d, ret);
1324     return ret;
1325     }
1326    
1327     uint32 FASTCALL ppc_opc_double_to_single(uint64 r)
1328     {
1329     uint32 s;
1330     ppc_double d;
1331     ppc_fpu_unpack_double(d, r);
1332     ppc_fpu_pack_single(d, s);
1333     return s;
1334     }
1335    
1336    
1337     /*
1338     * dcbz Data Cache Clear to Zero
1339     * .464
1340     */
1341     void ppc_opc_dcbz()
1342     {
1343     //PPC_L1_CACHE_LINE_SIZE
1344     int rA, rD, rB;
1345     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1346     // assert rD=0
1347     uint32 a = (rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB];
1348     // BAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
1349     ppc_write_effective_dword(a, 0)
1350     || ppc_write_effective_dword(a+8, 0)
1351     || ppc_write_effective_dword(a+16, 0)
1352     || ppc_write_effective_dword(a+24, 0);
1353     }
/*
 * JIT translation of dcbz: computes the EA into EAX, saves it in
 * gCPU.temp (the write helper does not preserve EAX), then emits four
 * calls to the write-dword helper at offsets 0/8/16/24, each storing
 * the 64-bit zero ECX:EDX.
 *
 * TODO(review): like the interpreter version, the EA is not aligned
 * down to the 32-byte cache-block boundary before the writes --
 * architecturally dcbz clears the aligned block containing EA; confirm
 * and mask EAX accordingly.
 */
JITCFlow ppc_opc_gen_dcbz()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	getEAX_0_Rsum(PPC_GPR(rA), PPC_GPR(rB));
	jitcClobberRegister();
	asmALURegImm(X86_MOV, ECX, 0);
	asmMOVDMemReg((uint32)&gCPU.temp, EAX);	// stash EA: helper trashes EAX
	asmALURegImm(X86_MOV, EDX, 0);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_dword_asm);
	asmALURegImm(X86_MOV, ECX, 0);
	asmMOVRegDMem(EAX, (uint32)&gCPU.temp);	// reload EA for next chunk
	asmALURegImm(X86_MOV, EDX, 0);
	asmALURegImm(X86_ADD, EAX, 8);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_dword_asm);
	asmALURegImm(X86_MOV, ECX, 0);
	asmMOVRegDMem(EAX, (uint32)&gCPU.temp);
	asmALURegImm(X86_MOV, EDX, 0);
	asmALURegImm(X86_ADD, EAX, 16);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_dword_asm);
	asmALURegImm(X86_MOV, ECX, 0);
	asmMOVRegDMem(EAX, (uint32)&gCPU.temp);
	asmALURegImm(X86_MOV, EDX, 0);
	asmALURegImm(X86_ADD, EAX, 24);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_dword_asm);
	return flowEndBlock;
}
1387    
1388     /*
1389     * lbz Load Byte and Zero
1390     * .521
1391     */
1392     void ppc_opc_lbz()
1393     {
1394     int rA, rD;
1395     uint32 imm;
1396     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1397     uint8 r;
1398     int ret = ppc_read_effective_byte((rA?gCPU.gpr[rA]:0)+imm, r);
1399     if (ret == PPC_MMU_OK) {
1400     gCPU.gpr[rD] = r;
1401     }
1402     }
/*
 * JIT translation of lbz: EA in EAX via the D-form load prologue, call
 * the read-byte helper, and map its result register (EDX) as the new
 * dirty home of rD.
 */
JITCFlow ppc_opc_gen_lbz()
{
	int rA, rD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
	ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_byte_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	return flowContinue;
}
1413     /*
1414     * lbzu Load Byte and Zero with Update
1415     * .522
1416     */
1417     void ppc_opc_lbzu()
1418     {
1419     int rA, rD;
1420     uint32 imm;
1421     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1422     // FIXME: check rA!=0 && rA!=rD
1423     uint8 r;
1424     int ret = ppc_read_effective_byte(gCPU.gpr[rA]+imm, r);
1425     if (ret == PPC_MMU_OK) {
1426     gCPU.gpr[rA] += imm;
1427     gCPU.gpr[rD] = r;
1428     }
1429     }
/*
 * JIT translation of lbzu: read byte at rA+imm into rD (via EDX), then
 * emit the rA update. A zero displacement needs no update code.
 */
JITCFlow ppc_opc_gen_lbzu()
{
	int rA, rD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
	ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_byte_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	if (imm) {
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
		asmALURegImm(X86_ADD, a, imm);
	}
	return flowContinue;
}
1444     /*
1445     * lbzux Load Byte and Zero with Update Indexed
1446     * .523
1447     */
1448     void ppc_opc_lbzux()
1449     {
1450     int rA, rD, rB;
1451     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1452     // FIXME: check rA!=0 && rA!=rD
1453     uint8 r;
1454     int ret = ppc_read_effective_byte(gCPU.gpr[rA]+gCPU.gpr[rB], r);
1455     if (ret == PPC_MMU_OK) {
1456     gCPU.gpr[rA] += gCPU.gpr[rB];
1457     gCPU.gpr[rD] = r;
1458     }
1459     }
/*
 * JIT translation of lbzux: read byte at rA+rB, map rD from EDX, and
 * emit rA += rB. When rD == rB the ordinary sequence would overwrite
 * rB's value before the update-add, so rB is added from its flushed
 * in-memory copy (still pre-load) before rD's mapping is established.
 */
JITCFlow ppc_opc_gen_lbzux()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_byte_asm);
	if (rD == rB) {
		// don't ask...
		byte modrm[6];
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA), NATIVE_REG | EAX);
		asmALURegMem(X86_ADD, a, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
		jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	} else {
		jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
		NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
		asmALURegReg(X86_ADD, a, b);
	}
	return flowContinue;
}
1480     /*
1481     * lbzx Load Byte and Zero Indexed
1482     * .524
1483     */
1484     void ppc_opc_lbzx()
1485     {
1486     int rA, rD, rB;
1487     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1488     uint8 r;
1489     int ret = ppc_read_effective_byte((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
1490     if (ret == PPC_MMU_OK) {
1491     gCPU.gpr[rD] = r;
1492     }
1493     }
/*
 * JIT translation of lbzx: EA = (rA|0)+rB via the X-form load
 * prologue, call the read-byte helper, map rD from EDX.
 */
JITCFlow ppc_opc_gen_lbzx()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_byte_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	return flowContinue;
}
1503     /*
1504     * lfd Load Floating-Point Double
1505     * .530
1506     */
1507     void ppc_opc_lfd()
1508     {
1509     if ((gCPU.msr & MSR_FP) == 0) {
1510     ppc_exception(PPC_EXC_NO_FPU);
1511     return;
1512     }
1513     int rA, frD;
1514     uint32 imm;
1515     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frD, rA, imm);
1516     uint64 r;
1517     int ret = ppc_read_effective_dword((rA?gCPU.gpr[rA]:0)+imm, r);
1518     if (ret == PPC_MMU_OK) {
1519     gCPU.fpr[frD] = r;
1520     }
1521     }
/*
 * JIT translation of lfd: after the FPU-availability check, call the
 * read-dword helper and map the 64-bit result ECX:EDX as the high and
 * low halves of frD.
 */
JITCFlow ppc_opc_gen_lfd()
{
	ppc_opc_gen_check_fpu();
	int rA, frD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frD, rA, imm);
	jitcFloatRegisterClobberAll();
	ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_dword_asm);
	jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | ECX);
	jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EDX);
	return flowContinue;
}
1535     /*
1536     * lfdu Load Floating-Point Double with Update
1537     * .531
1538     */
1539     void ppc_opc_lfdu()
1540     {
1541     if ((gCPU.msr & MSR_FP) == 0) {
1542     ppc_exception(PPC_EXC_NO_FPU);
1543     return;
1544     }
1545     int rA, frD;
1546     uint32 imm;
1547     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frD, rA, imm);
1548     // FIXME: check rA!=0
1549     uint64 r;
1550     int ret = ppc_read_effective_dword(gCPU.gpr[rA]+imm, r);
1551     if (ret == PPC_MMU_OK) {
1552     gCPU.fpr[frD] = r;
1553     gCPU.gpr[rA] += imm;
1554     }
1555     }
/*
 * JIT translation of lfdu: read dword at rA+imm into frD (ECX:EDX),
 * then emit the rA update (skipped for a zero displacement).
 */
JITCFlow ppc_opc_gen_lfdu()
{
	ppc_opc_gen_check_fpu();
	int rA, frD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frD, rA, imm);
	jitcFloatRegisterClobberAll();
	ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_dword_asm);
	jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | ECX);
	jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EDX);
	if (imm) {
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
		asmALURegImm(X86_ADD, a, imm);
	}
	return flowContinue;
}
1573     /*
1574     * lfdux Load Floating-Point Double with Update Indexed
1575     * .532
1576     */
1577     void ppc_opc_lfdux()
1578     {
1579     if ((gCPU.msr & MSR_FP) == 0) {
1580     ppc_exception(PPC_EXC_NO_FPU);
1581     return;
1582     }
1583     int rA, frD, rB;
1584     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, rB);
1585     // FIXME: check rA!=0
1586     uint64 r;
1587     int ret = ppc_read_effective_dword(gCPU.gpr[rA]+gCPU.gpr[rB], r);
1588     if (ret == PPC_MMU_OK) {
1589     gCPU.gpr[rA] += gCPU.gpr[rB];
1590     gCPU.fpr[frD] = r;
1591     }
1592     }
/*
 * JIT translation of lfdux: read dword at rA+rB into frD (ECX:EDX),
 * then emit rA += rB.
 *
 * NOTE(review): jitcFloatRegisterClobberAll() runs *after* the address
 * prologue here, while the sibling gen_lfdu/gen_lfsux clobber the
 * float registers first -- confirm this ordering is harmless.
 */
JITCFlow ppc_opc_gen_lfdux()
{
	ppc_opc_gen_check_fpu();
	int rA, frD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frD, rA, rB);
	ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
	jitcFloatRegisterClobberAll();
	asmCALL((NativeAddress)ppc_read_effective_dword_asm);
	jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | ECX);
	jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EDX);
	NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
	NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
	asmALURegReg(X86_ADD, a, b);
	return flowContinue;
}
1608     /*
1609     * lfdx Load Floating-Point Double Indexed
1610     * .533
1611     */
1612     void ppc_opc_lfdx()
1613     {
1614     if ((gCPU.msr & MSR_FP) == 0) {
1615     ppc_exception(PPC_EXC_NO_FPU);
1616     return;
1617     }
1618     int rA, frD, rB;
1619     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, rB);
1620     uint64 r;
1621     int ret = ppc_read_effective_dword((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
1622     if (ret == PPC_MMU_OK) {
1623     gCPU.fpr[frD] = r;
1624     }
1625     }
/*
 * JIT translation of lfdx: EA = (rA|0)+rB, call the read-dword helper,
 * map frD's halves from ECX:EDX.
 */
JITCFlow ppc_opc_gen_lfdx()
{
	ppc_opc_gen_check_fpu();
	int rA, frD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frD, rA, rB);
	jitcFloatRegisterClobberAll();
	ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_dword_asm);
	jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | ECX);
	jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EDX);
	return flowContinue;
}
1638     /*
1639     * lfs Load Floating-Point Single
1640     * .534
1641     */
1642     void ppc_opc_lfs()
1643     {
1644     if ((gCPU.msr & MSR_FP) == 0) {
1645     ppc_exception(PPC_EXC_NO_FPU);
1646     return;
1647     }
1648     int rA, frD;
1649     uint32 imm;
1650     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frD, rA, imm);
1651     uint32 r;
1652     int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+imm, r);
1653     if (ret == PPC_MMU_OK) {
1654     ppc_single s;
1655     ppc_double d;
1656     ppc_fpu_unpack_single(s, r);
1657     ppc_fpu_single_to_double(s, d);
1658     ppc_fpu_pack_double(d, gCPU.fpr[frD]);
1659     }
1660     }
/*
 * JIT translation of lfs: read the word, move it to EAX (FASTCALL
 * argument), convert via ppc_opc_single_to_double which returns the
 * 64-bit result in EDX:EAX, and map those as frD's halves.
 */
JITCFlow ppc_opc_gen_lfs()
{
	ppc_opc_gen_check_fpu();
	int rA, frD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frD, rA, imm);
	jitcFloatRegisterClobberAll();
	ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	asmALURegReg(X86_MOV, EAX, EDX);
	asmCALL((NativeAddress)ppc_opc_single_to_double);
	jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | EDX);
	jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EAX);
	return flowContinue;
}
1676     /*
1677     * lfsu Load Floating-Point Single with Update
1678     * .535
1679     */
1680     void ppc_opc_lfsu()
1681     {
1682     if ((gCPU.msr & MSR_FP) == 0) {
1683     ppc_exception(PPC_EXC_NO_FPU);
1684     return;
1685     }
1686     int rA, frD;
1687     uint32 imm;
1688     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frD, rA, imm);
1689     // FIXME: check rA!=0
1690     uint32 r;
1691     int ret = ppc_read_effective_word(gCPU.gpr[rA]+imm, r);
1692     if (ret == PPC_MMU_OK) {
1693     ppc_single s;
1694     ppc_double d;
1695     ppc_fpu_unpack_single(s, r);
1696     ppc_fpu_single_to_double(s, d);
1697     ppc_fpu_pack_double(d, gCPU.fpr[frD]);
1698     gCPU.gpr[rA] += imm;
1699     }
1700     }
/*
 * JIT translation of lfsu: like gen_lfs (convert via
 * ppc_opc_single_to_double, result in EDX:EAX), then emit the rA
 * update (skipped for a zero displacement).
 */
JITCFlow ppc_opc_gen_lfsu()
{
	ppc_opc_gen_check_fpu();
	int rA, frD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frD, rA, imm);
	jitcFloatRegisterClobberAll();
	ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	asmALURegReg(X86_MOV, EAX, EDX);
	asmCALL((NativeAddress)ppc_opc_single_to_double);
	jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | EDX);
	jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EAX);
	if (imm) {
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
		asmALURegImm(X86_ADD, a, imm);
	}
	return flowContinue;
}
1720     /*
1721     * lfsux Load Floating-Point Single with Update Indexed
1722     * .536
1723     */
1724     void ppc_opc_lfsux()
1725     {
1726     if ((gCPU.msr & MSR_FP) == 0) {
1727     ppc_exception(PPC_EXC_NO_FPU);
1728     return;
1729     }
1730     int rA, frD, rB;
1731     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, rB);
1732     // FIXME: check rA!=0
1733     uint32 r;
1734     int ret = ppc_read_effective_word(gCPU.gpr[rA]+gCPU.gpr[rB], r);
1735     if (ret == PPC_MMU_OK) {
1736     gCPU.gpr[rA] += gCPU.gpr[rB];
1737     ppc_single s;
1738     ppc_double d;
1739     ppc_fpu_unpack_single(s, r);
1740     ppc_fpu_single_to_double(s, d);
1741     ppc_fpu_pack_double(d, gCPU.fpr[frD]);
1742     }
1743     }
/*
 * JIT translation of lfsux: read word at rA+rB, convert via
 * ppc_opc_single_to_double (result EDX:EAX -> frD), then emit
 * rA += rB.
 */
JITCFlow ppc_opc_gen_lfsux()
{
	ppc_opc_gen_check_fpu();
	int rA, frD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frD, rA, rB);
	jitcFloatRegisterClobberAll();
	ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	asmALURegReg(X86_MOV, EAX, EDX);
	asmCALL((NativeAddress)ppc_opc_single_to_double);
	jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | EDX);
	jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EAX);
	NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
	NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
	asmALURegReg(X86_ADD, a, b);
	return flowContinue;
}
1761     /*
1762     * lfsx Load Floating-Point Single Indexed
1763     * .537
1764     */
1765     void ppc_opc_lfsx()
1766     {
1767     if ((gCPU.msr & MSR_FP) == 0) {
1768     ppc_exception(PPC_EXC_NO_FPU);
1769     return;
1770     }
1771     int rA, frD, rB;
1772     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, rB);
1773     uint32 r;
1774     int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
1775     if (ret == PPC_MMU_OK) {
1776     ppc_single s;
1777     ppc_double d;
1778     ppc_fpu_unpack_single(s, r);
1779     ppc_fpu_single_to_double(s, d);
1780     ppc_fpu_pack_double(d, gCPU.fpr[frD]);
1781     }
1782     }
/*
 * JIT translation of lfsx: EA = (rA|0)+rB, read word, convert via
 * ppc_opc_single_to_double, map frD from EDX:EAX.
 */
JITCFlow ppc_opc_gen_lfsx()
{
	ppc_opc_gen_check_fpu();
	int rA, frD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frD, rA, rB);
	jitcFloatRegisterClobberAll();
	ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	asmALURegReg(X86_MOV, EAX, EDX);
	asmCALL((NativeAddress)ppc_opc_single_to_double);
	jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | EDX);
	jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EAX);
	return flowContinue;
}
1797     /*
1798     * lha Load Half Word Algebraic
1799     * .538
1800     */
1801     void ppc_opc_lha()
1802     {
1803     int rA, rD;
1804     uint32 imm;
1805     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1806     uint16 r;
1807     int ret = ppc_read_effective_half((rA?gCPU.gpr[rA]:0)+imm, r);
1808     if (ret == PPC_MMU_OK) {
1809     gCPU.gpr[rD] = (r&0x8000)?(r|0xffff0000):r;
1810     }
1811     }
/*
 * JIT translation of lha: uses the sign-extending halfword read helper
 * (half_s) and maps its result (EDX) as rD.
 */
JITCFlow ppc_opc_gen_lha()
{
	int rA, rD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
	ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_half_s_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	return flowContinue;
}
1822     /*
1823     * lhau Load Half Word Algebraic with Update
1824     * .539
1825     */
1826     void ppc_opc_lhau()
1827     {
1828     int rA, rD;
1829     uint32 imm;
1830     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1831     uint16 r;
1832     // FIXME: rA != 0
1833     int ret = ppc_read_effective_half(gCPU.gpr[rA]+imm, r);
1834     if (ret == PPC_MMU_OK) {
1835     gCPU.gpr[rA] += imm;
1836     gCPU.gpr[rD] = (r&0x8000)?(r|0xffff0000):r;
1837     }
1838     }
/*
 * JIT translation of lhau: sign-extending halfword read at rA+imm into
 * rD (EDX), then emit the rA update (skipped for zero displacement).
 */
JITCFlow ppc_opc_gen_lhau()
{
	int rA, rD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
	ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_half_s_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	if (imm) {
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
		asmALURegImm(X86_ADD, a, imm);
	}
	return flowContinue;
}
1853     /*
1854     * lhaux Load Half Word Algebraic with Update Indexed
1855     * .540
1856     */
1857     void ppc_opc_lhaux()
1858     {
1859     int rA, rD, rB;
1860     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1861     uint16 r;
1862     // FIXME: rA != 0
1863     int ret = ppc_read_effective_half(gCPU.gpr[rA]+gCPU.gpr[rB], r);
1864     if (ret == PPC_MMU_OK) {
1865     gCPU.gpr[rA] += gCPU.gpr[rB];
1866     gCPU.gpr[rD] = (r&0x8000)?(r|0xffff0000):r;
1867     }
1868     }
/*
 * JIT translation of lhaux: sign-extending halfword read at rA+rB,
 * then rA += rB. As in gen_lbzux, the rD == rB case adds rB from its
 * flushed in-memory copy before rD's mapping overwrites it.
 */
JITCFlow ppc_opc_gen_lhaux()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_half_s_asm);
	if (rD == rB) {
		byte modrm[6];
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA), NATIVE_REG | EAX);
		asmALURegMem(X86_ADD, a, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
		jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	} else {
		jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
		NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
		asmALURegReg(X86_ADD, a, b);
	}
	return flowContinue;
}
1888     /*
1889     * lhax Load Half Word Algebraic Indexed
1890     * .541
1891     */
1892     void ppc_opc_lhax()
1893     {
1894     int rA, rD, rB;
1895     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1896     uint16 r;
1897     // FIXME: rA != 0
1898     int ret = ppc_read_effective_half((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
1899     if (ret == PPC_MMU_OK) {
1900     gCPU.gpr[rD] = (r&0x8000) ? (r|0xffff0000):r;
1901     }
1902     }
/*
 * JIT translation of lhax: EA = (rA|0)+rB, sign-extending halfword
 * read, map rD from EDX.
 */
JITCFlow ppc_opc_gen_lhax()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_half_s_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	return flowContinue;
}
1912     /*
1913     * lhbrx Load Half Word Byte-Reverse Indexed
1914     * .542
1915     */
1916     void ppc_opc_lhbrx()
1917     {
1918     int rA, rD, rB;
1919     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1920     uint16 r;
1921     int ret = ppc_read_effective_half((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
1922     if (ret == PPC_MMU_OK) {
1923     gCPU.gpr[rD] = ppc_bswap_half(r);
1924     }
1925     }
/*
 * JIT translation of lhbrx: zero-extending halfword read, then swap
 * the two low bytes of EDX (XCHG DL, DH) to byte-reverse, and map rD.
 */
JITCFlow ppc_opc_gen_lhbrx()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
	asmALURegReg8(X86_XCHG, DL, DH);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	return flowContinue;
}
1936     /*
1937     * lhz Load Half Word and Zero
1938     * .543
1939     */
1940     void ppc_opc_lhz()
1941     {
1942     int rA, rD;
1943     uint32 imm;
1944     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1945     uint16 r;
1946     int ret = ppc_read_effective_half((rA?gCPU.gpr[rA]:0)+imm, r);
1947     if (ret == PPC_MMU_OK) {
1948     gCPU.gpr[rD] = r;
1949     }
1950     }
// JIT translation of lhz: D-form zero-extending halfword load.
JITCFlow ppc_opc_gen_lhz()
{
	int rA, rD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
	// emit the effective-address setup for a D-form load (rA + imm)
	ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
	// the loaded value (EDX, per the load-helper convention) becomes rD
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	return flowContinue;
}
1961     /*
1962     * lhzu Load Half Word and Zero with Update
1963     * .544
1964     */
1965     void ppc_opc_lhzu()
1966     {
1967     int rA, rD;
1968     uint32 imm;
1969     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1970     uint16 r;
1971     // FIXME: rA!=0
1972     int ret = ppc_read_effective_half(gCPU.gpr[rA]+imm, r);
1973     if (ret == PPC_MMU_OK) {
1974     gCPU.gpr[rD] = r;
1975     gCPU.gpr[rA] += imm;
1976     }
1977     }
// JIT translation of lhzu: D-form halfword load with rA update.
JITCFlow ppc_opc_gen_lhzu()
{
	int rA, rD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
	ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	// perform the rA += imm update; skipped entirely when imm == 0
	if (imm) {
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
		asmALURegImm(X86_ADD, a, imm);
	}
	return flowContinue;
}
1992     /*
1993     * lhzux Load Half Word and Zero with Update Indexed
1994     * .545
1995     */
1996     void ppc_opc_lhzux()
1997     {
1998     int rA, rD, rB;
1999     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2000     uint16 r;
2001     // FIXME: rA != 0
2002     int ret = ppc_read_effective_half(gCPU.gpr[rA]+gCPU.gpr[rB], r);
2003     if (ret == PPC_MMU_OK) {
2004     gCPU.gpr[rA] += gCPU.gpr[rB];
2005     gCPU.gpr[rD] = r;
2006     }
2007     }
// JIT translation of lhzux: indexed halfword load with rA update.
JITCFlow ppc_opc_gen_lhzux()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
	if (rD == rB) {
		// rD == rB: mapping rD (to EDX) would overwrite rB's value,
		// so add rB to rA from its in-memory copy first
		byte modrm[6];
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA), NATIVE_REG | EAX);
		asmALURegMem(X86_ADD, a, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
		jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	} else {
		jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
		NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
		asmALURegReg(X86_ADD, a, b);
	}
	return flowContinue;
}
2027     /*
2028     * lhzx Load Half Word and Zero Indexed
2029     * .546
2030     */
2031     void ppc_opc_lhzx()
2032     {
2033     int rA, rD, rB;
2034     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2035     uint16 r;
2036     int ret = ppc_read_effective_half((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
2037     if (ret == PPC_MMU_OK) {
2038     gCPU.gpr[rD] = r;
2039     }
2040     }
// JIT translation of lhzx: indexed zero-extending halfword load.
JITCFlow ppc_opc_gen_lhzx()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	return flowContinue;
}
2050     /*
2051     * lmw Load Multiple Word
2052     * .547
2053     */
2054     void ppc_opc_lmw()
2055     {
2056     int rD, rA;
2057     uint32 imm;
2058     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
2059     uint32 ea = (rA ? gCPU.gpr[rA] : 0) + imm;
2060     while (rD <= 31) {
2061     if (ppc_read_effective_word(ea, gCPU.gpr[rD])) {
2062     return;
2063     }
2064     rD++;
2065     ea += 4;
2066     }
2067     }
// JIT translation of lmw: loads rD..r31 from consecutive words.
JITCFlow ppc_opc_gen_lmw()
{
	int rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
	// as long as two registers remain, fetch them pairwise with one
	// dword read (pair delivered in ECX/EDX)
	while (rD <= 30) {
		ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
		asmCALL((NativeAddress)ppc_read_effective_dword_asm);
		jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | ECX);
		jitcMapClientRegisterDirty(PPC_GPR(rD+1), NATIVE_REG | EDX);
		rD += 2;
		imm += 8;
	}
	// odd register count: r31 is left over and gets a single word read
	if (rD == 31) {
		ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
		asmCALL((NativeAddress)ppc_read_effective_word_asm);
		jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	}
	return flowContinue;
}
2088     /*
2089     * lswi Load String Word Immediate
2090     * .548
2091     */
2092     void ppc_opc_lswi()
2093     {
2094     int rA, rD, NB;
2095     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, NB);
2096     if (NB==0) NB=32;
2097     uint32 ea = rA ? gCPU.gpr[rA] : 0;
2098     uint32 r = 0;
2099     int i = 4;
2100     uint8 v;
2101     while (NB > 0) {
2102     if (!i) {
2103     i = 4;
2104     gCPU.gpr[rD] = r;
2105     rD++;
2106     rD%=32;
2107     r = 0;
2108     }
2109     if (ppc_read_effective_byte(ea, v)) {
2110     return;
2111     }
2112     r<<=8;
2113     r|=v;
2114     ea++;
2115     i--;
2116     NB--;
2117     }
2118     while (i) { r<<=8; i--; }
2119     gCPU.gpr[rD] = r;
2120     }
// JIT translation of lswi: delegates the whole string load to an asm
// helper with a fixed register-passing convention.
JITCFlow ppc_opc_gen_lswi()
{
	int rA, rD, NB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, NB);
	if (NB==0) NB=32;	// NB field of 0 encodes 32 bytes
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	// EAX = start effective address (0 when rA is r0)
	if (rA) {
		jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
	} else {
		asmALURegImm(X86_MOV, EAX, 0);
	}
	// ECX = byte count, EBX = first target register, ESI = client pc
	asmALURegImm(X86_MOV, ECX, NB);
	asmALURegImm(X86_MOV, EBX, rD);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	jitcClobberAll();
	asmCALL((NativeAddress)ppc_opc_lswi_asm);
	return flowEndBlock;
}
2140     /*
2141     * lswx Load String Word Indexed
2142     * .550
2143     */
2144     void ppc_opc_lswx()
2145     {
2146     int rA, rD, rB;
2147     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2148     int NB = XER_n(gCPU.xer);
2149     uint32 ea = gCPU.gpr[rB] + (rA ? gCPU.gpr[rA] : 0);
2150    
2151     uint32 r = 0;
2152     int i = 4;
2153     uint8 v;
2154     while (NB > 0) {
2155     if (!i) {
2156     i = 4;
2157     gCPU.gpr[rD] = r;
2158     rD++;
2159     rD%=32;
2160     r = 0;
2161     }
2162     if (ppc_read_effective_byte(ea, v)) {
2163     return;
2164     }
2165     r<<=8;
2166     r|=v;
2167     ea++;
2168     i--;
2169     NB--;
2170     }
2171     while (i) { r<<=8; i--; }
2172     gCPU.gpr[rD] = r;
2173     }
// JIT translation of lswx: same asm helper as lswi, but the byte count
// is extracted from XER at runtime and a zero count skips the call.
JITCFlow ppc_opc_gen_lswx()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	jitcGetClientRegister(PPC_XER, NATIVE_REG | ECX);
	// EAX = rB + (rA|0)
	if (rA) {
		byte modrm[6];
		jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
		asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
	} else {
		jitcGetClientRegister(PPC_GPR(rB), NATIVE_REG | EAX);
	}
	// ECX = XER byte-count field; AND also sets ZF for the zero test below
	asmALURegImm(X86_AND, ECX, 0x7f);
	jitcClobberAll();
	// count == 0: jump around the helper call (lswx with n=0 is a no-op)
	NativeAddress fixup = asmJxxFixup(X86_Z);
	asmALURegImm(X86_MOV, EBX, rD);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_opc_lswi_asm);
	asmResolveFixup(fixup, asmHERE());
	return flowEndBlock;
}
2197     /*
2198     * lwarx Load Word and Reserve Indexed
2199     * .553
2200     */
2201     void ppc_opc_lwarx()
2202     {
2203     int rA, rD, rB;
2204     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2205     uint32 r;
2206     int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
2207     if (ret == PPC_MMU_OK) {
2208     gCPU.gpr[rD] = r;
2209     gCPU.reserve = r;
2210     gCPU.have_reservation = 1;
2211     }
2212     }
// JIT translation of lwarx: word load plus reservation bookkeeping.
JITCFlow ppc_opc_gen_lwarx()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	byte modrm[6];
	// store the loaded value into gCPU.reserve and set have_reservation,
	// mirroring the interpreter version of lwarx
	asmALUMemReg(X86_MOV, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.reserve), EDX);
	asmALUMemImm8(X86_MOV, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.have_reservation), 1);
	return flowContinue;
}
2225     /*
2226     * lwbrx Load Word Byte-Reverse Indexed
2227     * .556
2228     */
2229     void ppc_opc_lwbrx()
2230     {
2231     int rA, rD, rB;
2232     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2233     uint32 r;
2234     int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
2235     if (ret == PPC_MMU_OK) {
2236     gCPU.gpr[rD] = ppc_bswap_word(r);
2237     }
2238     }
// JIT translation of lwbrx: indexed word load with byte reversal.
JITCFlow ppc_opc_gen_lwbrx()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	// reverse the byte order of the loaded word in place
	asmBSWAP(EDX);
	return flowContinue;
}
2249     /*
2250     * lwz Load Word and Zero
2251     * .557
2252     */
2253     void ppc_opc_lwz()
2254     {
2255     int rA, rD;
2256     uint32 imm;
2257     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
2258     uint32 r;
2259     int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+imm, r);
2260     if (ret == PPC_MMU_OK) {
2261     gCPU.gpr[rD] = r;
2262     }
2263     }
// JIT translation of lwz: D-form word load.
JITCFlow ppc_opc_gen_lwz()
{
	int rA, rD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
	ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	return flowContinue;
}
2274     /*
2275     * lbzu Load Word and Zero with Update
2276     * .558
2277     */
2278     void ppc_opc_lwzu()
2279     {
2280     int rA, rD;
2281     uint32 imm;
2282     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
2283     // FIXME: check rA!=0 && rA!=rD
2284     uint32 r;
2285     int ret = ppc_read_effective_word(gCPU.gpr[rA]+imm, r);
2286     if (ret == PPC_MMU_OK) {
2287     gCPU.gpr[rA] += imm;
2288     gCPU.gpr[rD] = r;
2289     }
2290     }
// JIT translation of lwzu: D-form word load with rA update.
JITCFlow ppc_opc_gen_lwzu()
{
	int rA, rD;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
	ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	// rA += imm; skipped when the displacement is zero
	if (imm) {
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
		asmALURegImm(X86_ADD, a, imm);
	}
	return flowContinue;
}
2305     /*
2306     * lwzux Load Word and Zero with Update Indexed
2307     * .559
2308     */
2309     void ppc_opc_lwzux()
2310     {
2311     int rA, rD, rB;
2312     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2313     // FIXME: check rA!=0 && rA!=rD
2314     uint32 r;
2315     int ret = ppc_read_effective_word(gCPU.gpr[rA]+gCPU.gpr[rB], r);
2316     if (ret == PPC_MMU_OK) {
2317     gCPU.gpr[rA] += gCPU.gpr[rB];
2318     gCPU.gpr[rD] = r;
2319     }
2320     }
// JIT translation of lwzux: indexed word load with rA update.
JITCFlow ppc_opc_gen_lwzux()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	if (rD == rB) {
		// don't ask...
		// (rD == rB: mapping rD to EDX would destroy rB, so rA is
		// updated from rB's in-memory copy before the mapping)
		byte modrm[6];
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA), NATIVE_REG | EAX);
		asmALURegMem(X86_ADD, a, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
		jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	} else {
		jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
		NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
		NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
		asmALURegReg(X86_ADD, a, b);
	}
	return flowContinue;
}
2341     /*
2342     * lwzx Load Word and Zero Indexed
2343     * .560
2344     */
2345     void ppc_opc_lwzx()
2346     {
2347     int rA, rD, rB;
2348     PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2349     uint32 r;
2350     int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
2351     if (ret == PPC_MMU_OK) {
2352     gCPU.gpr[rD] = r;
2353     }
2354     }
// JIT translation of lwzx: indexed word load.
JITCFlow ppc_opc_gen_lwzx()
{
	int rA, rD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
	ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
	return flowContinue;
}
2364    
/* Emits code computing gpr[rA] + gpr[rB] while trying to land the sum in
 * the caller-preferred native register `hint`.  Returns the register that
 * holds the sum, or REG_NO when no hint-friendly strategy applied (the
 * caller then falls back to the generic path).  Clobbers carry/flags on
 * any path that emits an ADD.
 */
static inline NativeReg FASTCALL ppc_opc_gen_helper_lvx_hint(int rA, int rB, int hint)
{
	NativeReg ret = REG_NO;
	byte modrm[6];

	NativeReg reg1 = jitcGetClientRegisterMapping(PPC_GPR(rA));
	NativeReg reg2 = jitcGetClientRegisterMapping(PPC_GPR(rB));

	if (reg1 == hint) {
		// rA already sits in the hint register: add rB into it
		jitcClobberCarryAndFlags();
		jitcClobberRegister(NATIVE_REG | reg1);
		ret = reg1;
		jitcTouchRegister(ret);

		if (reg2 != REG_NO) {
			asmALURegReg(X86_ADD, ret, reg2);
		} else {
			// rB not mapped: add from its in-memory slot
			asmALURegMem(X86_ADD, ret, modrm,
				x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
		}
	} else if (reg2 == hint) {
		// symmetric case: rB sits in the hint register
		jitcClobberCarryAndFlags();
		jitcClobberRegister(NATIVE_REG | reg2);
		ret = reg2;
		jitcTouchRegister(ret);

		if (reg1 != REG_NO) {
			asmALURegReg(X86_ADD, ret, reg1);
		} else {
			asmALURegMem(X86_ADD, ret, modrm,
				x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rA]));
		}
	} else if ((reg1 != REG_NO) && (reg2 != REG_NO)) {
		/* If both are in register space, and not the hint we're best
		 * off clobbering the hint, then using leal as a 3-operand
		 * ADD.
		 * This gives us the performance of an ADD, and removes the
		 * need for a later MOV into the hint.
		 */
		jitcClobberRegister(NATIVE_REG | hint);
		ret = (NativeReg)hint;
		jitcTouchRegister(ret);

		asmLEA(ret, modrm, x86_mem_sib(modrm, reg1, 1, reg2, 0));
	}

	return ret;
}
2413    
/* Emits code computing the effective address (rA|0) + rB for the vector
 * load/store opcodes and returns the native register holding it.  When
 * `hint` names a preferred register, the hint-aware helper above is tried
 * first.  The returned register is clobbered/touched so the allocator
 * will not reuse it behind the caller's back.
 */
static inline NativeReg FASTCALL ppc_opc_gen_helper_lvx(int rA, int rB, int hint=0)
{
	NativeReg ret = REG_NO;
	byte modrm[6];

	// rA == r0 means "no base": the EA is just gpr[rB]
	if (!rA) {
		ret = jitcGetClientRegisterMapping(PPC_GPR(rB));

		if (ret == REG_NO) {
			ret = jitcGetClientRegister(PPC_GPR(rB), hint);
		}

		jitcClobberRegister(NATIVE_REG | ret);
		jitcTouchRegister(ret);

		return ret;
	}

	// try to satisfy the caller's register preference first
	if (hint & NATIVE_REG) {
		ret = ppc_opc_gen_helper_lvx_hint(rA, rB, hint & 0x0f);

		if (ret != REG_NO)
			return ret;
	}

	jitcClobberCarryAndFlags();

	NativeReg reg1 = jitcGetClientRegisterMapping(PPC_GPR(rA));
	NativeReg reg2 = jitcGetClientRegisterMapping(PPC_GPR(rB));

	if (reg2 == REG_NO) {
		// rB unmapped: load rA into a register, add rB from memory
		ret = jitcGetClientRegister(PPC_GPR(rA));
		jitcClobberRegister(NATIVE_REG | ret);

		asmALURegMem(X86_ADD, ret, modrm,
			x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
	} else {
		// accumulate into rB's register; rA comes from register or memory
		jitcClobberRegister(NATIVE_REG | reg2);
		ret = reg2;

		if (reg1 != REG_NO) {
			asmALURegReg(X86_ADD, ret, reg1);
		} else {
			asmALURegMem(X86_ADD, ret, modrm,
				x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rA]));
		}
	}

	jitcTouchRegister(ret);
	return ret;
}
2465    
/* lvx		Load Vector Indexed
 * v.127
 *
 * Loads a 16-byte quantity from (rA|0)+rB into vector register vrD.
 * Raises the no-AltiVec exception when MSR[VEC] is clear (unless
 * compiled with __VEC_EXC_OFF__).
 */
void ppc_opc_lvx()
{
#ifndef __VEC_EXC_OFF__
	if ((gCPU.msr & MSR_VEC) == 0) {
		ppc_exception(PPC_EXC_NO_VEC);
		return;
	}
#endif
	VECTOR_DEBUG;
	int rA, vrD, rB;
	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
	Vector_t r;

	int ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]);

	int ret = ppc_read_effective_qword(ea, r);
	if (ret == PPC_MMU_OK) {
		gCPU.vr[vrD] = r;
	}
}
// JIT translation of lvx.  The active (#if 1) variant calls the qword
// read helper with EAX = EA and EDX = destination address of vr[vrD];
// the disabled #else variant split the load into two dword reads.
JITCFlow ppc_opc_gen_lvx()
{
	ppc_opc_gen_check_vec();
	int rA, vrD, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);
	jitcDropClientVectorRegister(vrD);
	jitcAssertFlushedVectorRegister(vrD);

	jitcClobberCarryAndFlags();
	NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);
#if 1
	jitcClobberAll();
	if (regA != EAX) {
		// EA did not land in EAX despite the hint: move it there
		//printf("*** hint miss r%u != r0\n", regA);
		asmALURegReg(X86_MOV, EAX, regA);
	}
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmALURegImm(X86_MOV, EDX, (uint32)&(gCPU.vr[vrD]));

	// SSE variant deliberately disabled by the `0 &&`
	if (0 && gJITC.hostCPUCaps.sse) {
		asmCALL((NativeAddress)ppc_read_effective_qword_sse_asm);
		gJITC.nativeVectorReg = vrD;
	} else {
		asmCALL((NativeAddress)ppc_read_effective_qword_asm);
	}
#else
	asmALURegImm(X86_AND, regA, ~0x0f);
	asmMOVDMemReg((uint32)&gCPU.vtemp, regA);

	jitcClobberAll();
	if (regA != EAX) {
		//printf("*** hint miss r%u != r0\n", regA);
		asmALURegReg(X86_MOV, EAX, regA);
	}
	asmALURegImm(X86_MOV, ESI, gJITC.pc);

	asmCALL((NativeAddress)ppc_read_effective_dword_asm);
	asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[3]), ECX);
	asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[2]), EDX);

	asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);
	asmALURegImm(X86_OR, EAX, 8);

	asmCALL((NativeAddress)ppc_read_effective_dword_asm);
	asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[1]), ECX);
	asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[0]), EDX);
#endif

	return flowContinue;
}
2539    
2540    
2541     /* lvxl Load Vector Index LRU
2542     * v.128
2543     */
2544     void ppc_opc_lvxl()
2545     {
2546     ppc_opc_lvx();
2547     /* This instruction should hint to the cache that the value won't be
2548     * needed again in memory anytime soon. We don't emulate the cache,
2549     * so this is effectively exactly the same as lvx.
2550     */
2551     }
2552     JITCFlow ppc_opc_gen_lvxl()
2553     {
2554     return ppc_opc_gen_lvx();
2555     }
2556    
2557    
2558     /* lvebx Load Vector Element Byte Indexed
2559     * v.119
2560     */
2561     void ppc_opc_lvebx()
2562     {
2563     #ifndef __VEC_EXC_OFF__
2564     if ((gCPU.msr & MSR_VEC) == 0) {
2565     ppc_exception(PPC_EXC_NO_VEC);
2566     return;
2567     }
2568     #endif
2569     VECTOR_DEBUG;
2570     int rA, vrD, rB;
2571     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
2572     uint32 ea;
2573     uint8 r;
2574     ea = (rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB];
2575     int ret = ppc_read_effective_byte(ea, r);
2576     if (ret == PPC_MMU_OK) {
2577     VECT_B(gCPU.vr[vrD], ea & 0x0f) = r;
2578     }
2579     }
// JIT translation of lvebx.  The EA is saved to gCPU.vtemp across the
// read call; afterwards EAX = ~(ea & 0x0f) = -(ea & 0x0f) - 1, so the
// store address base+16+EAX equals base + 15 - (ea & 0x0f), i.e. the
// element indexed from the top of the little-endian vector image.
JITCFlow ppc_opc_gen_lvebx()
{
	ppc_opc_gen_check_vec();
	int rA, vrD, rB;
	byte modrm[6];
	PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);
	jitcDropClientVectorRegister(vrD);
	jitcAssertFlushedVectorRegister(vrD);

	// vrD can no longer live in the dedicated native vector register
	if (vrD == gJITC.nativeVectorReg) {
		gJITC.nativeVectorReg = VECTREG_NO;
	}

	jitcClobberCarryAndFlags();
	NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);
	// stash the EA; the helper call below clobbers the registers
	asmMOVDMemReg((uint32)&gCPU.vtemp, regA);

	jitcClobberAll();
	if (regA != EAX) asmALURegReg(X86_MOV, EAX, regA);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);

	asmCALL((NativeAddress)ppc_read_effective_byte_asm);
	// recover the EA and turn it into the negated lane index (see above)
	asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);
	asmALURegImm(X86_AND, EAX, 0x0f);
	asmALUReg(X86_NOT, EAX);

	// write the loaded byte (DL) into the addressed lane of vr[vrD]
	asmALUMemReg8(X86_MOV, modrm,
		x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrD])+16), DL);

	return flowContinue;
}
2611    
/* lvehx	Load Vector Element Half Word Indexed
 * v.121
 *
 * Loads one halfword from the halfword-aligned EA into halfword lane
 * ((EA & 0x0f) >> 1) of vrD; other lanes are unchanged.
 */
void ppc_opc_lvehx()
{
#ifndef __VEC_EXC_OFF__
	if ((gCPU.msr & MSR_VEC) == 0) {
		ppc_exception(PPC_EXC_NO_VEC);
		return;
	}
#endif
	VECTOR_DEBUG;
	int rA, vrD, rB;
	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
	uint32 ea;
	uint16 r;
	// the low address bit is ignored (forced halfword alignment)
	ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]) & ~1;
	int ret = ppc_read_effective_half(ea, r);
	if (ret == PPC_MMU_OK) {
		VECT_H(gCPU.vr[vrD], (ea & 0x0f) >> 1) = r;
	}
}
// JIT translation of lvehx.  As in gen_lvebx, EAX ends up holding the
// NOT of the (even) lane offset, so base+15+EAX / base+16+EAX address
// the two bytes of the target halfword in the little-endian vector
// image; DL/DH carry the loaded halfword.
JITCFlow ppc_opc_gen_lvehx()
{
	ppc_opc_gen_check_vec();
	int rA, vrD, rB;
	byte modrm[6];
	PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);
	jitcDropClientVectorRegister(vrD);
	jitcAssertFlushedVectorRegister(vrD);

	if (vrD == gJITC.nativeVectorReg) {
		gJITC.nativeVectorReg = VECTREG_NO;
	}

	jitcClobberCarryAndFlags();
	NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);
	// stash the unaligned EA, then force halfword alignment for the read
	asmMOVDMemReg((uint32)&gCPU.vtemp, regA);
	asmALURegImm(X86_AND, regA, ~0x01);

	jitcClobberAll();
	if (regA != EAX) asmALURegReg(X86_MOV, EAX, regA);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);

	asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
	// rebuild the negated even lane offset from the saved EA
	asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);
	asmALURegImm(X86_AND, EAX, 0x0e);
	asmALUReg(X86_NOT, EAX);

	asmALUMemReg8(X86_MOV, modrm,
		x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrD])+15), DL);
	asmALUMemReg8(X86_MOV, modrm,
		x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrD])+16), DH);

	return flowContinue;
}
2668    
/* lvewx	Load Vector Element Word Indexed
 * v.122
 *
 * Loads one word from the word-aligned EA into word lane
 * ((EA & 0x0f) >> 2) of vrD; other lanes are unchanged.
 */
void ppc_opc_lvewx()
{
#ifndef __VEC_EXC_OFF__
	if ((gCPU.msr & MSR_VEC) == 0) {
		ppc_exception(PPC_EXC_NO_VEC);
		return;
	}
#endif
	VECTOR_DEBUG;
	int rA, vrD, rB;
	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
	uint32 ea;
	uint32 r;
	// the two low address bits are ignored (forced word alignment)
	ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]) & ~3;
	int ret = ppc_read_effective_word(ea, r);
	if (ret == PPC_MMU_OK) {
		VECT_W(gCPU.vr[vrD], (ea & 0xf) >> 2) = r;
	}
}
// JIT translation of lvewx.  EAX ends up as NOT of the word-aligned
// lane offset; base+13+EAX = base + 12 - (ea & 0x0c), the target word
// slot in the little-endian vector image; EDX carries the loaded word.
JITCFlow ppc_opc_gen_lvewx()
{
	ppc_opc_gen_check_vec();
	int rA, vrD, rB;
	byte modrm[6];
	PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);
	jitcDropClientVectorRegister(vrD);
	jitcAssertFlushedVectorRegister(vrD);

	if (vrD == gJITC.nativeVectorReg) {
		gJITC.nativeVectorReg = VECTREG_NO;
	}

	jitcClobberCarryAndFlags();
	NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);
	// stash the unaligned EA, then force word alignment for the read
	asmMOVDMemReg((uint32)&gCPU.vtemp, regA);
	asmALURegImm(X86_AND, regA, ~0x03);

	jitcClobberAll();
	if (regA != EAX) asmALURegReg(X86_MOV, EAX, regA);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);

	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	// rebuild the negated word-lane offset from the saved EA
	asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);
	asmALURegImm(X86_AND, EAX, 0x0c);
	asmALUReg(X86_NOT, EAX);

	asmALUMemReg(X86_MOV, modrm,
		x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrD])+13), EDX);
	return flowContinue;
}
2722    
/* 32-byte permute-index ramp used by lvsl/lvsr: the interpreter copies a
 * 16-byte window out of this table.  Byte order depends on host
 * endianness so the in-memory vector image comes out right.
 */
const static byte lvsl_helper[] = {
#if HOST_ENDIANESS == HOST_ENDIANESS_LE
	0x1F, 0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18,
	0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
	0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
	0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
#elif HOST_ENDIANESS == HOST_ENDIANESS_BE
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
#else
#error Endianess not supported!
#endif
};
2738    
/* Precomputed 16 full lvsl result vectors, indexed by (EA & 0x0f) * 16
 * bytes; used by the SSE path of ppc_opc_gen_lvsl (one aligned MOVAPS
 * instead of four scalar loads).
 */
const static uint32 lvsl_helper_full[16*4] = {
	VECT_BUILD(0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F),
	VECT_BUILD(0x01020304, 0x05060708, 0x090A0B0C, 0x0D0E0F10),
	VECT_BUILD(0x02030405, 0x06070809, 0x0A0B0C0D, 0x0E0F1011),
	VECT_BUILD(0x03040506, 0x0708090A, 0x0B0C0D0E, 0x0F101112),
	VECT_BUILD(0x04050607, 0x08090A0B, 0x0C0D0E0F, 0x10111213),
	VECT_BUILD(0x05060708, 0x090A0B0C, 0x0D0E0F10, 0x11121314),
	VECT_BUILD(0x06070809, 0x0A0B0C0D, 0x0E0F1011, 0x12131415),
	VECT_BUILD(0x0708090A, 0x0B0C0D0E, 0x0F101112, 0x13141516),
	VECT_BUILD(0x08090A0B, 0x0C0D0E0F, 0x10111213, 0x14151617),
	VECT_BUILD(0x090A0B0C, 0x0D0E0F10, 0x11121314, 0x15161718),
	VECT_BUILD(0x0A0B0C0D, 0x0E0F1011, 0x12131415, 0x16171819),
	VECT_BUILD(0x0B0C0D0E, 0x0F101112, 0x13141516, 0x1718191A),
	VECT_BUILD(0x0C0D0E0F, 0x10111213, 0x14151617, 0x18191A1B),
	VECT_BUILD(0x0D0E0F10, 0x11121314, 0x15161718, 0x191A1B1C),
	VECT_BUILD(0x0E0F1011, 0x12131415, 0x16171819, 0x1A1B1C1D),
	VECT_BUILD(0x0F101112, 0x13141516, 0x1718191A, 0x1B1C1D1E),
};
2757    
/* Precomputed 16 full lvsr result vectors, indexed by (EA & 0x0f) * 16
 * bytes; used by the SSE path of ppc_opc_gen_lvsr.
 */
const static uint32 lvsr_helper_full[16*4] = {
	VECT_BUILD(0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F),
	VECT_BUILD(0x0F101112, 0x13141516, 0x1718191A, 0x1B1C1D1E),
	VECT_BUILD(0x0E0F1011, 0x12131415, 0x16171819, 0x1A1B1C1D),
	VECT_BUILD(0x0D0E0F10, 0x11121314, 0x15161718, 0x191A1B1C),
	VECT_BUILD(0x0C0D0E0F, 0x10111213, 0x14151617, 0x18191A1B),
	VECT_BUILD(0x0B0C0D0E, 0x0F101112, 0x13141516, 0x1718191A),
	VECT_BUILD(0x0A0B0C0D, 0x0E0F1011, 0x12131415, 0x16171819),
	VECT_BUILD(0x090A0B0C, 0x0D0E0F10, 0x11121314, 0x15161718),
	VECT_BUILD(0x08090A0B, 0x0C0D0E0F, 0x10111213, 0x14151617),
	VECT_BUILD(0x0708090A, 0x0B0C0D0E, 0x0F101112, 0x13141516),
	VECT_BUILD(0x06070809, 0x0A0B0C0D, 0x0E0F1011, 0x12131415),
	VECT_BUILD(0x05060708, 0x090A0B0C, 0x0D0E0F10, 0x11121314),
	VECT_BUILD(0x04050607, 0x08090A0B, 0x0C0D0E0F, 0x10111213),
	VECT_BUILD(0x03040506, 0x0708090A, 0x0B0C0D0E, 0x0F101112),
	VECT_BUILD(0x02030405, 0x06070809, 0x0A0B0C0D, 0x0E0F1011),
	VECT_BUILD(0x01020304, 0x05060708, 0x090A0B0C, 0x0D0E0F10),
};
2776    
2777     /*
2778     * lvsl Load Vector for Shift Left
2779     * v.123
2780     */
2781     void ppc_opc_lvsl()
2782     {
2783     #ifndef __VEC_EXC_OFF__
2784     if ((gCPU.msr & MSR_VEC) == 0) {
2785     ppc_exception(PPC_EXC_NO_VEC);
2786     return;
2787     }
2788     #endif
2789     VECTOR_DEBUG;
2790     int rA, vrD, rB;
2791     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
2792     uint32 ea;
2793     ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]);
2794     #if HOST_ENDIANESS == HOST_ENDIANESS_LE
2795     memmove(&gCPU.vr[vrD], lvsl_helper+0x10-(ea & 0xf), 16);
2796     #elif HOST_ENDIANESS == HOST_ENDIANESS_BE
2797     memmove(&gCPU.vr[vrD], lvsl_helper+(ea & 0xf), 16);
2798     #else
2799     #error Endianess not supported!
2800     #endif
2801     }
// JIT translation of lvsl.  SSE path: one MOVAPS from the precomputed
// lvsl_helper_full table (index = (ea & 0x0f) * 16).  Fallback path:
// four dword loads from the byte table; regA is NOTed so the fixed
// offsets 0x11/0x15/0x19/0x1d minus (index+1) reproduce the
// lvsl_helper + 0x10 - index window used by the interpreter.
JITCFlow ppc_opc_gen_lvsl()
{
	ppc_opc_gen_check_vec();
	int rA, vrD, rB;
	byte modrm[6];
	PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);

	if (vrD == gJITC.nativeVectorReg) {
		gJITC.nativeVectorReg = VECTREG_NO;
	}

	jitcClobberCarryAndFlags();
	NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB);
	// only the alignment offset of the EA matters
	asmALURegImm(X86_AND, regA, 0x0f);

	if (gJITC.hostCPUCaps.sse) {
		// scale to a 16-byte table row and load it in one go
		asmShiftRegImm(X86_SHL, regA, 4);

		NativeVectorReg reg1 = jitcMapClientVectorRegisterDirty(vrD);

		asmALUPS(X86_MOVAPS, reg1,
			x86_mem2(modrm, regA, (uint32)&lvsl_helper_full));
	} else {
		asmALUReg(X86_NOT, regA);
		jitcDropClientVectorRegister(vrD);

		NativeReg reg1 = jitcAllocRegister();
		NativeReg reg2 = jitcAllocRegister();
		NativeReg reg3 = jitcAllocRegister();

		asmALURegMem(X86_MOV, reg1, modrm,
			x86_mem(modrm, regA, (uint32)&lvsl_helper+0x11));
		asmALURegMem(X86_MOV, reg2, modrm,
			x86_mem(modrm, regA, (uint32)&lvsl_helper+0x15));
		asmALURegMem(X86_MOV, reg3, modrm,
			x86_mem(modrm, regA, (uint32)&lvsl_helper+0x19));
		asmALURegMem(X86_MOV, regA, modrm,
			x86_mem(modrm, regA, (uint32)&lvsl_helper+0x1d));

		asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[0]), reg1);
		asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[1]), reg2);
		asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[2]), reg3);
		asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[3]), regA);
	}

	return flowContinue;
}
2849    
2850     /*
2851     * lvsr Load Vector for Shift Right
2852     * v.125
2853     */
2854     void ppc_opc_lvsr()
2855     {
2856     #ifndef __VEC_EXC_OFF__
2857     if ((gCPU.msr & MSR_VEC) == 0) {
2858     ppc_exception(PPC_EXC_NO_VEC);
2859     return;
2860     }
2861     #endif
2862     VECTOR_DEBUG;
2863     int rA, vrD, rB;
2864     PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
2865     uint32 ea;
2866     ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]);
2867     #if HOST_ENDIANESS == HOST_ENDIANESS_LE
2868     memmove(&gCPU.vr[vrD], lvsl_helper+(ea & 0xf), 16);
2869     #elif HOST_ENDIANESS == HOST_ENDIANESS_BE
2870     memmove(&gCPU.vr[vrD], lvsl_helper+0x10-(ea & 0xf), 16);
2871     #else
2872     #error Endianess not supported!
2873     #endif
2874     }
// JIT translation of lvsr.  SSE path: one MOVAPS from the precomputed
// lvsr_helper_full table.  Fallback path: four dword loads from
// lvsl_helper + (ea & 0x0f) + {0,4,8,12}, matching the interpreter's
// little-endian window.
JITCFlow ppc_opc_gen_lvsr()
{
	ppc_opc_gen_check_vec();
	int rA, vrD, rB;
	byte modrm[6];
	PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);

	if (vrD == gJITC.nativeVectorReg) {
		gJITC.nativeVectorReg = VECTREG_NO;
	}

	jitcClobberCarryAndFlags();
	NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB);
	// only the alignment offset of the EA matters
	asmALURegImm(X86_AND, regA, 0x0f);

	if (gJITC.hostCPUCaps.sse) {
		// scale to a 16-byte table row and load it in one go
		asmShiftRegImm(X86_SHL, regA, 4);

		NativeVectorReg reg1 = jitcMapClientVectorRegisterDirty(vrD);

		asmALUPS(X86_MOVAPS, reg1,
			x86_mem2(modrm, regA, (uint32)&lvsr_helper_full));
	} else {
		jitcDropClientVectorRegister(vrD);
		jitcAssertFlushedVectorRegister(vrD);

		NativeReg reg1 = jitcAllocRegister();
		NativeReg reg2 = jitcAllocRegister();
		NativeReg reg3 = jitcAllocRegister();

		asmALURegMem(X86_MOV, reg1, modrm,
			x86_mem(modrm, regA, (uint32)&lvsl_helper));
		asmALURegMem(X86_MOV, reg2, modrm,
			x86_mem(modrm, regA, (uint32)&lvsl_helper+4));
		asmALURegMem(X86_MOV, reg3, modrm,
			x86_mem(modrm, regA, (uint32)&lvsl_helper+8));
		asmALURegMem(X86_MOV, regA, modrm,
			x86_mem(modrm, regA, (uint32)&lvsl_helper+12));

		asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[0]), reg1);
		asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[1]), reg2);
		asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[2]), reg3);
		asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[3]), regA);
	}

	return flowContinue;
}
2922    
2923     /*
2924     * dst Data Stream Touch
2925     * v.115
2926     */
// dst is only a cache-prefetch hint; with no cache model it has no effect.
void ppc_opc_dst()
{
	VECTOR_DEBUG;
	/* Since we are not emulating the cache, this is a nop */
}
// JITC version of dst: emit nothing and fall through to the next opcode.
JITCFlow ppc_opc_gen_dst()
{
	/* Since we are not emulating the cache, this is a nop */
	return flowContinue;
}
2937    
2938     /*
2939     * stb Store Byte
2940     * .632
2941     */
2942     void ppc_opc_stb()
2943     {
2944     int rA, rS;
2945     uint32 imm;
2946     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rS, rA, imm);
2947     ppc_write_effective_byte((rA?gCPU.gpr[rA]:0)+imm, (uint8)gCPU.gpr[rS]) != PPC_MMU_FATAL;
2948     }
// JITC: emit native code for stb (D-form byte store).
JITCFlow ppc_opc_gen_stb()
{
	int rA, rS;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rS, rA, imm);
	// Helper loads address/data into the registers expected by the stub.
	ppc_opc_gen_helper_st(PPC_GPR(rA), imm, PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_byte_asm);
	// Stores can fault, so the translated block ends here.
	return flowEndBlock;
}
2958     /*
2959     * stbu Store Byte with Update
2960     * .633
2961     */
2962     void ppc_opc_stbu()
2963     {
2964     int rA, rS;
2965     uint32 imm;
2966     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rS, rA, imm);
2967     // FIXME: check rA!=0
2968     int ret = ppc_write_effective_byte(gCPU.gpr[rA]+imm, (uint8)gCPU.gpr[rS]);
2969     if (ret == PPC_MMU_OK) {
2970     gCPU.gpr[rA] += imm;
2971     }
2972     }
// JITC: emit code for stbu — byte store, then rA += imm.
JITCFlow ppc_opc_gen_stbu()
{
	int rA, rS;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rS, rA, imm);
	// FIXME: check rA!=0
	ppc_opc_gen_helper_stu(PPC_GPR(rA), imm, PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_byte_asm);
	if (imm) {
		// The rA update is emitted after the store call, matching the
		// interpreter's update-only-on-success semantics.
		NativeReg r = jitcGetClientRegisterDirty(PPC_GPR(rA));
		asmALURegImm(X86_ADD, r, imm);
	}
	return flowContinue;
}
2987     /*
2988     * stbux Store Byte with Update Indexed
2989     * .634
2990     */
2991     void ppc_opc_stbux()
2992     {
2993     int rA, rS, rB;
2994     PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
2995     // FIXME: check rA!=0
2996     int ret = ppc_write_effective_byte(gCPU.gpr[rA]+gCPU.gpr[rB], (uint8)gCPU.gpr[rS]);
2997     if (ret == PPC_MMU_OK) {
2998     gCPU.gpr[rA] += gCPU.gpr[rB];
2999     }
3000     }
// JITC: emit code for stbux — byte store, then rA += rB.
JITCFlow ppc_opc_gen_stbux()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
	ppc_opc_gen_helper_stux(PPC_GPR(rA), PPC_GPR(rB), PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_byte_asm);
	// Apply the update form after the store returns.
	NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
	NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
	asmALURegReg(X86_ADD, a, b);
	return flowContinue;
}
3012     /*
3013     * stbx Store Byte Indexed
3014     * .635
3015     */
3016     void ppc_opc_stbx()
3017     {
3018     int rA, rS, rB;
3019     PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
3020     ppc_write_effective_byte((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], (uint8)gCPU.gpr[rS]) != PPC_MMU_FATAL;
3021     }
// JITC: emit native code for stbx (X-form byte store).
JITCFlow ppc_opc_gen_stbx()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
	ppc_opc_gen_helper_stx(PPC_GPR(rA), PPC_GPR(rB), PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_byte_asm);
	// Stores can fault, so the translated block ends here.
	return flowEndBlock;
}
3030     /*
3031     * stfd Store Floating-Point Double
3032     * .642
3033     */
3034     void ppc_opc_stfd()
3035     {
3036     if ((gCPU.msr & MSR_FP) == 0) {
3037     ppc_exception(PPC_EXC_NO_FPU);
3038     return;
3039     }
3040     int rA, frS;
3041     uint32 imm;
3042     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frS, rA, imm);
3043     ppc_write_effective_dword((rA?gCPU.gpr[rA]:0)+imm, gCPU.fpr[frS]) != PPC_MMU_FATAL;
3044     }
// JITC: emit native code for stfd (64-bit FP store, D-form).
JITCFlow ppc_opc_gen_stfd()
{
	ppc_opc_gen_check_fpu();
	int rA, frS;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frS, rA, imm);
	jitcFloatRegisterClobberAll();
	jitcFlushRegister();
	jitcClobberCarryAndFlags();
	// Stub calling convention: ECX:EDX = 64-bit data (high:low halves of
	// the FPR), EAX = effective address, ESI = client pc.
	jitcGetClientRegister(PPC_FPR_U(frS), NATIVE_REG | ECX);
	jitcGetClientRegister(PPC_FPR_L(frS), NATIVE_REG | EDX);
	if (rA) {
		jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
		if (imm) {
			asmALURegImm(X86_ADD, EAX, imm);
		}
	} else {
		// rA == 0 means base 0, so EA is just the displacement.
		asmALURegImm(X86_MOV, EAX, imm);
	}
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_dword_asm);
	return flowEndBlock;
}
3069     /*
3070     * stfdu Store Floating-Point Double with Update
3071     * .643
3072     */
3073     void ppc_opc_stfdu()
3074     {
3075     if ((gCPU.msr & MSR_FP) == 0) {
3076     ppc_exception(PPC_EXC_NO_FPU);
3077     return;
3078     }
3079     int rA, frS;
3080     uint32 imm;
3081     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frS, rA, imm);
3082     // FIXME: check rA!=0
3083     int ret = ppc_write_effective_dword(gCPU.gpr[rA]+imm, gCPU.fpr[frS]);
3084     if (ret == PPC_MMU_OK) {
3085     gCPU.gpr[rA] += imm;
3086     }
3087     }
// JITC: emit code for stfdu — 64-bit FP store, then rA += imm.
JITCFlow ppc_opc_gen_stfdu()
{
	ppc_opc_gen_check_fpu();
	int rA, frS;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frS, rA, imm);
	jitcFloatRegisterClobberAll();
	jitcFlushRegister();
	jitcClobberCarryAndFlags();
	// Stub calling convention: ECX:EDX = data, EAX = EA, ESI = client pc.
	jitcGetClientRegister(PPC_FPR_U(frS), NATIVE_REG | ECX);
	jitcGetClientRegister(PPC_FPR_L(frS), NATIVE_REG | EDX);
	// FIXME: check rA!=0
	jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
	if (imm) {
		asmALURegImm(X86_ADD, EAX, imm);
	}
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_dword_asm);
	if (imm) {
		// Emit the rA update after the store call.
		NativeReg r = jitcGetClientRegisterDirty(PPC_GPR(rA));
		asmALURegImm(X86_ADD, r, imm);
	}
	return flowContinue;
}
3113     /*
3114     * stfd Store Floating-Point Double with Update Indexed
3115     * .644
3116     */
3117     void ppc_opc_stfdux()
3118     {
3119     if ((gCPU.msr & MSR_FP) == 0) {
3120     ppc_exception(PPC_EXC_NO_FPU);
3121     return;
3122     }
3123     int rA, frS, rB;
3124     PPC_OPC_TEMPL_X(gCPU.current_opc, frS, rA, rB);
3125     // FIXME: check rA!=0
3126     int ret = ppc_write_effective_dword(gCPU.gpr[rA]+gCPU.gpr[rB], gCPU.fpr[frS]);
3127     if (ret == PPC_MMU_OK) {
3128     gCPU.gpr[rA] += gCPU.gpr[rB];
3129     }
3130     }
// JITC: emit code for stfdux — 64-bit FP store, then rA += rB.
JITCFlow ppc_opc_gen_stfdux()
{
	ppc_opc_gen_check_fpu();
	int rA, frS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frS, rA, rB);
	jitcFloatRegisterClobberAll();
	jitcFlushRegister();
	jitcClobberCarryAndFlags();
	// Stub calling convention: ECX:EDX = data, EAX = EA, ESI = client pc.
	jitcGetClientRegister(PPC_FPR_U(frS), NATIVE_REG | ECX);
	jitcGetClientRegister(PPC_FPR_L(frS), NATIVE_REG | EDX);
	// FIXME: check rA!=0
	jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
	byte modrm[6];
	// EA = rA + rB, adding rB straight from its memory slot.
	asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_dword_asm);
	// Apply the update form after the store returns.
	NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
	NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
	asmALURegReg(X86_ADD, a, b);
	return flowContinue;
}
3153     /*
3154     * stfdx Store Floating-Point Double Indexed
3155     * .645
3156     */
3157     void ppc_opc_stfdx()
3158     {
3159     if ((gCPU.msr & MSR_FP) == 0) {
3160     ppc_exception(PPC_EXC_NO_FPU);
3161     return;
3162     }
3163     int rA, frS, rB;
3164     PPC_OPC_TEMPL_X(gCPU.current_opc, frS, rA, rB);
3165     ppc_write_effective_dword((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], gCPU.fpr[frS]) != PPC_MMU_FATAL;
3166     }
// JITC: emit native code for stfdx (64-bit FP store, X-form).
JITCFlow ppc_opc_gen_stfdx()
{
	ppc_opc_gen_check_fpu();
	int rA, frS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frS, rA, rB);
	jitcFloatRegisterClobberAll();
	jitcFlushRegister();
	jitcClobberCarryAndFlags();
	// Stub calling convention: ECX:EDX = data, EAX = EA, ESI = client pc.
	jitcGetClientRegister(PPC_FPR_U(frS), NATIVE_REG | ECX);
	jitcGetClientRegister(PPC_FPR_L(frS), NATIVE_REG | EDX);
	if (rA) {
		jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
		byte modrm[6];
		asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
	} else {
		// rA == 0: EA is just rB.
		jitcGetClientRegister(PPC_GPR(rB), NATIVE_REG | EAX);
	}
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_dword_asm);
	return flowEndBlock;
}
3189     /*
3190     * stfiwx Store Floating-Point as Integer Word Indexed
3191     * .646
3192     */
3193     void ppc_opc_stfiwx()
3194     {
3195     if ((gCPU.msr & MSR_FP) == 0) {
3196     ppc_exception(PPC_EXC_NO_FPU);
3197     return;
3198     }
3199     int rA, frS, rB;
3200     PPC_OPC_TEMPL_X(gCPU.current_opc, frS, rA, rB);
3201     ppc_write_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], (uint32)gCPU.fpr[frS]) != PPC_MMU_FATAL;
3202     }
// JITC: emit code for stfiwx — store the low word of frS as an integer.
JITCFlow ppc_opc_gen_stfiwx()
{
	ppc_opc_gen_check_fpu();
	int rA, frS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frS, rA, rB);
	jitcFloatRegisterClobberAll();
	// Reuse the generic indexed-store helper with the FPR's low half as data.
	ppc_opc_gen_helper_stx(PPC_GPR(rA), PPC_GPR(rB), PPC_FPR_L(frS));
	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	return flowEndBlock;
}
3213     /*
3214     * stfs Store Floating-Point Single
3215     * .647
3216     */
3217     void ppc_opc_stfs()
3218     {
3219     if ((gCPU.msr & MSR_FP) == 0) {
3220     ppc_exception(PPC_EXC_NO_FPU);
3221     return;
3222     }
3223     int rA, frS;
3224     uint32 imm;
3225     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frS, rA, imm);
3226     uint32 s;
3227     ppc_double d;
3228     ppc_fpu_unpack_double(d, gCPU.fpr[frS]);
3229     ppc_fpu_pack_single(d, s);
3230     ppc_write_effective_word((rA?gCPU.gpr[rA]:0)+imm, s) != PPC_MMU_FATAL;
3231     }
// JITC: emit code for stfs — convert frS to single precision, then store.
JITCFlow ppc_opc_gen_stfs()
{
	ppc_opc_gen_check_fpu();
	int rA, frS;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frS, rA, imm);
	jitcFloatRegisterClobberAll();
	// ppc_opc_double_to_single takes the FPR halves in EDX:EAX and
	// returns the packed single in EAX.
	jitcGetClientRegister(PPC_FPR_U(frS), NATIVE_REG | EDX);
	jitcGetClientRegister(PPC_FPR_L(frS), NATIVE_REG | EAX);
	jitcClobberAll();
	asmCALL((NativeAddress)ppc_opc_double_to_single);
	// Move the converted value into EDX (data register of the word stub).
	asmALURegReg(X86_MOV, EDX, EAX);
	if (rA) {
		jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
		if (imm) {
			asmALURegImm(X86_ADD, EAX, imm);
		}
	} else {
		asmALURegImm(X86_MOV, EAX, imm);
	}
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	return flowEndBlock;
}
3257     /*
3258     * stfsu Store Floating-Point Single with Update
3259     * .648
3260     */
3261     void ppc_opc_stfsu()
3262     {
3263     if ((gCPU.msr & MSR_FP) == 0) {
3264     ppc_exception(PPC_EXC_NO_FPU);
3265     return;
3266     }
3267     int rA, frS;
3268     uint32 imm;
3269     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frS, rA, imm);
3270     // FIXME: check rA!=0
3271     uint32 s;
3272     ppc_double d;
3273     ppc_fpu_unpack_double(d, gCPU.fpr[frS]);
3274     ppc_fpu_pack_single(d, s);
3275     int ret = ppc_write_effective_word(gCPU.gpr[rA]+imm, s);
3276     if (ret == PPC_MMU_OK) {
3277     gCPU.gpr[rA] += imm;
3278     }
3279     }
// JITC: emit code for stfsu — single-precision store, then rA += imm.
JITCFlow ppc_opc_gen_stfsu()
{
	ppc_opc_gen_check_fpu();
	int rA, frS;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frS, rA, imm);
	jitcFloatRegisterClobberAll();
	// ppc_opc_double_to_single: FPR halves in EDX:EAX, result in EAX.
	jitcGetClientRegister(PPC_FPR_U(frS), NATIVE_REG | EDX);
	jitcGetClientRegister(PPC_FPR_L(frS), NATIVE_REG | EAX);
	jitcClobberAll();
	asmCALL((NativeAddress)ppc_opc_double_to_single);
	asmALURegReg(X86_MOV, EDX, EAX);
	// FIXME: check rA!=0
	jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
	if (imm) {
		asmALURegImm(X86_ADD, EAX, imm);
	}
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	if (imm) {
		// Emit the rA update after the store call.
		NativeReg r = jitcGetClientRegisterDirty(PPC_GPR(rA));
		asmALURegImm(X86_ADD, r, imm);
	}
	return flowContinue;
}
3306     /*
3307     * stfsux Store Floating-Point Single with Update Indexed
3308     * .649
3309     */
3310     void ppc_opc_stfsux()
3311     {
3312     if ((gCPU.msr & MSR_FP) == 0) {
3313     ppc_exception(PPC_EXC_NO_FPU);
3314     return;
3315     }
3316     int rA, frS, rB;
3317     PPC_OPC_TEMPL_X(gCPU.current_opc, frS, rA, rB);
3318     // FIXME: check rA!=0
3319     uint32 s;
3320     ppc_double d;
3321     ppc_fpu_unpack_double(d, gCPU.fpr[frS]);
3322     ppc_fpu_pack_single(d, s);
3323     int ret = ppc_write_effective_word(gCPU.gpr[rA]+gCPU.gpr[rB], s);
3324     if (ret == PPC_MMU_OK) {
3325     gCPU.gpr[rA] += gCPU.gpr[rB];
3326     }
3327     }
// JITC: emit code for stfsux — single-precision store, then rA += rB.
JITCFlow ppc_opc_gen_stfsux()
{
	ppc_opc_gen_check_fpu();
	int rA, frS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frS, rA, rB);
	jitcFloatRegisterClobberAll();
	// ppc_opc_double_to_single: FPR halves in EDX:EAX, result in EAX.
	jitcGetClientRegister(PPC_FPR_U(frS), NATIVE_REG | EDX);
	jitcGetClientRegister(PPC_FPR_L(frS), NATIVE_REG | EAX);
	jitcClobberAll();
	asmCALL((NativeAddress)ppc_opc_double_to_single);
	asmALURegReg(X86_MOV, EDX, EAX);
	// FIXME: check rA!=0
	jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
	byte modrm[6];
	asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	// Apply the update form after the store returns.
	NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
	NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
	asmALURegReg(X86_ADD, a, b);
	return flowContinue;
}
3351     /*
3352     * stfsx Store Floating-Point Single Indexed
3353     * .650
3354     */
3355     void ppc_opc_stfsx()
3356     {
3357     if ((gCPU.msr & MSR_FP) == 0) {
3358     ppc_exception(PPC_EXC_NO_FPU);
3359     return;
3360     }
3361     int rA, frS, rB;
3362     PPC_OPC_TEMPL_X(gCPU.current_opc, frS, rA, rB);
3363     uint32 s;
3364     ppc_double d;
3365     ppc_fpu_unpack_double(d, gCPU.fpr[frS]);
3366     ppc_fpu_pack_single(d, s);
3367     ppc_write_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], s) != PPC_MMU_FATAL;
3368     }
// JITC: emit native code for stfsx (single-precision FP store, X-form).
JITCFlow ppc_opc_gen_stfsx()
{
	ppc_opc_gen_check_fpu();
	int rA, frS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frS, rA, rB);
	jitcFloatRegisterClobberAll();
	// ppc_opc_double_to_single: FPR halves in EDX:EAX, result in EAX.
	jitcGetClientRegister(PPC_FPR_U(frS), NATIVE_REG | EDX);
	jitcGetClientRegister(PPC_FPR_L(frS), NATIVE_REG | EAX);
	jitcClobberAll();
	asmCALL((NativeAddress)ppc_opc_double_to_single);
	asmALURegReg(X86_MOV, EDX, EAX);
	if (rA) {
		jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
		byte modrm[6];
		asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
	} else {
		// rA == 0: EA is just rB.
		jitcGetClientRegister(PPC_GPR(rB), NATIVE_REG | EAX);
	}
	jitcClobberAll();
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	return flowEndBlock;
}
3392     /*
3393     * sth Store Half Word
3394     * .651
3395     */
3396     void ppc_opc_sth()
3397     {
3398     int rA, rS;
3399     uint32 imm;
3400     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rS, rA, imm);
3401     ppc_write_effective_half((rA?gCPU.gpr[rA]:0)+imm, (uint16)gCPU.gpr[rS]) != PPC_MMU_FATAL;
3402     }
// JITC: emit native code for sth (D-form halfword store).
JITCFlow ppc_opc_gen_sth()
{
	int rA, rS;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rS, rA, imm);
	ppc_opc_gen_helper_st(PPC_GPR(rA), imm, PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_half_asm);
	return flowEndBlock;
}
3412     /*
3413     * sthbrx Store Half Word Byte-Reverse Indexed
3414     * .652
3415     */
3416     void ppc_opc_sthbrx()
3417     {
3418     int rA, rS, rB;
3419     PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
3420     ppc_write_effective_half((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], ppc_bswap_half((uint16)gCPU.gpr[rS])) != PPC_MMU_FATAL;
3421     }
// JITC: emit code for sthbrx — byte-swap the halfword in DL/DH, then store.
JITCFlow ppc_opc_gen_sthbrx()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
	ppc_opc_gen_helper_stx(PPC_GPR(rA), PPC_GPR(rB), PPC_GPR(rS));
	// Swap the two data bytes (XCHG DL,DH) before calling the store stub.
	asmALURegReg8(X86_XCHG, DL, DH);
	asmCALL((NativeAddress)ppc_write_effective_half_asm);
	return flowEndBlock;
}
3431     /*
3432     * sthu Store Half Word with Update
3433     * .653
3434     */
3435     void ppc_opc_sthu()
3436     {
3437     int rA, rS;
3438     uint32 imm;
3439     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rS, rA, imm);
3440     // FIXME: check rA!=0
3441     int ret = ppc_write_effective_half(gCPU.gpr[rA]+imm, (uint16)gCPU.gpr[rS]);
3442     if (ret == PPC_MMU_OK) {
3443     gCPU.gpr[rA] += imm;
3444     }
3445     }
// JITC: emit code for sthu — halfword store, then rA += imm.
JITCFlow ppc_opc_gen_sthu()
{
	int rA, rS;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rS, rA, imm);
	// FIXME: check rA!=0
	ppc_opc_gen_helper_stu(PPC_GPR(rA), imm, PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_half_asm);
	if (imm) {
		// Emit the rA update after the store call.
		NativeReg r = jitcGetClientRegisterDirty(PPC_GPR(rA));
		asmALURegImm(X86_ADD, r, imm);
	}
	return flowContinue;
}
3460     /*
3461     * sthux Store Half Word with Update Indexed
3462     * .654
3463     */
3464     void ppc_opc_sthux()
3465     {
3466     int rA, rS, rB;
3467     PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
3468     // FIXME: check rA!=0
3469     int ret = ppc_write_effective_half(gCPU.gpr[rA]+gCPU.gpr[rB], (uint16)gCPU.gpr[rS]);
3470     if (ret == PPC_MMU_OK) {
3471     gCPU.gpr[rA] += gCPU.gpr[rB];
3472     }
3473     }
// JITC: emit code for sthux — halfword store, then rA += rB.
JITCFlow ppc_opc_gen_sthux()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
	ppc_opc_gen_helper_stux(PPC_GPR(rA), PPC_GPR(rB), PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_half_asm);
	// Apply the update form after the store returns.
	NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
	NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
	asmALURegReg(X86_ADD, a, b);
	return flowContinue;
}
3485     /*
3486     * sthx Store Half Word Indexed
3487     * .655
3488     */
3489     void ppc_opc_sthx()
3490     {
3491     int rA, rS, rB;
3492     PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
3493     ppc_write_effective_half((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], (uint16)gCPU.gpr[rS]) != PPC_MMU_FATAL;
3494     }
// JITC: emit native code for sthx (X-form halfword store).
JITCFlow ppc_opc_gen_sthx()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
	ppc_opc_gen_helper_stx(PPC_GPR(rA), PPC_GPR(rB), PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_half_asm);
	return flowEndBlock;
}
3503     /*
3504     * stmw Store Multiple Word
3505     * .656
3506     */
3507     void ppc_opc_stmw()
3508     {
3509     int rS, rA;
3510     uint32 imm;
3511     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rS, rA, imm);
3512     uint32 ea = (rA ? gCPU.gpr[rA] : 0) + imm;
3513     while (rS <= 31) {
3514     if (ppc_write_effective_word(ea, gCPU.gpr[rS])) {
3515     return;
3516     }
3517     rS++;
3518     ea += 4;
3519     }
3520     }
// JITC: emit code for stmw. Registers are stored in pairs as 64-bit
// writes (rS in ECX, rS+1 supplied by the helper) for speed; a trailing
// odd register falls back to a single 32-bit store.
JITCFlow ppc_opc_gen_stmw()
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rS, rA, imm);
	while (rS <= 30) {
		// Store gpr[rS]:gpr[rS+1] as one dword (8 bytes), advance by 8.
		ppc_opc_gen_helper_st(PPC_GPR(rA), imm, PPC_GPR(rS+1));
		jitcGetClientRegister(PPC_GPR(rS), NATIVE_REG | ECX);
		asmCALL((NativeAddress)ppc_write_effective_dword_asm);
		rS += 2;
		imm += 8;
	}
	if (rS == 31) {
		// Odd leftover register: plain word store.
		ppc_opc_gen_helper_st(PPC_GPR(rA), imm, PPC_GPR(rS));
		asmCALL((NativeAddress)ppc_write_effective_word_asm);
	}
	return flowEndBlock;
}
3539     /*
3540     * stswi Store String Word Immediate
3541     * .657
3542     */
// Interpreter for stswi: store NB bytes (NB=0 encodes 32) starting at
// EA = (rA|0), taking bytes MSB-first from gpr[rS], gpr[rS+1], ...
// with wrap-around from r31 to r0.
void ppc_opc_stswi()
{
	int rA, rS, NB;
	PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, NB);
	if (NB==0) NB=32;
	uint32 ea = rA ? gCPU.gpr[rA] : 0;
	uint32 r = 0;	// current source word, shifted left as bytes are consumed
	int i = 0;	// bytes remaining in r

	while (NB > 0) {
		if (!i) {
			// Refill from the next source register (mod 32 wrap).
			r = gCPU.gpr[rS];
			rS++;
			rS%=32;
			i = 4;
		}
		// Emit the most significant remaining byte; stop on MMU failure.
		if (ppc_write_effective_byte(ea, (r>>24))) {
			return;
		}
		r<<=8;
		ea++;
		i--;
		NB--;
	}
}
// JITC: emit code for stswi by delegating the whole byte loop to the
// ppc_opc_stswi_asm stub. Stub inputs: EAX = EA, ECX = byte count,
// EBX = first source GPR index, ESI = client pc.
JITCFlow ppc_opc_gen_stswi()
{
	int rA, rS, NB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, NB);
	if (NB==0) NB=32;	// NB=0 encodes a 32-byte transfer
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	if (rA) {
		jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
	} else {
		asmALURegImm(X86_MOV, EAX, 0);
	}
	asmALURegImm(X86_MOV, ECX, NB);
	asmALURegImm(X86_MOV, EBX, rS);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	jitcClobberAll();
	asmCALL((NativeAddress)ppc_opc_stswi_asm);
	return flowEndBlock;
}
3587     /*
3588     * stswx Store String Word Indexed
3589     * .658
3590     */
// Interpreter for stswx: like stswi, but the byte count comes from
// XER[25-31] and EA = (rA|0) + rB. Source registers wrap from r31 to r0.
void ppc_opc_stswx()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
	int NB = XER_n(gCPU.xer);
	uint32 ea = gCPU.gpr[rB] + (rA ? gCPU.gpr[rA] : 0);
	uint32 r = 0;	// current source word, shifted left as bytes are consumed
	int i = 0;	// bytes remaining in r

	while (NB > 0) {
		if (!i) {
			// Refill from the next source register (mod 32 wrap).
			r = gCPU.gpr[rS];
			rS++;
			rS%=32;
			i = 4;
		}
		// Emit the most significant remaining byte; stop on MMU failure.
		if (ppc_write_effective_byte(ea, (r>>24))) {
			return;
		}
		r<<=8;
		ea++;
		i--;
		NB--;
	}
}
// JITC: emit code for stswx. The byte count is taken from XER (low 7
// bits); if it is zero the call to the stswi stub is skipped entirely.
JITCFlow ppc_opc_gen_stswx()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	jitcGetClientRegister(PPC_XER, NATIVE_REG | ECX);
	if (rA) {
		byte modrm[6];
		jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
		asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
	} else {
		jitcGetClientRegister(PPC_GPR(rB), NATIVE_REG | EAX);
	}
	// ECX = XER & 0x7f = transfer byte count; sets ZF when zero.
	asmALURegImm(X86_AND, ECX, 0x7f);
	jitcClobberAll();
	// Skip the store loop for a zero-length transfer.
	NativeAddress fixup = asmJxxFixup(X86_Z);
	asmALURegImm(X86_MOV, EBX, rS);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);
	asmCALL((NativeAddress)ppc_opc_stswi_asm);
	asmResolveFixup(fixup, asmHERE());
	return flowEndBlock;
}
3639     /*
3640     * stw Store Word
3641     * .659
3642     */
3643     void ppc_opc_stw()
3644     {
3645     int rA, rS;
3646     uint32 imm;
3647     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rS, rA, imm);
3648     ppc_write_effective_word((rA?gCPU.gpr[rA]:0)+imm, gCPU.gpr[rS]) != PPC_MMU_FATAL;
3649     }
// JITC: emit native code for stw (D-form word store).
JITCFlow ppc_opc_gen_stw()
{
	int rA, rS;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rS, rA, imm);
	ppc_opc_gen_helper_st(PPC_GPR(rA), imm, PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	return flowEndBlock;
}
3659     /*
3660     * stwbrx Store Word Byte-Reverse Indexed
3661     * .660
3662     */
3663     void ppc_opc_stwbrx()
3664     {
3665     int rA, rS, rB;
3666     PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
3667     // FIXME: doppelt gemoppelt
3668     ppc_write_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], ppc_bswap_word(gCPU.gpr[rS])) != PPC_MMU_FATAL;
3669     }
// JITC: emit code for stwbrx — BSWAP the data word in EDX, then store.
JITCFlow ppc_opc_gen_stwbrx()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
	ppc_opc_gen_helper_stx(PPC_GPR(rA), PPC_GPR(rB), PPC_GPR(rS));
	// Byte-reverse the data before calling the store stub.
	asmBSWAP(EDX);
	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	return flowEndBlock;
}
3679     /*
3680     * stwcx. Store Word Conditional Indexed
3681     * .661
3682     */
// Interpreter for stwcx.: conditional store paired with lwarx.
// Clears CR0, then — if a reservation exists — re-reads the word and only
// stores rS (setting CR0[EQ]) when the memory still holds the reserved
// value. CR0[SO] mirrors XER[SO].
void ppc_opc_stwcx_()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
	gCPU.cr &= 0x0fffffff;	// clear CR0
	if (gCPU.have_reservation) {
		// The reservation is consumed regardless of the outcome.
		gCPU.have_reservation = false;
		uint32 v;
		if (ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], v)) {
			return;
		}
		// Value-based check: store only succeeds if memory is unchanged.
		if (v==gCPU.reserve) {
			if (ppc_write_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], gCPU.gpr[rS])) {
				return;
			}
			gCPU.cr |= CR_CR0_EQ;
		}
		if (gCPU.xer & XER_SO) {
			gCPU.cr |= CR_CR0_SO;
		}
	}
}
// JITC: emit code for stwcx. — mirrors the interpreter: clear CR0,
// test-and-clear the reservation flag, compare the re-read word against
// the reserved value, store + set CR0[EQ] on match, copy XER[SO] to CR0[SO].
JITCFlow ppc_opc_gen_stwcx_()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
	byte modrm[6];
	jitcClobberCarryAndFlags();
	// Clear CR0 (top nibble of cr, accessed via its high byte in memory).
	asmALUMemImm8(X86_AND, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), 0x0f);
	// BTR atomically tests bit 0 of have_reservation and clears it;
	// carry clear -> no reservation -> skip the whole store attempt.
	asmBTxMemImm(X86_BTR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.have_reservation), 0);
	NativeAddress no_reservation = asmJxxFixup(X86_NC);
	ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
	asmCALL((NativeAddress)ppc_read_effective_word_asm);
	asmALURegMem(X86_CMP, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.reserve));
	// FIXME: mapFlags?
	NativeAddress fixup = asmJxxFixup(X86_NE);
	ppc_opc_gen_helper_stx(PPC_GPR(rA), PPC_GPR(rB), PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	asmALUMemImm8(X86_OR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), 0x20); // CR_CR0_EQ
	asmResolveFixup(fixup, asmHERE());
	asmResolveFixup(no_reservation, asmHERE());
	// CR0[SO] |= XER[SO].
	asmTESTDMemImm((uint32)&gCPU.xer, XER_SO);
	fixup = asmJxxFixup(X86_Z);
	asmALUMemImm8(X86_OR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), 0x10); // CR_CR0_SO
	asmResolveFixup(fixup, asmHERE());
	return flowEndBlock;
}
3730     /*
3731     * stwu Store Word with Update
3732     * .663
3733     */
3734     void ppc_opc_stwu()
3735     {
3736     int rA, rS;
3737     uint32 imm;
3738     PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rS, rA, imm);
3739     // FIXME: check rA!=0
3740     int ret = ppc_write_effective_word(gCPU.gpr[rA]+imm, gCPU.gpr[rS]);
3741     if (ret == PPC_MMU_OK) {
3742     gCPU.gpr[rA] += imm;
3743     }
3744     }
// JITC: emit code for stwu — word store, then rA += imm.
JITCFlow ppc_opc_gen_stwu()
{
	int rA, rS;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rS, rA, imm);
	// FIXME: check rA!=0
	ppc_opc_gen_helper_stu(PPC_GPR(rA), imm, PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	if (imm) {
		// Emit the rA update after the store call.
		NativeReg r = jitcGetClientRegisterDirty(PPC_GPR(rA));
		asmALURegImm(X86_ADD, r, imm);
	}
	return flowContinue;
}
3759     /*
3760     * stwux Store Word with Update Indexed
3761     * .664
3762     */
3763     void ppc_opc_stwux()
3764     {
3765     int rA, rS, rB;
3766     PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
3767     // FIXME: check rA!=0
3768     int ret = ppc_write_effective_word(gCPU.gpr[rA]+gCPU.gpr[rB], gCPU.gpr[rS]);
3769     if (ret == PPC_MMU_OK) {
3770     gCPU.gpr[rA] += gCPU.gpr[rB];
3771     }
3772     }
// JITC: emit code for stwux — word store, then rA += rB.
JITCFlow ppc_opc_gen_stwux()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
	ppc_opc_gen_helper_stux(PPC_GPR(rA), PPC_GPR(rB), PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	// Apply the update form after the store returns.
	NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
	NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
	asmALURegReg(X86_ADD, a, b);
	return flowContinue;
}
3784     /*
3785     * stwx Store Word Indexed
3786     * .665
3787     */
3788     void ppc_opc_stwx()
3789     {
3790     int rA, rS, rB;
3791     PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
3792     ppc_write_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], gCPU.gpr[rS]) != PPC_MMU_FATAL;
3793     }
// JITC: emit native code for stwx (X-form word store).
JITCFlow ppc_opc_gen_stwx()
{
	int rA, rS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
	ppc_opc_gen_helper_stx(PPC_GPR(rA), PPC_GPR(rB), PPC_GPR(rS));
	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	return flowEndBlock;
}
3802    
3803     /* stvx Store Vector Indexed
3804     * v.134
3805     */
3806     void ppc_opc_stvx()
3807     {
3808     #ifndef __VEC_EXC_OFF__
3809     if ((gCPU.msr & MSR_VEC) == 0) {
3810     ppc_exception(PPC_EXC_NO_VEC);
3811     return;
3812     }
3813     #endif
3814     VECTOR_DEBUG;
3815     int rA, vrS, rB;
3816     PPC_OPC_TEMPL_X(gCPU.current_opc, vrS, rA, rB);
3817    
3818     int ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]);
3819    
3820     ppc_write_effective_qword(ea, gCPU.vr[vrS]) != PPC_MMU_FATAL;
3821     }
// JITC: emit code for stvx — store the 16-byte vector vrS. The active
// path (#if 1) calls the qword store stub with EDX pointing at the
// in-memory vector; the #else path is a disabled alternative that splits
// the store into two 64-bit writes.
JITCFlow ppc_opc_gen_stvx()
{
	ppc_opc_gen_check_vec();
	int rA, vrS, rB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, vrS, rA, rB);

	// Make sure gCPU.vr[vrS] in memory is up to date before storing it.
	jitcFlushClientVectorRegister(vrS);
	jitcAssertFlushedVectorRegister(vrS);

	jitcClobberCarryAndFlags();
	NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);

#if 1
	jitcClobberAll();
	if (regA != EAX) asmALURegReg(X86_MOV, EAX, regA);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);

	// The SSE variant is disabled ("0 &&"); always goes through the
	// generic qword store stub with the vector's memory address in EDX.
	if (0 && vrS == gJITC.nativeVectorReg) {
		asmALURegImm(X86_MOV, EDX, (uint32)&(gCPU.vr[JITC_VECTOR_TEMP]));
		asmCALL((NativeAddress)ppc_write_effective_qword_sse_asm);
	} else {
		asmALURegImm(X86_MOV, EDX, (uint32)&(gCPU.vr[vrS]));
		asmCALL((NativeAddress)ppc_write_effective_qword_asm);
	}
#else
	// Disabled alternative: align EA down to 16 bytes, stash it, and
	// perform two 8-byte stores of the vector halves.
	asmALURegImm(X86_AND, regA, ~0x0f);
	asmMOVDMemReg((uint32)&gCPU.vtemp, regA);

	jitcClobberAll();
	if (regA != EAX) asmALURegReg(X86_MOV, EAX, regA);
	asmALURegImm(X86_MOV, ESI, gJITC.pc);

	asmMOVRegDMem(ECX, (uint32)&(gCPU.vr[vrS])+12);
	asmMOVRegDMem(EDX, (uint32)&(gCPU.vr[vrS])+8);

	asmCALL((NativeAddress)ppc_write_effective_dword_asm);

	asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);
	asmALURegImm(X86_OR, EAX, 8);

	asmMOVRegDMem(ECX, (uint32)&(gCPU.vr[vrS])+4);
	asmMOVRegDMem(EDX, (uint32)&(gCPU.vr[vrS])+0);

	asmCALL((NativeAddress)ppc_write_effective_dword_asm);
#endif
	return flowEndBlock;
}
3869    
3870     /* stvxl Store Vector Indexed LRU
3871     * v.135
3872     */
3873     void ppc_opc_stvxl()
3874     {
3875     ppc_opc_stvx();
3876     /* This instruction should hint to the cache that the value won't be
3877     * needed again in memory anytime soon. We don't emulate the cache,
3878     * so this is effectively exactly the same as stvx.
3879     */
3880     }
3881     JITCFlow ppc_opc_gen_stvxl()
3882     {
3883     return ppc_opc_gen_stvx();
3884     }
3885    
3886     /* stvebx Store Vector Element Byte Indexed
3887     * v.131
3888     */
3889     void ppc_opc_stvebx()
3890     {
3891     #ifndef __VEC_EXC_OFF__
3892     if ((gCPU.msr & MSR_VEC) == 0) {
3893     ppc_exception(PPC_EXC_NO_VEC);
3894     return;
3895     }
3896     #endif
3897     VECTOR_DEBUG;
3898     int rA, vrS, rB;
3899     PPC_OPC_TEMPL_X(gCPU.current_opc, vrS, rA, rB);
3900     uint32 ea;
3901     ea = (rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB];
3902     ppc_write_effective_byte(ea, VECT_B(gCPU.vr[vrS], ea & 0xf));
3903     }
/* JIT translation of stvebx: store one byte element of a vector register. */
JITCFlow ppc_opc_gen_stvebx()
{
	ppc_opc_gen_check_vec();
	int rA, vrS, rB;
	byte modrm[6];
	PPC_OPC_TEMPL_X(gJITC.current_opc, vrS, rA, rB);

	// The element is read from gCPU.vr[vrS] in memory -- flush any
	// cached native copy first.
	jitcFlushClientVectorRegister(vrS);
	jitcAssertFlushedVectorRegister(vrS);

	jitcClobberCarryAndFlags();
	// EA into a native register (prefer EAX).
	NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);
	// Stash the full EA; regA is about to be turned into a byte index.
	asmMOVDMemReg((uint32)&gCPU.vtemp, regA);
	// regA = ~(ea & 0xf) = -(ea & 0xf) - 1. Combined with the +16
	// displacement below this addresses gCPU.vr[vrS] byte
	// 15 - (ea & 0xf), i.e. big-endian element (ea & 0xf) of the
	// little-endian-stored vector.
	asmALURegImm(X86_AND, regA, 0x0f);
	asmALUReg(X86_NOT, regA);

	jitcClobberAll();
	if (regA != EAX) asmALURegReg(X86_MOV, EAX, regA);
	// ESI = client PC for exception reporting by the asm stub.
	asmALURegImm(X86_MOV, ESI, gJITC.pc);

	// DL = selected vector byte.
	asmALURegMem8(X86_MOV, DL, modrm,
	x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrS])+16));

	// Restore the original EA into EAX for the store stub.
	asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);

	asmCALL((NativeAddress)ppc_write_effective_byte_asm);
	return flowEndBlock;
}


3934     /* stvehx Store Vector Element Half Word Indexed
3935     * v.132
3936     */
3937     void ppc_opc_stvehx()
3938     {
3939     #ifndef __VEC_EXC_OFF__
3940     if ((gCPU.msr & MSR_VEC) == 0) {
3941     ppc_exception(PPC_EXC_NO_VEC);
3942     return;
3943     }
3944     #endif
3945     VECTOR_DEBUG;
3946     int rA, vrS, rB;
3947     PPC_OPC_TEMPL_X(gCPU.current_opc, vrS, rA, rB);
3948     uint32 ea;
3949     ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]) & ~1;
3950     ppc_write_effective_half(ea, VECT_H(gCPU.vr[vrS], (ea & 0xf) >> 1));
3951     }
/* JIT translation of stvehx: store one halfword element of a vector register. */
JITCFlow ppc_opc_gen_stvehx()
{
	ppc_opc_gen_check_vec();
	int rA, vrS, rB;
	byte modrm[6];
	PPC_OPC_TEMPL_X(gJITC.current_opc, vrS, rA, rB);

	// Element is read from gCPU.vr[vrS] in memory -- flush caches first.
	jitcFlushClientVectorRegister(vrS);
	jitcAssertFlushedVectorRegister(vrS);

	jitcClobberCarryAndFlags();
	NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);
	// Save the raw EA; regA becomes the (negated) element offset.
	asmMOVDMemReg((uint32)&gCPU.vtemp, regA);
	// regA = ~(ea & 0xe) = -(ea & 0xe) - 1. With the displacements
	// below this reads bytes 14-(ea&0xe) and 15-(ea&0xe) of the
	// little-endian-stored vector: the halfword at big-endian element
	// (ea & 0xf) >> 1.
	asmALURegImm(X86_AND, regA, 0x0e);
	asmALUReg(X86_NOT, regA);

	jitcClobberAll();
	if (regA != EAX) asmALURegReg(X86_MOV, EAX, regA);
	// ESI = client PC for exception reporting by the asm stub.
	asmALURegImm(X86_MOV, ESI, gJITC.pc);

	// DX = selected halfword, assembled byte by byte (low then high).
	asmALURegMem8(X86_MOV, DL, modrm,
	x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrS])+15));
	asmALURegMem8(X86_MOV, DH, modrm,
	x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrS])+16));

	// EAX = EA aligned down to a halfword boundary.
	asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);
	asmALURegImm(X86_AND, EAX, ~0x1);

	asmCALL((NativeAddress)ppc_write_effective_half_asm);
	return flowEndBlock;
}


3985     /* stvewx Store Vector Element Word Indexed
3986     * v.133
3987     */
3988     void ppc_opc_stvewx()
3989     {
3990     #ifndef __VEC_EXC_OFF__
3991     if ((gCPU.msr & MSR_VEC) == 0) {
3992     ppc_exception(PPC_EXC_NO_VEC);
3993     return;
3994     }
3995     #endif
3996     VECTOR_DEBUG;
3997     int rA, vrS, rB;
3998     PPC_OPC_TEMPL_X(gCPU.current_opc, vrS, rA, rB);
3999     uint32 ea;
4000     ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]) & ~3;
4001     ppc_write_effective_word(ea, VECT_W(gCPU.vr[vrS], (ea & 0xf) >> 2));
4002     }
/* JIT translation of stvewx: store one word element of a vector register. */
JITCFlow ppc_opc_gen_stvewx()
{
	ppc_opc_gen_check_vec();
	int rA, vrS, rB;
	byte modrm[6];
	PPC_OPC_TEMPL_X(gJITC.current_opc, vrS, rA, rB);

	// Element is read from gCPU.vr[vrS] in memory -- flush caches first.
	jitcFlushClientVectorRegister(vrS);
	jitcAssertFlushedVectorRegister(vrS);

	jitcClobberCarryAndFlags();
	NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);
	// Save the raw EA; regA becomes the (negated) element offset.
	asmMOVDMemReg((uint32)&gCPU.vtemp, regA);
	// regA = ~(ea & 0xc) = -(ea & 0xc) - 1. With the +13 displacement
	// below this reads the dword at offset 12 - (ea & 0xc) of the
	// little-endian-stored vector: big-endian word element
	// (ea & 0xf) >> 2.
	asmALURegImm(X86_AND, regA, 0x0c);
	asmALUReg(X86_NOT, regA);

	jitcClobberAll();
	if (regA != EAX) asmALURegReg(X86_MOV, EAX, regA);
	// ESI = client PC for exception reporting by the asm stub.
	asmALURegImm(X86_MOV, ESI, gJITC.pc);

	// EDX = selected vector word.
	asmALURegMem(X86_MOV, EDX, modrm,
	x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrS])+13));

	// EAX = EA aligned down to a word boundary.
	asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);
	asmALURegImm(X86_AND, EAX, ~0x3);

	asmCALL((NativeAddress)ppc_write_effective_word_asm);
	return flowEndBlock;
}

4033     /* dstst Data Stream Touch for Store
4034     * v.117
4035     */
4036     void ppc_opc_dstst()
4037     {
4038     VECTOR_DEBUG;
4039     /* Since we are not emulating the cache, this is a nop */
4040     }
4041     JITCFlow ppc_opc_gen_dstst()
4042     {
4043     /* Since we are not emulating the cache, this is a nop */
4044     return flowContinue;
4045     }
4046    
4047     /* dss Data Stream Stop
4048     * v.114
4049     */
4050     void ppc_opc_dss()
4051     {
4052     VECTOR_DEBUG;
4053     /* Since we are not emulating the cache, this is a nop */
4054     }
4055     JITCFlow ppc_opc_gen_dss()
4056     {
4057     /* Since we are not emulating the cache, this is a nop */
4058     return flowContinue;
4059     }

  ViewVC Help
Powered by ViewVC 1.1.26