/[pearpc]/src/cpu/cpu_jitc_x86/ppc_mmu.cc
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /src/cpu/cpu_jitc_x86/ppc_mmu.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Wed Sep 5 17:11:21 2007 UTC (12 years, 2 months ago) by dpavlin
File size: 104660 byte(s)
import upstream CVS
1 /*
2 * PearPC
3 * ppc_mmu.cc
4 *
5 * Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net)
6 * Copyright (C) 2004 Daniel Foesch (dfoesch@cs.nmsu.edu)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22 #include <cstdlib>
23 #include <cstring>
24 #include "tools/snprintf.h"
25 #include "debug/tracers.h"
26 #include "io/prom/prom.h"
27 #include "io/io.h"
28 #include "ppc_cpu.h"
29 #include "ppc_fpu.h"
30 #include "ppc_vec.h"
31 #include "ppc_mmu.h"
32 #include "ppc_exc.h"
33 #include "ppc_tools.h"
34
35 #include "x86asm.h"
36 #include "jitc_asm.h"
37
38 byte *gMemory = NULL;
39 uint32 gMemorySize;
40
41 #undef TLB
42
/*
 *	Page protection lookup table.
 *
 *	index = (write access ? 8 : 0) + (key ? 4 : 0) + pp
 *
 *	A non-zero entry means the access is permitted.
 */
static int ppc_pte_protection[] = {
	/* read access */
	/* key=0, pp=0..3: r/w, r/w, r/w, r */	1, 1, 1, 1,
	/* key=1, pp=0..3: -,   r,   r/w, r */	0, 1, 1, 1,

	/* write access */
	/* key=0, pp=0..3: r/w, r/w, r/w, r */	1, 1, 1, 0,
	/* key=1, pp=0..3: -,   r,   r/w, r */	0, 0, 1, 0,
};
66
67 int FASTCALL ppc_effective_to_physical(uint32 addr, int flags, uint32 &result)
68 {
69 if (flags & PPC_MMU_CODE) {
70 if (!(gCPU.msr & MSR_IR)) {
71 result = addr;
72 return PPC_MMU_OK;
73 }
74 /*
75 * BAT translation .329
76 */
77 for (int i=0; i<4; i++) {
78 if ((addr & gCPU.ibat_bl[i]) == gCPU.ibat_bepi[i]) {
79 // bat applies to this address
80 if (((gCPU.ibatu[i] & BATU_Vs) && !(gCPU.msr & MSR_PR))
81 || ((gCPU.ibatu[i] & BATU_Vp) && (gCPU.msr & MSR_PR))) {
82 // bat entry valid
83 addr &= gCPU.ibat_nbl[i];
84 addr |= gCPU.ibat_brpn[i];
85 result = addr;
86 // FIXME: check access rights
87 return PPC_MMU_OK;
88 }
89 }
90 }
91 } else {
92 if (!(gCPU.msr & MSR_DR)) {
93 result = addr;
94 return PPC_MMU_OK;
95 }
96 /*
97 * BAT translation .329
98 */
99 for (int i=0; i<4; i++) {
100 if ((addr & gCPU.dbat_bl[i]) == gCPU.dbat_bepi[i]) {
101 // bat applies to this address
102 if (((gCPU.dbatu[i] & BATU_Vs) && !(gCPU.msr & MSR_PR))
103 || ((gCPU.dbatu[i] & BATU_Vp) && (gCPU.msr & MSR_PR))) {
104 // bat entry valid
105 addr &= gCPU.dbat_nbl[i];
106 addr |= gCPU.dbat_brpn[i];
107 result = addr;
108 // FIXME: check access rights
109 return PPC_MMU_OK;
110 }
111 }
112 }
113 }
114
115 /*
116 * Address translation with segment register
117 */
118 uint32 sr = gCPU.sr[EA_SR(addr)];
119
120 if (sr & SR_T) {
121 // woea
122 // FIXME: implement me
123 PPC_MMU_ERR("sr & T\n");
124 } else {
125 #ifdef TLB
126 for (int i=0; i<4; i++) {
127 if ((addr & ~0xfff) == (gCPU.tlb_va[i])) {
128 gCPU.tlb_last = i;
129 // ht_printf("TLB: %d: %08x -> %08x\n", i, addr, gCPU.tlb_pa[i] | (addr & 0xfff));
130 result = gCPU.tlb_pa[i] | (addr & 0xfff);
131 return PPC_MMU_OK;
132 }
133 }
134 #endif
135 // page address translation
136 if ((flags & PPC_MMU_CODE) && (sr & SR_N)) {
137 // segment isnt executable
138 if (!(flags & PPC_MMU_NO_EXC)) {
139 ppc_exception(PPC_EXC_ISI, PPC_EXC_SRR1_GUARD);
140 return PPC_MMU_EXC;
141 }
142 return PPC_MMU_FATAL;
143 }
144 uint32 offset = EA_Offset(addr); // 12 bit
145 uint32 page_index = EA_PageIndex(addr); // 16 bit
146 uint32 VSID = SR_VSID(sr); // 24 bit
147 uint32 api = EA_API(addr); // 6 bit (part of page_index)
148 // VSID.page_index = Virtual Page Number (VPN)
149
150 // Hashfunction no 1 "xor" .360
151 uint32 hash1 = (VSID ^ page_index);
152 uint32 pteg_addr = ((hash1 & gCPU.pagetable_hashmask)<<6) | gCPU.pagetable_base;
153 for (int i=0; i<8; i++) {
154 uint32 pte;
155 if (ppc_read_physical_word(pteg_addr, pte)) {
156 if (!(flags & PPC_MMU_NO_EXC)) {
157 PPC_MMU_ERR("read physical in address translate failed\n");
158 return PPC_MMU_EXC;
159 }
160 return PPC_MMU_FATAL;
161 }
162 if ((pte & PTE1_V) && (!(pte & PTE1_H))) {
163 if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte))) {
164 // page found
165 if (ppc_read_physical_word(pteg_addr+4, pte)) {
166 if (!(flags & PPC_MMU_NO_EXC)) {
167 PPC_MMU_ERR("read physical in address translate failed\n");
168 return PPC_MMU_EXC;
169 }
170 return PPC_MMU_FATAL;
171 }
172 // check accessmode .346
173 int key;
174 if (gCPU.msr & MSR_PR) {
175 key = (sr & SR_Kp) ? 4 : 0;
176 } else {
177 key = (sr & SR_Ks) ? 4 : 0;
178 }
179 if (!ppc_pte_protection[((flags&PPC_MMU_WRITE)?8:0) + key + PTE2_PP(pte)]) {
180 if (!(flags & PPC_MMU_NO_EXC)) {
181 if (flags & PPC_MMU_CODE) {
182 PPC_MMU_WARN("correct impl? code + read protection\n");
183 ppc_exception(PPC_EXC_ISI, PPC_EXC_SRR1_PROT, addr);
184 return PPC_MMU_EXC;
185 } else {
186 if (flags & PPC_MMU_WRITE) {
187 ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PROT | PPC_EXC_DSISR_STORE, addr);
188 } else {
189 ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PROT, addr);
190 }
191 return PPC_MMU_EXC;
192 }
193 }
194 return PPC_MMU_FATAL;
195 }
196 // ok..
197 uint32 pap = PTE2_RPN(pte);
198 result = pap | offset;
199 #ifdef TLB
200 gCPU.tlb_last++;
201 gCPU.tlb_last &= 3;
202 gCPU.tlb_pa[gCPU.tlb_last] = pap;
203 gCPU.tlb_va[gCPU.tlb_last] = addr & ~0xfff;
204 // ht_printf("TLB: STORE %d: %08x -> %08x\n", gCPU.tlb_last, addr, pap);
205 #endif
206 // update access bits
207 // FIXME: is someone actually using this?
208 if (flags & PPC_MMU_WRITE) {
209 pte |= PTE2_C | PTE2_R;
210 } else {
211 pte |= PTE2_R;
212 }
213 ppc_write_physical_word(pteg_addr+4, pte);
214 return PPC_MMU_OK;
215 }
216 }
217 pteg_addr+=8;
218 }
219
220 // Hashfunction no 2 "not" .360
221 hash1 = ~hash1;
222 pteg_addr = ((hash1 & gCPU.pagetable_hashmask)<<6) | gCPU.pagetable_base;
223 for (int i=0; i<8; i++) {
224 uint32 pte;
225 if (ppc_read_physical_word(pteg_addr, pte)) {
226 if (!(flags & PPC_MMU_NO_EXC)) {
227 PPC_MMU_ERR("read physical in address translate failed\n");
228 return PPC_MMU_EXC;
229 }
230 return PPC_MMU_FATAL;
231 }
232 if ((pte & PTE1_V) && (pte & PTE1_H)) {
233 if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte))) {
234 // page found
235 if (ppc_read_physical_word(pteg_addr+4, pte)) {
236 if (!(flags & PPC_MMU_NO_EXC)) {
237 PPC_MMU_ERR("read physical in address translate failed\n");
238 return PPC_MMU_EXC;
239 }
240 return PPC_MMU_FATAL;
241 }
242 // check accessmode
243 int key;
244 if (gCPU.msr & MSR_PR) {
245 key = (sr & SR_Kp) ? 4 : 0;
246 } else {
247 key = (sr & SR_Ks) ? 4 : 0;
248 }
249 if (!ppc_pte_protection[((flags&PPC_MMU_WRITE)?8:0) + key + PTE2_PP(pte)]) {
250 if (!(flags & PPC_MMU_NO_EXC)) {
251 if (flags & PPC_MMU_CODE) {
252 PPC_MMU_WARN("correct impl? code + read protection\n");
253 ppc_exception(PPC_EXC_ISI, PPC_EXC_SRR1_PROT, addr);
254 return PPC_MMU_EXC;
255 } else {
256 if (flags & PPC_MMU_WRITE) {
257 ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PROT | PPC_EXC_DSISR_STORE, addr);
258 } else {
259 ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PROT, addr);
260 }
261 return PPC_MMU_EXC;
262 }
263 }
264 return PPC_MMU_FATAL;
265 }
266 // ok..
267 result = PTE2_RPN(pte) | offset;
268
269 // update access bits
270 // FIXME: is someone actually using this?
271 if (flags & PPC_MMU_WRITE) {
272 pte |= PTE2_C | PTE2_R;
273 } else {
274 pte |= PTE2_R;
275 }
276 ppc_write_physical_word(pteg_addr+4, pte);
277 // PPC_MMU_WARN("hash function 2 used!\n");
278 // gSinglestep = true;
279 return PPC_MMU_OK;
280 }
281 }
282 pteg_addr+=8;
283 }
284 }
285 // page fault
286 if (!(flags & PPC_MMU_NO_EXC)) {
287 if (flags & PPC_MMU_CODE) {
288 ppc_exception(PPC_EXC_ISI, PPC_EXC_SRR1_PAGE);
289 } else {
290 if (flags & PPC_MMU_WRITE) {
291 ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PAGE | PPC_EXC_DSISR_STORE, addr);
292 } else {
293 ppc_exception(PPC_EXC_DSI, PPC_EXC_DSISR_PAGE, addr);
294 }
295 }
296 return PPC_MMU_EXC;
297 }
298 return PPC_MMU_FATAL;
299 }
300
301 int FASTCALL ppc_effective_to_physical_vm(uint32 addr, int flags, uint32 &result)
302 {
303 if (!(gCPU.msr & MSR_DR)) {
304 result = addr;
305 return PPC_MMU_READ | PPC_MMU_WRITE;
306 }
307 /*
308 * BAT translation .329
309 */
310 for (int i=0; i<4; i++) {
311 if ((addr & gCPU.dbat_bl[i]) == gCPU.dbat_bepi[i]) {
312 // bat applies to this address
313 if (((gCPU.dbatu[i] & BATU_Vs) && !(gCPU.msr & MSR_PR))
314 || ((gCPU.dbatu[i] & BATU_Vp) && (gCPU.msr & MSR_PR))) {
315 // bat entry valid
316 addr &= gCPU.dbat_nbl[i];
317 addr |= gCPU.dbat_brpn[i];
318 result = addr;
319 // FIXME: check access rights
320 return PPC_MMU_OK;
321 }
322 }
323 }
324
325 /*
326 * Address translation with segment register
327 */
328 uint32 sr = gCPU.sr[EA_SR(addr)];
329
330 if (sr & SR_T) {
331 // woea
332 // FIXME: implement me
333 PPC_MMU_ERR("sr & T\n");
334 } else {
335 // page address translation
336 uint32 offset = EA_Offset(addr); // 12 bit
337 uint32 page_index = EA_PageIndex(addr); // 16 bit
338 uint32 VSID = SR_VSID(sr); // 24 bit
339 uint32 api = EA_API(addr); // 6 bit (part of page_index)
340 // VSID.page_index = Virtual Page Number (VPN)
341
342 // Hashfunction no 1 "xor" .360
343 uint32 hash1 = (VSID ^ page_index);
344 uint32 pteg_addr = ((hash1 & gCPU.pagetable_hashmask)<<6) | gCPU.pagetable_base;
345 for (int i=0; i<8; i++) {
346 uint32 pte;
347 if (ppc_read_physical_word(pteg_addr, pte)) {
348 return PPC_MMU_FATAL;
349 }
350 if ((pte & PTE1_V) && (!(pte & PTE1_H))) {
351 if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte))) {
352 // page found
353 if (ppc_read_physical_word(pteg_addr+4, pte)) {
354 return 0;
355 }
356 // check accessmode .346
357 int key;
358 if (gCPU.msr & MSR_PR) {
359 key = (sr & SR_Kp) ? 4 : 0;
360 } else {
361 key = (sr & SR_Ks) ? 4 : 0;
362 }
363 int ret = PPC_MMU_WRITE | PPC_MMU_READ;
364 if (!ppc_pte_protection[8 + key + PTE2_PP(pte)]) {
365 if (!(flags & PPC_MMU_NO_EXC)) {
366 if (flags & PPC_MMU_WRITE) {
367 gCPU.dsisr = PPC_EXC_DSISR_PROT | PPC_EXC_DSISR_STORE;
368 }
369 }
370 ret &= ~PPC_MMU_WRITE;
371 }
372 if (!ppc_pte_protection[key + PTE2_PP(pte)]) {
373 if (!(flags & PPC_MMU_NO_EXC)) {
374 if (!(flags & PPC_MMU_WRITE)) {
375 gCPU.dsisr = PPC_EXC_DSISR_PROT;
376 }
377 }
378 return PPC_MMU_OK;
379 }
380 // ok..
381 uint32 pap = PTE2_RPN(pte);
382 result = pap | offset;
383 // update access bits
384 if (ret & PPC_MMU_WRITE) {
385 pte |= PTE2_C | PTE2_R;
386 } else {
387 pte |= PTE2_R;
388 }
389 ppc_write_physical_word(pteg_addr+4, pte);
390 return ret;
391 }
392 }
393 pteg_addr+=8;
394 }
395
396 // Hashfunction no 2 "not" .360
397 hash1 = ~hash1;
398 pteg_addr = ((hash1 & gCPU.pagetable_hashmask)<<6) | gCPU.pagetable_base;
399 for (int i=0; i<8; i++) {
400 uint32 pte;
401 if (ppc_read_physical_word(pteg_addr, pte)) {
402 return PPC_MMU_FATAL;
403 }
404 if ((pte & PTE1_V) && (pte & PTE1_H)) {
405 if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte))) {
406 // page found
407 if (ppc_read_physical_word(pteg_addr+4, pte)) {
408 return 0;
409 }
410 // check accessmode
411 int key;
412 if (gCPU.msr & MSR_PR) {
413 key = (sr & SR_Kp) ? 4 : 0;
414 } else {
415 key = (sr & SR_Ks) ? 4 : 0;
416 }
417 int ret = PPC_MMU_WRITE | PPC_MMU_READ;
418 if (!ppc_pte_protection[8 + key + PTE2_PP(pte)]) {
419 if (!(flags & PPC_MMU_NO_EXC)) {
420 if (flags & PPC_MMU_WRITE) {
421 gCPU.dsisr = PPC_EXC_DSISR_PROT | PPC_EXC_DSISR_STORE;
422 }
423 }
424 ret &= ~PPC_MMU_WRITE;
425 }
426 if (!ppc_pte_protection[key + PTE2_PP(pte)]) {
427 if (!(flags & PPC_MMU_NO_EXC)) {
428 if (!(flags & PPC_MMU_WRITE)) {
429 gCPU.dsisr = PPC_EXC_DSISR_PROT;
430 }
431 }
432 return PPC_MMU_OK;
433 }
434 // ok..
435 result = PTE2_RPN(pte) | offset;
436
437 // update access bits
438 if (ret & PPC_MMU_WRITE) {
439 pte |= PTE2_C | PTE2_R;
440 } else {
441 pte |= PTE2_R;
442 }
443 ppc_write_physical_word(pteg_addr+4, pte);
444 return ret;
445 }
446 }
447 pteg_addr+=8;
448 }
449 }
450 // page fault
451 if (!(flags & PPC_MMU_NO_EXC)) {
452 if (flags & PPC_MMU_WRITE) {
453 gCPU.dsisr = PPC_EXC_DSISR_PAGE | PPC_EXC_DSISR_STORE;
454 } else {
455 gCPU.dsisr = PPC_EXC_DSISR_PAGE;
456 }
457 }
458 return PPC_MMU_OK;
459 }
460
461 void ppc_mmu_tlb_invalidate()
462 {
463 gCPU.effective_code_page = 0xffffffff;
464 ppc_mmu_tlb_invalidate_all_asm();
465 }
466
/*
 *	pagetable:
 *	min. 2^10 PTEGs (64k bytes, since one PTEG = 64 bytes)
 *	The page table can be any size 2^n bytes where 16 <= n <= 25.
 *
 *	A PTEG contains eight PTEs of eight bytes each;
 *	therefore, each PTEG is 64 bytes long.
 */
476
477 bool FASTCALL ppc_mmu_set_sdr1(uint32 newval, bool quiesce)
478 {
479 /* if (newval == gCPU.sdr1)*/ quiesce = false;
480 PPC_MMU_TRACE("new pagetable: sdr1 = 0x%08x\n", newval);
481 uint32 htabmask = SDR1_HTABMASK(newval);
482 uint32 x = 1;
483 uint32 xx = 0;
484 int n = 0;
485 while ((htabmask & x) && (n < 9)) {
486 n++;
487 xx|=x;
488 x<<=1;
489 }
490 if (htabmask & ~xx) {
491 PPC_MMU_WARN("new pagetable: broken htabmask (%05x)\n", htabmask);
492 return false;
493 }
494 uint32 htaborg = SDR1_HTABORG(newval);
495 if (htaborg & xx) {
496 PPC_MMU_WARN("new pagetable: broken htaborg (%05x)\n", htaborg);
497 return false;
498 }
499 gCPU.pagetable_base = htaborg<<16;
500 gCPU.sdr1 = newval;
501 gCPU.pagetable_hashmask = ((xx<<10)|0x3ff);
502 uint a = (0xffffffff & gCPU.pagetable_hashmask) | gCPU.pagetable_base;
503 if (a > gMemorySize) {
504 PPC_MMU_WARN("new pagetable: not in memory (%08x)\n", a);
505 return false;
506 }
507 PPC_MMU_TRACE("new pagetable: sdr1 accepted\n");
508 PPC_MMU_TRACE("number of pages: 2^%d pagetable_start: 0x%08x size: 2^%d\n", n+13, gCPU.pagetable_base, n+16);
509 if (quiesce) {
510 prom_quiesce();
511 }
512 return true;
513 }
514
515 int FASTCALL ppc_direct_physical_memory_handle(uint32 addr, byte *&ptr)
516 {
517 if (addr < gMemorySize) {
518 ptr = &gMemory[addr];
519 return PPC_MMU_OK;
520 }
521 return PPC_MMU_FATAL;
522 }
523
524 int FASTCALL ppc_direct_effective_memory_handle(uint32 addr, byte *&ptr)
525 {
526 uint32 ea;
527 int r;
528 if (!((r = ppc_effective_to_physical(addr, PPC_MMU_READ, ea)))) {
529 return ppc_direct_physical_memory_handle(ea, ptr);
530 }
531 return r;
532 }
533
534 int FASTCALL ppc_direct_effective_memory_handle_code(uint32 addr, byte *&ptr)
535 {
536 uint32 ea;
537 int r;
538 if (!((r = ppc_effective_to_physical(addr, PPC_MMU_READ | PPC_MMU_CODE, ea)))) {
539 return ppc_direct_physical_memory_handle(ea, ptr);
540 }
541 return r;
542 }
543
544 int FASTCALL ppc_read_physical_qword(uint32 addr, Vector_t &result)
545 {
546 if (addr < gMemorySize) {
547 // big endian
548 VECT_D(result,0) = ppc_dword_from_BE(*((uint64*)(gMemory+addr)));
549 VECT_D(result,1) = ppc_dword_from_BE(*((uint64*)(gMemory+addr+8)));
550 return PPC_MMU_OK;
551 }
552 return io_mem_read128(addr, (uint128 *)&result);
553 }
554
555 int FASTCALL ppc_read_physical_dword(uint32 addr, uint64 &result)
556 {
557 if (addr < gMemorySize) {
558 // big endian
559 result = ppc_dword_from_BE(*((uint64*)(gMemory+addr)));
560 return PPC_MMU_OK;
561 }
562 int ret = io_mem_read64(addr, result);
563 result = ppc_bswap_dword(result);
564 return ret;
565 }
566
567 int FASTCALL ppc_read_physical_word(uint32 addr, uint32 &result)
568 {
569 if (addr < gMemorySize) {
570 // big endian
571 result = ppc_word_from_BE(*((uint32*)(gMemory+addr)));
572 return PPC_MMU_OK;
573 }
574 int ret = io_mem_read(addr, result, 4);
575 result = ppc_bswap_word(result);
576 return ret;
577 }
578
579 int FASTCALL ppc_read_physical_half(uint32 addr, uint16 &result)
580 {
581 if (addr < gMemorySize) {
582 // big endian
583 result = ppc_half_from_BE(*((uint16*)(gMemory+addr)));
584 return PPC_MMU_OK;
585 }
586 uint32 r;
587 int ret = io_mem_read(addr, r, 2);
588 result = ppc_bswap_half(r);
589 return ret;
590 }
591
592 int FASTCALL ppc_read_physical_byte(uint32 addr, uint8 &result)
593 {
594 if (addr < gMemorySize) {
595 // big endian
596 result = gMemory[addr];
597 return PPC_MMU_OK;
598 }
599 uint32 r;
600 int ret = io_mem_read(addr, r, 1);
601 result = r;
602 return ret;
603 }
604
605 int FASTCALL ppc_read_effective_code(uint32 addr, uint32 &result)
606 {
607 if (addr & 3) {
608 // EXC..bla
609 return PPC_MMU_FATAL;
610 }
611 uint32 p;
612 int r;
613 if (!((r=ppc_effective_to_physical(addr, PPC_MMU_READ | PPC_MMU_CODE, p)))) {
614 return ppc_read_physical_word(p, result);
615 }
616 return r;
617 }
618
619 int FASTCALL ppc_read_effective_qword(uint32 addr, Vector_t &result)
620 {
621 uint32 p;
622 int r;
623
624 addr &= ~0x0f;
625
626 if (!(r = ppc_effective_to_physical(addr, PPC_MMU_READ, p))) {
627 return ppc_read_physical_qword(p, result);
628 }
629
630 return r;
631 }
632
633 int FASTCALL ppc_read_effective_dword(uint32 addr, uint64 &result)
634 {
635 uint32 p;
636 int r;
637 if (!(r = ppc_effective_to_physical(addr, PPC_MMU_READ, p))) {
638 if (EA_Offset(addr) > 4088) {
639 // read overlaps two pages.. tricky
640 byte *r1, *r2;
641 byte b[14];
642 ppc_effective_to_physical((addr & ~0xfff)+4089, PPC_MMU_READ, p);
643 if ((r = ppc_direct_physical_memory_handle(p, r1))) return r;
644 if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_READ, p))) return r;
645 if ((r = ppc_direct_physical_memory_handle(p, r2))) return r;
646 memmove(&b[0], r1, 7);
647 memmove(&b[7], r2, 7);
648 memmove(&result, &b[EA_Offset(addr)-4089], 8);
649 result = ppc_dword_from_BE(result);
650 return PPC_MMU_OK;
651 } else {
652 return ppc_read_physical_dword(p, result);
653 }
654 }
655 return r;
656 }
657
658 int FASTCALL ppc_read_effective_word(uint32 addr, uint32 &result)
659 {
660 uint32 p;
661 int r;
662 if (!(r = ppc_effective_to_physical(addr, PPC_MMU_READ, p))) {
663 if (EA_Offset(addr) > 4092) {
664 // read overlaps two pages.. tricky
665 byte *r1, *r2;
666 byte b[6];
667 ppc_effective_to_physical((addr & ~0xfff)+4093, PPC_MMU_READ, p);
668 if ((r = ppc_direct_physical_memory_handle(p, r1))) return r;
669 if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_READ, p))) return r;
670 if ((r = ppc_direct_physical_memory_handle(p, r2))) return r;
671 memmove(&b[0], r1, 3);
672 memmove(&b[3], r2, 3);
673 memmove(&result, &b[EA_Offset(addr)-4093], 4);
674 result = ppc_word_from_BE(result);
675 return PPC_MMU_OK;
676 } else {
677 return ppc_read_physical_word(p, result);
678 }
679 }
680 return r;
681 }
682
683 int FASTCALL ppc_read_effective_half(uint32 addr, uint16 &result)
684 {
685 uint32 p;
686 int r;
687 if (!((r = ppc_effective_to_physical(addr, PPC_MMU_READ, p)))) {
688 if (EA_Offset(addr) > 4094) {
689 // read overlaps two pages.. tricky
690 byte b1, b2;
691 ppc_effective_to_physical((addr & ~0xfff)+4095, PPC_MMU_READ, p);
692 if ((r = ppc_read_physical_byte(p, b1))) return r;
693 if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_READ, p))) return r;
694 if ((r = ppc_read_physical_byte(p, b2))) return r;
695 result = (b1<<8)|b2;
696 return PPC_MMU_OK;
697 } else {
698 return ppc_read_physical_half(p, result);
699 }
700 }
701 return r;
702 }
703
704 int FASTCALL ppc_read_effective_byte(uint32 addr, uint8 &result)
705 {
706 uint32 p;
707 int r;
708 if (!((r = ppc_effective_to_physical(addr, PPC_MMU_READ, p)))) {
709 return ppc_read_physical_byte(p, result);
710 }
711 return r;
712 }
713
714 int FASTCALL ppc_write_physical_qword(uint32 addr, Vector_t data)
715 {
716 if (addr < gMemorySize) {
717 // big endian
718 *((uint64*)(gMemory+addr)) = ppc_dword_to_BE(VECT_D(data,0));
719 *((uint64*)(gMemory+addr+8)) = ppc_dword_to_BE(VECT_D(data,1));
720 return PPC_MMU_OK;
721 }
722 if (io_mem_write128(addr, (uint128 *)&data) == IO_MEM_ACCESS_OK) {
723 return PPC_MMU_OK;
724 } else {
725 return PPC_MMU_FATAL;
726 }
727 }
728
729 int FASTCALL ppc_write_physical_dword(uint32 addr, uint64 data)
730 {
731 if (addr < gMemorySize) {
732 // big endian
733 *((uint64*)(gMemory+addr)) = ppc_dword_to_BE(data);
734 return PPC_MMU_OK;
735 }
736 if (io_mem_write64(addr, ppc_bswap_dword(data)) == IO_MEM_ACCESS_OK) {
737 return PPC_MMU_OK;
738 } else {
739 return PPC_MMU_FATAL;
740 }
741 }
742
743 int FASTCALL ppc_write_physical_word(uint32 addr, uint32 data)
744 {
745 if (addr < gMemorySize) {
746 // big endian
747 *((uint32*)(gMemory+addr)) = ppc_word_to_BE(data);
748 return PPC_MMU_OK;
749 }
750 return io_mem_write(addr, ppc_bswap_word(data), 4);
751 }
752
753 int FASTCALL ppc_write_physical_half(uint32 addr, uint16 data)
754 {
755 if (addr < gMemorySize) {
756 // big endian
757 *((uint16*)(gMemory+addr)) = ppc_half_to_BE(data);
758 return PPC_MMU_OK;
759 }
760 return io_mem_write(addr, ppc_bswap_half(data), 2);
761 }
762
763 int FASTCALL ppc_write_physical_byte(uint32 addr, uint8 data)
764 {
765 if (addr < gMemorySize) {
766 // big endian
767 gMemory[addr] = data;
768 return PPC_MMU_OK;
769 }
770 return io_mem_write(addr, data, 1);
771 }
772
773 int FASTCALL ppc_write_effective_qword(uint32 addr, Vector_t data)
774 {
775 uint32 p;
776 int r;
777
778 addr &= ~0x0f;
779
780 if (!((r=ppc_effective_to_physical(addr, PPC_MMU_WRITE, p)))) {
781 return ppc_write_physical_qword(p, data);
782 }
783 return r;
784 }
785
786 int FASTCALL ppc_write_effective_dword(uint32 addr, uint64 data)
787 {
788 uint32 p;
789 int r;
790 if (!((r=ppc_effective_to_physical(addr, PPC_MMU_WRITE, p)))) {
791 if (EA_Offset(addr) > 4088) {
792 // write overlaps two pages.. tricky
793 byte *r1, *r2;
794 byte b[14];
795 ppc_effective_to_physical((addr & ~0xfff)+4089, PPC_MMU_WRITE, p);
796 if ((r = ppc_direct_physical_memory_handle(p, r1))) return r;
797 if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_WRITE, p))) return r;
798 if ((r = ppc_direct_physical_memory_handle(p, r2))) return r;
799 data = ppc_dword_to_BE(data);
800 memmove(&b[0], r1, 7);
801 memmove(&b[7], r2, 7);
802 memmove(&b[EA_Offset(addr)-4089], &data, 8);
803 memmove(r1, &b[0], 7);
804 memmove(r2, &b[7], 7);
805 return PPC_MMU_OK;
806 } else {
807 return ppc_write_physical_dword(p, data);
808 }
809 }
810 return r;
811 }
812
813 int FASTCALL ppc_write_effective_word(uint32 addr, uint32 data)
814 {
815 uint32 p;
816 int r;
817 if (!((r=ppc_effective_to_physical(addr, PPC_MMU_WRITE, p)))) {
818 if (EA_Offset(addr) > 4092) {
819 // write overlaps two pages.. tricky
820 byte *r1, *r2;
821 byte b[6];
822 ppc_effective_to_physical((addr & ~0xfff)+4093, PPC_MMU_WRITE, p);
823 if ((r = ppc_direct_physical_memory_handle(p, r1))) return r;
824 if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_WRITE, p))) return r;
825 if ((r = ppc_direct_physical_memory_handle(p, r2))) return r;
826 data = ppc_word_to_BE(data);
827 memmove(&b[0], r1, 3);
828 memmove(&b[3], r2, 3);
829 memmove(&b[EA_Offset(addr)-4093], &data, 4);
830 memmove(r1, &b[0], 3);
831 memmove(r2, &b[3], 3);
832 return PPC_MMU_OK;
833 } else {
834 return ppc_write_physical_word(p, data);
835 }
836 }
837 return r;
838 }
839
840 int FASTCALL ppc_write_effective_half(uint32 addr, uint16 data)
841 {
842 uint32 p;
843 int r;
844 if (!((r=ppc_effective_to_physical(addr, PPC_MMU_WRITE, p)))) {
845 if (EA_Offset(addr) > 4094) {
846 // write overlaps two pages.. tricky
847 ppc_effective_to_physical((addr & ~0xfff)+4095, PPC_MMU_WRITE, p);
848 if ((r = ppc_write_physical_byte(p, data>>8))) return r;
849 if ((r = ppc_effective_to_physical((addr & ~0xfff)+4096, PPC_MMU_WRITE, p))) return r;
850 if ((r = ppc_write_physical_byte(p, data))) return r;
851 return PPC_MMU_OK;
852 } else {
853 return ppc_write_physical_half(p, data);
854 }
855 }
856 return r;
857 }
858
859 int FASTCALL ppc_write_effective_byte(uint32 addr, uint8 data)
860 {
861 uint32 p;
862 int r;
863 if (!((r=ppc_effective_to_physical(addr, PPC_MMU_WRITE, p)))) {
864 return ppc_write_physical_byte(p, data);
865 }
866 return r;
867 }
868
869 bool FASTCALL ppc_init_physical_memory(uint size)
870 {
871 if (size < 64*1024*1024) {
872 PPC_MMU_ERR("Main memory size must >= 64MB!\n");
873 }
874 gMemory = (byte*)malloc(size+16);
875 if ((uint32)gMemory & 0x0f) {
876 gMemory += 16 - ((uint32)gMemory & 0x0f);
877 }
878 gMemorySize = size;
879 return gMemory != NULL;
880 }
881
882 uint32 ppc_get_memory_size()
883 {
884 return gMemorySize;
885 }
886
887 /***************************************************************************
888 * DMA Interface
889 */
890
891 bool ppc_dma_write(uint32 dest, const void *src, uint32 size)
892 {
893 if (dest > gMemorySize || (dest+size) > gMemorySize) return false;
894
895 byte *ptr;
896 ppc_direct_physical_memory_handle(dest, ptr);
897
898 memcpy(ptr, src, size);
899 return true;
900 }
901
902 bool ppc_dma_read(void *dest, uint32 src, uint32 size)
903 {
904 if (src > gMemorySize || (src+size) > gMemorySize) return false;
905
906 byte *ptr;
907 ppc_direct_physical_memory_handle(src, ptr);
908
909 memcpy(dest, ptr, size);
910 return true;
911 }
912
913 bool ppc_dma_set(uint32 dest, int c, uint32 size)
914 {
915 if (dest > gMemorySize || (dest+size) > gMemorySize) return false;
916
917 byte *ptr;
918 ppc_direct_physical_memory_handle(dest, ptr);
919
920 memset(ptr, c, size);
921 return true;
922 }
923
924
925 /***************************************************************************
926 * DEPRECATED prom interface
927 */
928 bool ppc_prom_set_sdr1(uint32 newval, bool quiesce)
929 {
930 return ppc_mmu_set_sdr1(newval, quiesce);
931 }
932
933 bool ppc_prom_effective_to_physical(uint32 &result, uint32 ea)
934 {
935 return ppc_effective_to_physical(ea, PPC_MMU_READ|PPC_MMU_SV|PPC_MMU_NO_EXC, result) == PPC_MMU_OK;
936 }
937
938 bool ppc_prom_page_create(uint32 ea, uint32 pa)
939 {
940 uint32 sr = gCPU.sr[EA_SR(ea)];
941 uint32 page_index = EA_PageIndex(ea); // 16 bit
942 uint32 VSID = SR_VSID(sr); // 24 bit
943 uint32 api = EA_API(ea); // 6 bit (part of page_index)
944 uint32 hash1 = (VSID ^ page_index);
945 uint32 pte, pte2;
946 uint32 h = 0;
947 for (int j=0; j<2; j++) {
948 uint32 pteg_addr = ((hash1 & gCPU.pagetable_hashmask)<<6) | gCPU.pagetable_base;
949 for (int i=0; i<8; i++) {
950 if (ppc_read_physical_word(pteg_addr, pte)) {
951 PPC_MMU_ERR("read physical in address translate failed\n");
952 return false;
953 }
954 if (!(pte & PTE1_V)) {
955 // free pagetable entry found
956 pte = PTE1_V | (VSID << 7) | h | api;
957 pte2 = (PA_RPN(pa) << 12) | 0;
958 if (ppc_write_physical_word(pteg_addr, pte)
959 || ppc_write_physical_word(pteg_addr+4, pte2)) {
960 return false;
961 } else {
962 // ok
963 return true;
964 }
965 }
966 pteg_addr+=8;
967 }
968 hash1 = ~hash1;
969 h = PTE1_H;
970 }
971 return false;
972 }
973
974 bool ppc_prom_page_free(uint32 ea)
975 {
976 return true;
977 }
978
979 /***************************************************************************
980 * MMU Opcodes
981 */
982
983 #include "ppc_dec.h"
984
985 /*
986 * puts the sum of cr1 and cr2 into EAX
987 * (in the most clever way)
988 */
989 static void getEAXRsum(PPC_Register cr1, PPC_Register cr2)
990 {
991 NativeReg r1 = jitcGetClientRegisterMapping(cr1);
992 NativeReg r2 = jitcGetClientRegisterMapping(cr2);
993 if (r1 == EAX) {
994 /* intentional left empty */
995 } else if (r2 == EAX) {
996 if (r1 == REG_NO) {
997 byte modrm[6];
998 asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
999 } else {
1000 asmALURegReg(X86_ADD, EAX, r1);
1001 }
1002 return;
1003 } else {
1004 /*
1005 * We load cr1 into EAX but have to clobber it since
1006 * we're going to modify EAX.
1007 */
1008 jitcGetClientRegister(cr1, NATIVE_REG | EAX);
1009 }
1010 jitcClobberRegister(NATIVE_REG | EAX);
1011 r2 = jitcGetClientRegisterMapping(cr2);
1012 if (r2 == REG_NO) {
1013 byte modrm[6];
1014 asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1015 } else {
1016 asmALURegReg(X86_ADD, EAX, r2);
1017 }
1018 }
1019
1020 static void getEAX_0_Rsum(PPC_Register cr1, PPC_Register cr2)
1021 {
1022 if (cr1 == PPC_GPR(0)) {
1023 jitcGetClientRegister(cr2, NATIVE_REG | EAX);
1024 } else {
1025 getEAXRsum(cr1, cr2);
1026 }
1027 }
1028
1029 static void getEAXRsumAndEDX(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
1030 {
1031 NativeReg r1 = jitcGetClientRegisterMapping(cr1);
1032 NativeReg r2 = jitcGetClientRegisterMapping(cr2);
1033 if (r1 == EAX) {
1034 jitcTouchRegister(EAX);
1035 jitcClobberRegister(NATIVE_REG | EDX);
1036 if (cr1 == cr3) {
1037 asmALURegReg(X86_MOV, EDX, EAX);
1038 } else {
1039 jitcGetClientRegister(cr3, NATIVE_REG | EDX);
1040 }
1041 r2 = jitcGetClientRegisterMapping(cr2);
1042 if (r2 == REG_NO) {
1043 byte modrm[6];
1044 asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1045 return;
1046 } else {
1047 asmALURegReg(X86_ADD, EAX, r2);
1048 return;
1049 }
1050 } else if (r2 == EAX) {
1051 jitcTouchRegister(EAX);
1052 jitcClobberRegister(NATIVE_REG | EDX);
1053 if (cr2 == cr3) {
1054 asmALURegReg(X86_MOV, EDX, EAX);
1055 } else {
1056 jitcGetClientRegister(cr3, NATIVE_REG | EDX);
1057 }
1058 r1 = jitcGetClientRegisterMapping(cr1);
1059 if (r1 == REG_NO) {
1060 byte modrm[6];
1061 asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1062 return;
1063 }
1064 asmALURegReg(X86_ADD, EAX, r1);
1065 return;
1066 } else {
1067 jitcGetClientRegister(cr1, NATIVE_REG | EAX);
1068 jitcClobberRegister(NATIVE_REG | EDX);
1069 if (cr1 == cr3) {
1070 asmALURegReg(X86_MOV, EDX, EAX);
1071 } else {
1072 jitcGetClientRegister(cr3, NATIVE_REG | EDX);
1073 }
1074 }
1075 // FIXME: what if mapping of cr3==EDX?
1076 r2 = jitcGetClientRegisterMapping(cr2);
1077 if (r2 == REG_NO) {
1078 byte modrm[6];
1079 asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1080 } else {
1081 asmALURegReg(X86_ADD, EAX, r2);
1082 }
1083 }
1084
1085 static void getEAX_0_RsumAndEDX(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
1086 {
1087 if (cr1 == PPC_GPR(0)) {
1088 if (jitcGetClientRegisterMapping(cr2) == EDX) jitcTouchRegister(EDX);
1089 jitcGetClientRegister(cr2, NATIVE_REG | EAX);
1090 if (cr2 == cr3) {
1091 asmALURegReg(X86_MOV, EDX, EAX);
1092 } else {
1093 jitcGetClientRegister(cr3, NATIVE_REG | EDX);
1094 }
1095 } else {
1096 getEAXRsumAndEDX(cr1, cr2, cr3);
1097 }
1098 }
1099
1100 /*
1101 * puts the sum of cr1 and imm into EAX
1102 * (in the most clever way)
1103 */
1104 static void getEAXIsum(PPC_Register cr1, uint32 imm)
1105 {
1106 jitcGetClientRegister(cr1, NATIVE_REG | EAX);
1107 if (imm) {
1108 asmALURegImm(X86_ADD, EAX, imm);
1109 }
1110 }
1111
1112 static void getEAX_0_Isum(PPC_Register cr1, uint32 imm)
1113 {
1114 if (cr1 == PPC_GPR(0)) {
1115 asmALURegImm(X86_MOV, EAX, imm);
1116 } else {
1117 getEAXIsum(cr1, imm);
1118 }
1119 }
1120
1121 static void getEAXIsumAndEDX(PPC_Register cr1, uint32 imm, PPC_Register cr2)
1122 {
1123 if (jitcGetClientRegisterMapping(cr2) == EDX) jitcTouchRegister(EDX);
1124 jitcGetClientRegister(cr1, NATIVE_REG | EAX);
1125 if (cr1 == cr2) {
1126 asmALURegReg(X86_MOV, EDX, EAX);
1127 } else {
1128 jitcGetClientRegister(cr2, NATIVE_REG | EDX);
1129 }
1130 jitcClobberRegister(NATIVE_REG | EAX);
1131 if (imm) {
1132 asmALURegImm(X86_ADD, EAX, imm);
1133 }
1134 }
1135
1136 static void getEAX_0_IsumAndEDX(PPC_Register cr1, uint32 imm, PPC_Register cr2)
1137 {
1138 if (cr1 == PPC_GPR(0)) {
1139 jitcGetClientRegister(cr2, NATIVE_REG | EDX);
1140 jitcClobberRegister(NATIVE_REG | EAX);
1141 asmALURegImm(X86_MOV, EAX, imm);
1142 } else {
1143 getEAXIsumAndEDX(cr1, imm, cr2);
1144 }
1145 }
1146
1147
1148 void ppc_opc_gen_helper_l(PPC_Register cr1, uint32 imm)
1149 {
1150 jitcClobberCarryAndFlags();
1151 jitcFlushRegister();
1152 getEAX_0_Isum(cr1, imm);
1153 jitcClobberAll();
1154 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1155 }
1156
1157 void ppc_opc_gen_helper_lu(PPC_Register cr1, uint32 imm)
1158 {
1159 jitcClobberCarryAndFlags();
1160 jitcFlushRegister();
1161 getEAXIsum(cr1, imm);
1162 jitcClobberAll();
1163 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1164 }
1165
1166 void ppc_opc_gen_helper_lux(PPC_Register cr1, PPC_Register cr2)
1167 {
1168 jitcClobberCarryAndFlags();
1169 jitcFlushRegister();
1170 getEAXRsum(cr1, cr2);
1171 jitcClobberAll();
1172 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1173 }
1174
1175 void ppc_opc_gen_helper_lx(PPC_Register cr1, PPC_Register cr2)
1176 {
1177 jitcClobberCarryAndFlags();
1178 jitcFlushRegister();
1179 getEAX_0_Rsum(cr1, cr2);
1180 jitcClobberAll();
1181 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1182 }
1183
1184 void ppc_opc_gen_helper_st(PPC_Register cr1, uint32 imm, PPC_Register cr2)
1185 {
1186 jitcClobberCarryAndFlags();
1187 jitcFlushRegister();
1188 getEAX_0_IsumAndEDX(cr1, imm, cr2);
1189 jitcClobberAll();
1190 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1191 }
1192
1193 void ppc_opc_gen_helper_stu(PPC_Register cr1, uint32 imm, PPC_Register cr2)
1194 {
1195 jitcClobberCarryAndFlags();
1196 jitcFlushRegister();
1197 getEAXIsumAndEDX(cr1, imm, cr2);
1198 jitcClobberAll();
1199 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1200 }
1201
1202 void ppc_opc_gen_helper_stux(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
1203 {
1204 jitcClobberCarryAndFlags();
1205 jitcFlushRegister();
1206 getEAXRsumAndEDX(cr1, cr2, cr3);
1207 jitcClobberAll();
1208 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1209 }
1210
1211 void ppc_opc_gen_helper_stx(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
1212 {
1213 jitcClobberCarryAndFlags();
1214 jitcFlushRegister();
1215 getEAX_0_RsumAndEDX(cr1, cr2, cr3);
1216 jitcClobberAll();
1217 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1218 }
1219 /*
1220
1221 void ppc_opc_gen_helper_l(PPC_Register cr1, uint32 imm)
1222 {
1223 jitcClobberAll();
1224 byte modrm[6];
1225 if (cr1 == PPC_GPR(0)) {
1226 asmALURegImm(X86_MOV, EAX, imm);
1227 } else {
1228 asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1229 asmALURegImm(X86_ADD, EAX, imm);
1230 }
1231 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1232 }
1233
1234 void ppc_opc_gen_helper_lu(PPC_Register cr1, uint32 imm)
1235 {
1236 jitcClobberAll();
1237 byte modrm[6];
1238 asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1239 asmALURegImm(X86_ADD, EAX, imm);
1240 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1241 }
1242
1243 void ppc_opc_gen_helper_lux(PPC_Register cr1, PPC_Register cr2)
1244 {
1245 jitcClobberAll();
1246 byte modrm[6];
1247 asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1248 asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1249 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1250 }
1251
1252 void ppc_opc_gen_helper_lx(PPC_Register cr1, PPC_Register cr2)
1253 {
1254 jitcClobberAll();
1255 byte modrm[6];
1256 if (cr1 == PPC_GPR(0)) {
1257 asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1258 } else {
1259 asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1260 asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1261 }
1262 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1263 }
1264
1265 void ppc_opc_gen_helper_st(PPC_Register cr1, uint32 imm, PPC_Register cr2)
1266 {
1267 jitcClobberAll();
1268 byte modrm[6];
1269 if (cr1 == PPC_GPR(0)) {
1270 asmALURegImm(X86_MOV, EAX, imm);
1271 asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1272 } else {
1273 asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1274 asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1275 asmALURegImm(X86_ADD, EAX, imm);
1276 }
1277 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1278 }
1279
1280 void ppc_opc_gen_helper_stu(PPC_Register cr1, uint32 imm, PPC_Register cr2)
1281 {
1282 jitcClobberAll();
1283 byte modrm[6];
1284 asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1285 asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1286 asmALURegImm(X86_ADD, EAX, imm);
1287 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1288 }
1289
1290 void ppc_opc_gen_helper_stux(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
1291 {
1292 jitcClobberAll();
1293 byte modrm[6];
1294 asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1295 asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr3));
1296 asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1297 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1298 }
1299
1300 void ppc_opc_gen_helper_stx(PPC_Register cr1, PPC_Register cr2, PPC_Register cr3)
1301 {
1302 jitcClobberAll();
1303 byte modrm[6];
1304 if (cr1 == PPC_GPR(0)) {
1305 asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr3));
1306 asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1307 } else {
1308 asmALURegMem(X86_MOV, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr1));
1309 asmALURegMem(X86_MOV, EDX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr3));
1310 asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU+cr2));
1311 }
1312 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1313 }
1314 */
1315
1316 uint64 FASTCALL ppc_opc_single_to_double(uint32 r)
1317 {
1318 ppc_single s;
1319 ppc_double d;
1320 uint64 ret;
1321 ppc_fpu_unpack_single(s, r);
1322 ppc_fpu_single_to_double(s, d);
1323 ppc_fpu_pack_double(d, ret);
1324 return ret;
1325 }
1326
1327 uint32 FASTCALL ppc_opc_double_to_single(uint64 r)
1328 {
1329 uint32 s;
1330 ppc_double d;
1331 ppc_fpu_unpack_double(d, r);
1332 ppc_fpu_pack_single(d, s);
1333 return s;
1334 }
1335
1336
1337 /*
1338 * dcbz Data Cache Clear to Zero
1339 * .464
1340 */
1341 void ppc_opc_dcbz()
1342 {
1343 //PPC_L1_CACHE_LINE_SIZE
1344 int rA, rD, rB;
1345 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1346 // assert rD=0
1347 uint32 a = (rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB];
1348 // BAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
1349 ppc_write_effective_dword(a, 0)
1350 || ppc_write_effective_dword(a+8, 0)
1351 || ppc_write_effective_dword(a+16, 0)
1352 || ppc_write_effective_dword(a+24, 0);
1353 }
1354 JITCFlow ppc_opc_gen_dcbz()
1355 {
1356 int rA, rD, rB;
1357 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
1358 jitcClobberCarryAndFlags();
1359 jitcFlushRegister();
1360 getEAX_0_Rsum(PPC_GPR(rA), PPC_GPR(rB));
1361 jitcClobberRegister();
1362 asmALURegImm(X86_MOV, ECX, 0);
1363 asmMOVDMemReg((uint32)&gCPU.temp, EAX);
1364 asmALURegImm(X86_MOV, EDX, 0);
1365 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1366 asmCALL((NativeAddress)ppc_write_effective_dword_asm);
1367 asmALURegImm(X86_MOV, ECX, 0);
1368 asmMOVRegDMem(EAX, (uint32)&gCPU.temp);
1369 asmALURegImm(X86_MOV, EDX, 0);
1370 asmALURegImm(X86_ADD, EAX, 8);
1371 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1372 asmCALL((NativeAddress)ppc_write_effective_dword_asm);
1373 asmALURegImm(X86_MOV, ECX, 0);
1374 asmMOVRegDMem(EAX, (uint32)&gCPU.temp);
1375 asmALURegImm(X86_MOV, EDX, 0);
1376 asmALURegImm(X86_ADD, EAX, 16);
1377 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1378 asmCALL((NativeAddress)ppc_write_effective_dword_asm);
1379 asmALURegImm(X86_MOV, ECX, 0);
1380 asmMOVRegDMem(EAX, (uint32)&gCPU.temp);
1381 asmALURegImm(X86_MOV, EDX, 0);
1382 asmALURegImm(X86_ADD, EAX, 24);
1383 asmALURegImm(X86_MOV, ESI, gJITC.pc);
1384 asmCALL((NativeAddress)ppc_write_effective_dword_asm);
1385 return flowEndBlock;
1386 }
1387
1388 /*
1389 * lbz Load Byte and Zero
1390 * .521
1391 */
1392 void ppc_opc_lbz()
1393 {
1394 int rA, rD;
1395 uint32 imm;
1396 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1397 uint8 r;
1398 int ret = ppc_read_effective_byte((rA?gCPU.gpr[rA]:0)+imm, r);
1399 if (ret == PPC_MMU_OK) {
1400 gCPU.gpr[rD] = r;
1401 }
1402 }
1403 JITCFlow ppc_opc_gen_lbz()
1404 {
1405 int rA, rD;
1406 uint32 imm;
1407 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
1408 ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
1409 asmCALL((NativeAddress)ppc_read_effective_byte_asm);
1410 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1411 return flowContinue;
1412 }
1413 /*
1414 * lbzu Load Byte and Zero with Update
1415 * .522
1416 */
1417 void ppc_opc_lbzu()
1418 {
1419 int rA, rD;
1420 uint32 imm;
1421 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1422 // FIXME: check rA!=0 && rA!=rD
1423 uint8 r;
1424 int ret = ppc_read_effective_byte(gCPU.gpr[rA]+imm, r);
1425 if (ret == PPC_MMU_OK) {
1426 gCPU.gpr[rA] += imm;
1427 gCPU.gpr[rD] = r;
1428 }
1429 }
1430 JITCFlow ppc_opc_gen_lbzu()
1431 {
1432 int rA, rD;
1433 uint32 imm;
1434 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
1435 ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
1436 asmCALL((NativeAddress)ppc_read_effective_byte_asm);
1437 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1438 if (imm) {
1439 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
1440 asmALURegImm(X86_ADD, a, imm);
1441 }
1442 return flowContinue;
1443 }
1444 /*
1445 * lbzux Load Byte and Zero with Update Indexed
1446 * .523
1447 */
1448 void ppc_opc_lbzux()
1449 {
1450 int rA, rD, rB;
1451 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1452 // FIXME: check rA!=0 && rA!=rD
1453 uint8 r;
1454 int ret = ppc_read_effective_byte(gCPU.gpr[rA]+gCPU.gpr[rB], r);
1455 if (ret == PPC_MMU_OK) {
1456 gCPU.gpr[rA] += gCPU.gpr[rB];
1457 gCPU.gpr[rD] = r;
1458 }
1459 }
1460 JITCFlow ppc_opc_gen_lbzux()
1461 {
1462 int rA, rD, rB;
1463 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
1464 ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
1465 asmCALL((NativeAddress)ppc_read_effective_byte_asm);
1466 if (rD == rB) {
1467 // don't ask...
1468 byte modrm[6];
1469 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA), NATIVE_REG | EAX);
1470 asmALURegMem(X86_ADD, a, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
1471 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1472 } else {
1473 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1474 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
1475 NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
1476 asmALURegReg(X86_ADD, a, b);
1477 }
1478 return flowContinue;
1479 }
1480 /*
1481 * lbzx Load Byte and Zero Indexed
1482 * .524
1483 */
1484 void ppc_opc_lbzx()
1485 {
1486 int rA, rD, rB;
1487 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1488 uint8 r;
1489 int ret = ppc_read_effective_byte((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
1490 if (ret == PPC_MMU_OK) {
1491 gCPU.gpr[rD] = r;
1492 }
1493 }
1494 JITCFlow ppc_opc_gen_lbzx()
1495 {
1496 int rA, rD, rB;
1497 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
1498 ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
1499 asmCALL((NativeAddress)ppc_read_effective_byte_asm);
1500 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1501 return flowContinue;
1502 }
1503 /*
1504 * lfd Load Floating-Point Double
1505 * .530
1506 */
1507 void ppc_opc_lfd()
1508 {
1509 if ((gCPU.msr & MSR_FP) == 0) {
1510 ppc_exception(PPC_EXC_NO_FPU);
1511 return;
1512 }
1513 int rA, frD;
1514 uint32 imm;
1515 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frD, rA, imm);
1516 uint64 r;
1517 int ret = ppc_read_effective_dword((rA?gCPU.gpr[rA]:0)+imm, r);
1518 if (ret == PPC_MMU_OK) {
1519 gCPU.fpr[frD] = r;
1520 }
1521 }
1522 JITCFlow ppc_opc_gen_lfd()
1523 {
1524 ppc_opc_gen_check_fpu();
1525 int rA, frD;
1526 uint32 imm;
1527 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frD, rA, imm);
1528 jitcFloatRegisterClobberAll();
1529 ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
1530 asmCALL((NativeAddress)ppc_read_effective_dword_asm);
1531 jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | ECX);
1532 jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EDX);
1533 return flowContinue;
1534 }
1535 /*
1536 * lfdu Load Floating-Point Double with Update
1537 * .531
1538 */
1539 void ppc_opc_lfdu()
1540 {
1541 if ((gCPU.msr & MSR_FP) == 0) {
1542 ppc_exception(PPC_EXC_NO_FPU);
1543 return;
1544 }
1545 int rA, frD;
1546 uint32 imm;
1547 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frD, rA, imm);
1548 // FIXME: check rA!=0
1549 uint64 r;
1550 int ret = ppc_read_effective_dword(gCPU.gpr[rA]+imm, r);
1551 if (ret == PPC_MMU_OK) {
1552 gCPU.fpr[frD] = r;
1553 gCPU.gpr[rA] += imm;
1554 }
1555 }
1556 JITCFlow ppc_opc_gen_lfdu()
1557 {
1558 ppc_opc_gen_check_fpu();
1559 int rA, frD;
1560 uint32 imm;
1561 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frD, rA, imm);
1562 jitcFloatRegisterClobberAll();
1563 ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
1564 asmCALL((NativeAddress)ppc_read_effective_dword_asm);
1565 jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | ECX);
1566 jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EDX);
1567 if (imm) {
1568 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
1569 asmALURegImm(X86_ADD, a, imm);
1570 }
1571 return flowContinue;
1572 }
1573 /*
1574 * lfdux Load Floating-Point Double with Update Indexed
1575 * .532
1576 */
1577 void ppc_opc_lfdux()
1578 {
1579 if ((gCPU.msr & MSR_FP) == 0) {
1580 ppc_exception(PPC_EXC_NO_FPU);
1581 return;
1582 }
1583 int rA, frD, rB;
1584 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, rB);
1585 // FIXME: check rA!=0
1586 uint64 r;
1587 int ret = ppc_read_effective_dword(gCPU.gpr[rA]+gCPU.gpr[rB], r);
1588 if (ret == PPC_MMU_OK) {
1589 gCPU.gpr[rA] += gCPU.gpr[rB];
1590 gCPU.fpr[frD] = r;
1591 }
1592 }
1593 JITCFlow ppc_opc_gen_lfdux()
1594 {
1595 ppc_opc_gen_check_fpu();
1596 int rA, frD, rB;
1597 PPC_OPC_TEMPL_X(gJITC.current_opc, frD, rA, rB);
1598 ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
1599 jitcFloatRegisterClobberAll();
1600 asmCALL((NativeAddress)ppc_read_effective_dword_asm);
1601 jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | ECX);
1602 jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EDX);
1603 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
1604 NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
1605 asmALURegReg(X86_ADD, a, b);
1606 return flowContinue;
1607 }
1608 /*
1609 * lfdx Load Floating-Point Double Indexed
1610 * .533
1611 */
1612 void ppc_opc_lfdx()
1613 {
1614 if ((gCPU.msr & MSR_FP) == 0) {
1615 ppc_exception(PPC_EXC_NO_FPU);
1616 return;
1617 }
1618 int rA, frD, rB;
1619 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, rB);
1620 uint64 r;
1621 int ret = ppc_read_effective_dword((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
1622 if (ret == PPC_MMU_OK) {
1623 gCPU.fpr[frD] = r;
1624 }
1625 }
1626 JITCFlow ppc_opc_gen_lfdx()
1627 {
1628 ppc_opc_gen_check_fpu();
1629 int rA, frD, rB;
1630 PPC_OPC_TEMPL_X(gJITC.current_opc, frD, rA, rB);
1631 jitcFloatRegisterClobberAll();
1632 ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
1633 asmCALL((NativeAddress)ppc_read_effective_dword_asm);
1634 jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | ECX);
1635 jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EDX);
1636 return flowContinue;
1637 }
1638 /*
1639 * lfs Load Floating-Point Single
1640 * .534
1641 */
1642 void ppc_opc_lfs()
1643 {
1644 if ((gCPU.msr & MSR_FP) == 0) {
1645 ppc_exception(PPC_EXC_NO_FPU);
1646 return;
1647 }
1648 int rA, frD;
1649 uint32 imm;
1650 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frD, rA, imm);
1651 uint32 r;
1652 int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+imm, r);
1653 if (ret == PPC_MMU_OK) {
1654 ppc_single s;
1655 ppc_double d;
1656 ppc_fpu_unpack_single(s, r);
1657 ppc_fpu_single_to_double(s, d);
1658 ppc_fpu_pack_double(d, gCPU.fpr[frD]);
1659 }
1660 }
1661 JITCFlow ppc_opc_gen_lfs()
1662 {
1663 ppc_opc_gen_check_fpu();
1664 int rA, frD;
1665 uint32 imm;
1666 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frD, rA, imm);
1667 jitcFloatRegisterClobberAll();
1668 ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
1669 asmCALL((NativeAddress)ppc_read_effective_word_asm);
1670 asmALURegReg(X86_MOV, EAX, EDX);
1671 asmCALL((NativeAddress)ppc_opc_single_to_double);
1672 jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | EDX);
1673 jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EAX);
1674 return flowContinue;
1675 }
1676 /*
1677 * lfsu Load Floating-Point Single with Update
1678 * .535
1679 */
1680 void ppc_opc_lfsu()
1681 {
1682 if ((gCPU.msr & MSR_FP) == 0) {
1683 ppc_exception(PPC_EXC_NO_FPU);
1684 return;
1685 }
1686 int rA, frD;
1687 uint32 imm;
1688 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frD, rA, imm);
1689 // FIXME: check rA!=0
1690 uint32 r;
1691 int ret = ppc_read_effective_word(gCPU.gpr[rA]+imm, r);
1692 if (ret == PPC_MMU_OK) {
1693 ppc_single s;
1694 ppc_double d;
1695 ppc_fpu_unpack_single(s, r);
1696 ppc_fpu_single_to_double(s, d);
1697 ppc_fpu_pack_double(d, gCPU.fpr[frD]);
1698 gCPU.gpr[rA] += imm;
1699 }
1700 }
1701 JITCFlow ppc_opc_gen_lfsu()
1702 {
1703 ppc_opc_gen_check_fpu();
1704 int rA, frD;
1705 uint32 imm;
1706 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frD, rA, imm);
1707 jitcFloatRegisterClobberAll();
1708 ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
1709 asmCALL((NativeAddress)ppc_read_effective_word_asm);
1710 asmALURegReg(X86_MOV, EAX, EDX);
1711 asmCALL((NativeAddress)ppc_opc_single_to_double);
1712 jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | EDX);
1713 jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EAX);
1714 if (imm) {
1715 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
1716 asmALURegImm(X86_ADD, a, imm);
1717 }
1718 return flowContinue;
1719 }
1720 /*
1721 * lfsux Load Floating-Point Single with Update Indexed
1722 * .536
1723 */
1724 void ppc_opc_lfsux()
1725 {
1726 if ((gCPU.msr & MSR_FP) == 0) {
1727 ppc_exception(PPC_EXC_NO_FPU);
1728 return;
1729 }
1730 int rA, frD, rB;
1731 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, rB);
1732 // FIXME: check rA!=0
1733 uint32 r;
1734 int ret = ppc_read_effective_word(gCPU.gpr[rA]+gCPU.gpr[rB], r);
1735 if (ret == PPC_MMU_OK) {
1736 gCPU.gpr[rA] += gCPU.gpr[rB];
1737 ppc_single s;
1738 ppc_double d;
1739 ppc_fpu_unpack_single(s, r);
1740 ppc_fpu_single_to_double(s, d);
1741 ppc_fpu_pack_double(d, gCPU.fpr[frD]);
1742 }
1743 }
1744 JITCFlow ppc_opc_gen_lfsux()
1745 {
1746 ppc_opc_gen_check_fpu();
1747 int rA, frD, rB;
1748 PPC_OPC_TEMPL_X(gJITC.current_opc, frD, rA, rB);
1749 jitcFloatRegisterClobberAll();
1750 ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
1751 asmCALL((NativeAddress)ppc_read_effective_word_asm);
1752 asmALURegReg(X86_MOV, EAX, EDX);
1753 asmCALL((NativeAddress)ppc_opc_single_to_double);
1754 jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | EDX);
1755 jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EAX);
1756 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
1757 NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
1758 asmALURegReg(X86_ADD, a, b);
1759 return flowContinue;
1760 }
1761 /*
1762 * lfsx Load Floating-Point Single Indexed
1763 * .537
1764 */
1765 void ppc_opc_lfsx()
1766 {
1767 if ((gCPU.msr & MSR_FP) == 0) {
1768 ppc_exception(PPC_EXC_NO_FPU);
1769 return;
1770 }
1771 int rA, frD, rB;
1772 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, rB);
1773 uint32 r;
1774 int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
1775 if (ret == PPC_MMU_OK) {
1776 ppc_single s;
1777 ppc_double d;
1778 ppc_fpu_unpack_single(s, r);
1779 ppc_fpu_single_to_double(s, d);
1780 ppc_fpu_pack_double(d, gCPU.fpr[frD]);
1781 }
1782 }
1783 JITCFlow ppc_opc_gen_lfsx()
1784 {
1785 ppc_opc_gen_check_fpu();
1786 int rA, frD, rB;
1787 PPC_OPC_TEMPL_X(gJITC.current_opc, frD, rA, rB);
1788 jitcFloatRegisterClobberAll();
1789 ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
1790 asmCALL((NativeAddress)ppc_read_effective_word_asm);
1791 asmALURegReg(X86_MOV, EAX, EDX);
1792 asmCALL((NativeAddress)ppc_opc_single_to_double);
1793 jitcMapClientRegisterDirty(PPC_FPR_U(frD), NATIVE_REG | EDX);
1794 jitcMapClientRegisterDirty(PPC_FPR_L(frD), NATIVE_REG | EAX);
1795 return flowContinue;
1796 }
1797 /*
1798 * lha Load Half Word Algebraic
1799 * .538
1800 */
1801 void ppc_opc_lha()
1802 {
1803 int rA, rD;
1804 uint32 imm;
1805 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1806 uint16 r;
1807 int ret = ppc_read_effective_half((rA?gCPU.gpr[rA]:0)+imm, r);
1808 if (ret == PPC_MMU_OK) {
1809 gCPU.gpr[rD] = (r&0x8000)?(r|0xffff0000):r;
1810 }
1811 }
1812 JITCFlow ppc_opc_gen_lha()
1813 {
1814 int rA, rD;
1815 uint32 imm;
1816 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
1817 ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
1818 asmCALL((NativeAddress)ppc_read_effective_half_s_asm);
1819 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1820 return flowContinue;
1821 }
1822 /*
1823 * lhau Load Half Word Algebraic with Update
1824 * .539
1825 */
1826 void ppc_opc_lhau()
1827 {
1828 int rA, rD;
1829 uint32 imm;
1830 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1831 uint16 r;
1832 // FIXME: rA != 0
1833 int ret = ppc_read_effective_half(gCPU.gpr[rA]+imm, r);
1834 if (ret == PPC_MMU_OK) {
1835 gCPU.gpr[rA] += imm;
1836 gCPU.gpr[rD] = (r&0x8000)?(r|0xffff0000):r;
1837 }
1838 }
1839 JITCFlow ppc_opc_gen_lhau()
1840 {
1841 int rA, rD;
1842 uint32 imm;
1843 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
1844 ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
1845 asmCALL((NativeAddress)ppc_read_effective_half_s_asm);
1846 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1847 if (imm) {
1848 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
1849 asmALURegImm(X86_ADD, a, imm);
1850 }
1851 return flowContinue;
1852 }
1853 /*
1854 * lhaux Load Half Word Algebraic with Update Indexed
1855 * .540
1856 */
1857 void ppc_opc_lhaux()
1858 {
1859 int rA, rD, rB;
1860 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1861 uint16 r;
1862 // FIXME: rA != 0
1863 int ret = ppc_read_effective_half(gCPU.gpr[rA]+gCPU.gpr[rB], r);
1864 if (ret == PPC_MMU_OK) {
1865 gCPU.gpr[rA] += gCPU.gpr[rB];
1866 gCPU.gpr[rD] = (r&0x8000)?(r|0xffff0000):r;
1867 }
1868 }
1869 JITCFlow ppc_opc_gen_lhaux()
1870 {
1871 int rA, rD, rB;
1872 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
1873 ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
1874 asmCALL((NativeAddress)ppc_read_effective_half_s_asm);
1875 if (rD == rB) {
1876 byte modrm[6];
1877 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA), NATIVE_REG | EAX);
1878 asmALURegMem(X86_ADD, a, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
1879 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1880 } else {
1881 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1882 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
1883 NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
1884 asmALURegReg(X86_ADD, a, b);
1885 }
1886 return flowContinue;
1887 }
1888 /*
1889 * lhax Load Half Word Algebraic Indexed
1890 * .541
1891 */
1892 void ppc_opc_lhax()
1893 {
1894 int rA, rD, rB;
1895 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1896 uint16 r;
1897 // FIXME: rA != 0
1898 int ret = ppc_read_effective_half((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
1899 if (ret == PPC_MMU_OK) {
1900 gCPU.gpr[rD] = (r&0x8000) ? (r|0xffff0000):r;
1901 }
1902 }
1903 JITCFlow ppc_opc_gen_lhax()
1904 {
1905 int rA, rD, rB;
1906 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
1907 ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
1908 asmCALL((NativeAddress)ppc_read_effective_half_s_asm);
1909 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1910 return flowContinue;
1911 }
1912 /*
1913 * lhbrx Load Half Word Byte-Reverse Indexed
1914 * .542
1915 */
1916 void ppc_opc_lhbrx()
1917 {
1918 int rA, rD, rB;
1919 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
1920 uint16 r;
1921 int ret = ppc_read_effective_half((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
1922 if (ret == PPC_MMU_OK) {
1923 gCPU.gpr[rD] = ppc_bswap_half(r);
1924 }
1925 }
1926 JITCFlow ppc_opc_gen_lhbrx()
1927 {
1928 int rA, rD, rB;
1929 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
1930 ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
1931 asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
1932 asmALURegReg8(X86_XCHG, DL, DH);
1933 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1934 return flowContinue;
1935 }
1936 /*
1937 * lhz Load Half Word and Zero
1938 * .543
1939 */
1940 void ppc_opc_lhz()
1941 {
1942 int rA, rD;
1943 uint32 imm;
1944 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1945 uint16 r;
1946 int ret = ppc_read_effective_half((rA?gCPU.gpr[rA]:0)+imm, r);
1947 if (ret == PPC_MMU_OK) {
1948 gCPU.gpr[rD] = r;
1949 }
1950 }
1951 JITCFlow ppc_opc_gen_lhz()
1952 {
1953 int rA, rD;
1954 uint32 imm;
1955 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
1956 ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
1957 asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
1958 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1959 return flowContinue;
1960 }
1961 /*
1962 * lhzu Load Half Word and Zero with Update
1963 * .544
1964 */
1965 void ppc_opc_lhzu()
1966 {
1967 int rA, rD;
1968 uint32 imm;
1969 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
1970 uint16 r;
1971 // FIXME: rA!=0
1972 int ret = ppc_read_effective_half(gCPU.gpr[rA]+imm, r);
1973 if (ret == PPC_MMU_OK) {
1974 gCPU.gpr[rD] = r;
1975 gCPU.gpr[rA] += imm;
1976 }
1977 }
1978 JITCFlow ppc_opc_gen_lhzu()
1979 {
1980 int rA, rD;
1981 uint32 imm;
1982 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
1983 ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
1984 asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
1985 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
1986 if (imm) {
1987 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
1988 asmALURegImm(X86_ADD, a, imm);
1989 }
1990 return flowContinue;
1991 }
1992 /*
1993 * lhzux Load Half Word and Zero with Update Indexed
1994 * .545
1995 */
1996 void ppc_opc_lhzux()
1997 {
1998 int rA, rD, rB;
1999 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2000 uint16 r;
2001 // FIXME: rA != 0
2002 int ret = ppc_read_effective_half(gCPU.gpr[rA]+gCPU.gpr[rB], r);
2003 if (ret == PPC_MMU_OK) {
2004 gCPU.gpr[rA] += gCPU.gpr[rB];
2005 gCPU.gpr[rD] = r;
2006 }
2007 }
2008 JITCFlow ppc_opc_gen_lhzux()
2009 {
2010 int rA, rD, rB;
2011 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
2012 ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
2013 asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
2014 if (rD == rB) {
2015 byte modrm[6];
2016 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA), NATIVE_REG | EAX);
2017 asmALURegMem(X86_ADD, a, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
2018 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
2019 } else {
2020 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
2021 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
2022 NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
2023 asmALURegReg(X86_ADD, a, b);
2024 }
2025 return flowContinue;
2026 }
2027 /*
2028 * lhzx Load Half Word and Zero Indexed
2029 * .546
2030 */
2031 void ppc_opc_lhzx()
2032 {
2033 int rA, rD, rB;
2034 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2035 uint16 r;
2036 int ret = ppc_read_effective_half((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
2037 if (ret == PPC_MMU_OK) {
2038 gCPU.gpr[rD] = r;
2039 }
2040 }
2041 JITCFlow ppc_opc_gen_lhzx()
2042 {
2043 int rA, rD, rB;
2044 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
2045 ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
2046 asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
2047 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
2048 return flowContinue;
2049 }
2050 /*
2051 * lmw Load Multiple Word
2052 * .547
2053 */
2054 void ppc_opc_lmw()
2055 {
2056 int rD, rA;
2057 uint32 imm;
2058 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
2059 uint32 ea = (rA ? gCPU.gpr[rA] : 0) + imm;
2060 while (rD <= 31) {
2061 if (ppc_read_effective_word(ea, gCPU.gpr[rD])) {
2062 return;
2063 }
2064 rD++;
2065 ea += 4;
2066 }
2067 }
2068 JITCFlow ppc_opc_gen_lmw()
2069 {
2070 int rD, rA;
2071 uint32 imm;
2072 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
2073 while (rD <= 30) {
2074 ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
2075 asmCALL((NativeAddress)ppc_read_effective_dword_asm);
2076 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | ECX);
2077 jitcMapClientRegisterDirty(PPC_GPR(rD+1), NATIVE_REG | EDX);
2078 rD += 2;
2079 imm += 8;
2080 }
2081 if (rD == 31) {
2082 ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
2083 asmCALL((NativeAddress)ppc_read_effective_word_asm);
2084 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
2085 }
2086 return flowContinue;
2087 }
2088 /*
2089 * lswi Load String Word Immediate
2090 * .548
2091 */
2092 void ppc_opc_lswi()
2093 {
2094 int rA, rD, NB;
2095 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, NB);
2096 if (NB==0) NB=32;
2097 uint32 ea = rA ? gCPU.gpr[rA] : 0;
2098 uint32 r = 0;
2099 int i = 4;
2100 uint8 v;
2101 while (NB > 0) {
2102 if (!i) {
2103 i = 4;
2104 gCPU.gpr[rD] = r;
2105 rD++;
2106 rD%=32;
2107 r = 0;
2108 }
2109 if (ppc_read_effective_byte(ea, v)) {
2110 return;
2111 }
2112 r<<=8;
2113 r|=v;
2114 ea++;
2115 i--;
2116 NB--;
2117 }
2118 while (i) { r<<=8; i--; }
2119 gCPU.gpr[rD] = r;
2120 }
2121 JITCFlow ppc_opc_gen_lswi()
2122 {
2123 int rA, rD, NB;
2124 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, NB);
2125 if (NB==0) NB=32;
2126 jitcClobberCarryAndFlags();
2127 jitcFlushRegister();
2128 if (rA) {
2129 jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
2130 } else {
2131 asmALURegImm(X86_MOV, EAX, 0);
2132 }
2133 asmALURegImm(X86_MOV, ECX, NB);
2134 asmALURegImm(X86_MOV, EBX, rD);
2135 asmALURegImm(X86_MOV, ESI, gJITC.pc);
2136 jitcClobberAll();
2137 asmCALL((NativeAddress)ppc_opc_lswi_asm);
2138 return flowEndBlock;
2139 }
2140 /*
2141 * lswx Load String Word Indexed
2142 * .550
2143 */
2144 void ppc_opc_lswx()
2145 {
2146 int rA, rD, rB;
2147 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2148 int NB = XER_n(gCPU.xer);
2149 uint32 ea = gCPU.gpr[rB] + (rA ? gCPU.gpr[rA] : 0);
2150
2151 uint32 r = 0;
2152 int i = 4;
2153 uint8 v;
2154 while (NB > 0) {
2155 if (!i) {
2156 i = 4;
2157 gCPU.gpr[rD] = r;
2158 rD++;
2159 rD%=32;
2160 r = 0;
2161 }
2162 if (ppc_read_effective_byte(ea, v)) {
2163 return;
2164 }
2165 r<<=8;
2166 r|=v;
2167 ea++;
2168 i--;
2169 NB--;
2170 }
2171 while (i) { r<<=8; i--; }
2172 gCPU.gpr[rD] = r;
2173 }
2174 JITCFlow ppc_opc_gen_lswx()
2175 {
2176 int rA, rD, rB;
2177 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
2178 jitcClobberCarryAndFlags();
2179 jitcFlushRegister();
2180 jitcGetClientRegister(PPC_XER, NATIVE_REG | ECX);
2181 if (rA) {
2182 byte modrm[6];
2183 jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG | EAX);
2184 asmALURegMem(X86_ADD, EAX, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
2185 } else {
2186 jitcGetClientRegister(PPC_GPR(rB), NATIVE_REG | EAX);
2187 }
2188 asmALURegImm(X86_AND, ECX, 0x7f);
2189 jitcClobberAll();
2190 NativeAddress fixup = asmJxxFixup(X86_Z);
2191 asmALURegImm(X86_MOV, EBX, rD);
2192 asmALURegImm(X86_MOV, ESI, gJITC.pc);
2193 asmCALL((NativeAddress)ppc_opc_lswi_asm);
2194 asmResolveFixup(fixup, asmHERE());
2195 return flowEndBlock;
2196 }
2197 /*
2198 * lwarx Load Word and Reserve Indexed
2199 * .553
2200 */
2201 void ppc_opc_lwarx()
2202 {
2203 int rA, rD, rB;
2204 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2205 uint32 r;
2206 int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
2207 if (ret == PPC_MMU_OK) {
2208 gCPU.gpr[rD] = r;
2209 gCPU.reserve = r;
2210 gCPU.have_reservation = 1;
2211 }
2212 }
2213 JITCFlow ppc_opc_gen_lwarx()
2214 {
2215 int rA, rD, rB;
2216 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
2217 ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
2218 asmCALL((NativeAddress)ppc_read_effective_word_asm);
2219 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
2220 byte modrm[6];
2221 asmALUMemReg(X86_MOV, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.reserve), EDX);
2222 asmALUMemImm8(X86_MOV, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.have_reservation), 1);
2223 return flowContinue;
2224 }
2225 /*
2226 * lwbrx Load Word Byte-Reverse Indexed
2227 * .556
2228 */
2229 void ppc_opc_lwbrx()
2230 {
2231 int rA, rD, rB;
2232 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2233 uint32 r;
2234 int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
2235 if (ret == PPC_MMU_OK) {
2236 gCPU.gpr[rD] = ppc_bswap_word(r);
2237 }
2238 }
2239 JITCFlow ppc_opc_gen_lwbrx()
2240 {
2241 int rA, rD, rB;
2242 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
2243 ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
2244 asmCALL((NativeAddress)ppc_read_effective_word_asm);
2245 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
2246 asmBSWAP(EDX);
2247 return flowContinue;
2248 }
2249 /*
2250 * lwz Load Word and Zero
2251 * .557
2252 */
2253 void ppc_opc_lwz()
2254 {
2255 int rA, rD;
2256 uint32 imm;
2257 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
2258 uint32 r;
2259 int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+imm, r);
2260 if (ret == PPC_MMU_OK) {
2261 gCPU.gpr[rD] = r;
2262 }
2263 }
2264 JITCFlow ppc_opc_gen_lwz()
2265 {
2266 int rA, rD;
2267 uint32 imm;
2268 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
2269 ppc_opc_gen_helper_l(PPC_GPR(rA), imm);
2270 asmCALL((NativeAddress)ppc_read_effective_word_asm);
2271 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
2272 return flowContinue;
2273 }
2274 /*
2275 * lbzu Load Word and Zero with Update
2276 * .558
2277 */
2278 void ppc_opc_lwzu()
2279 {
2280 int rA, rD;
2281 uint32 imm;
2282 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rD, rA, imm);
2283 // FIXME: check rA!=0 && rA!=rD
2284 uint32 r;
2285 int ret = ppc_read_effective_word(gCPU.gpr[rA]+imm, r);
2286 if (ret == PPC_MMU_OK) {
2287 gCPU.gpr[rA] += imm;
2288 gCPU.gpr[rD] = r;
2289 }
2290 }
2291 JITCFlow ppc_opc_gen_lwzu()
2292 {
2293 int rA, rD;
2294 uint32 imm;
2295 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rD, rA, imm);
2296 ppc_opc_gen_helper_lu(PPC_GPR(rA), imm);
2297 asmCALL((NativeAddress)ppc_read_effective_word_asm);
2298 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
2299 if (imm) {
2300 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
2301 asmALURegImm(X86_ADD, a, imm);
2302 }
2303 return flowContinue;
2304 }
2305 /*
2306 * lwzux Load Word and Zero with Update Indexed
2307 * .559
2308 */
2309 void ppc_opc_lwzux()
2310 {
2311 int rA, rD, rB;
2312 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2313 // FIXME: check rA!=0 && rA!=rD
2314 uint32 r;
2315 int ret = ppc_read_effective_word(gCPU.gpr[rA]+gCPU.gpr[rB], r);
2316 if (ret == PPC_MMU_OK) {
2317 gCPU.gpr[rA] += gCPU.gpr[rB];
2318 gCPU.gpr[rD] = r;
2319 }
2320 }
2321 JITCFlow ppc_opc_gen_lwzux()
2322 {
2323 int rA, rD, rB;
2324 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
2325 ppc_opc_gen_helper_lux(PPC_GPR(rA), PPC_GPR(rB));
2326 asmCALL((NativeAddress)ppc_read_effective_word_asm);
2327 if (rD == rB) {
2328 // don't ask...
2329 byte modrm[6];
2330 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA), NATIVE_REG | EAX);
2331 asmALURegMem(X86_ADD, a, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
2332 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
2333 } else {
2334 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
2335 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
2336 NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
2337 asmALURegReg(X86_ADD, a, b);
2338 }
2339 return flowContinue;
2340 }
2341 /*
2342 * lwzx Load Word and Zero Indexed
2343 * .560
2344 */
2345 void ppc_opc_lwzx()
2346 {
2347 int rA, rD, rB;
2348 PPC_OPC_TEMPL_X(gCPU.current_opc, rD, rA, rB);
2349 uint32 r;
2350 int ret = ppc_read_effective_word((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], r);
2351 if (ret == PPC_MMU_OK) {
2352 gCPU.gpr[rD] = r;
2353 }
2354 }
2355 JITCFlow ppc_opc_gen_lwzx()
2356 {
2357 int rA, rD, rB;
2358 PPC_OPC_TEMPL_X(gJITC.current_opc, rD, rA, rB);
2359 ppc_opc_gen_helper_lx(PPC_GPR(rA), PPC_GPR(rB));
2360 asmCALL((NativeAddress)ppc_read_effective_word_asm);
2361 jitcMapClientRegisterDirty(PPC_GPR(rD), NATIVE_REG | EDX);
2362 return flowContinue;
2363 }
2364
2365 static inline NativeReg FASTCALL ppc_opc_gen_helper_lvx_hint(int rA, int rB, int hint)
2366 {
2367 NativeReg ret = REG_NO;
2368 byte modrm[6];
2369
2370 NativeReg reg1 = jitcGetClientRegisterMapping(PPC_GPR(rA));
2371 NativeReg reg2 = jitcGetClientRegisterMapping(PPC_GPR(rB));
2372
2373 if (reg1 == hint) {
2374 jitcClobberCarryAndFlags();
2375 jitcClobberRegister(NATIVE_REG | reg1);
2376 ret = reg1;
2377 jitcTouchRegister(ret);
2378
2379 if (reg2 != REG_NO) {
2380 asmALURegReg(X86_ADD, ret, reg2);
2381 } else {
2382 asmALURegMem(X86_ADD, ret, modrm,
2383 x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
2384 }
2385 } else if (reg2 == hint) {
2386 jitcClobberCarryAndFlags();
2387 jitcClobberRegister(NATIVE_REG | reg2);
2388 ret = reg2;
2389 jitcTouchRegister(ret);
2390
2391 if (reg1 != REG_NO) {
2392 asmALURegReg(X86_ADD, ret, reg1);
2393 } else {
2394 asmALURegMem(X86_ADD, ret, modrm,
2395 x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rA]));
2396 }
2397 } else if ((reg1 != REG_NO) && (reg2 != REG_NO)) {
2398 /* If both are in register space, and not the hint we're best
2399 * off clobbering the hint, then using leal as a 3-operand
2400 * ADD.
2401 * This gives us the performance of an ADD, and removes the
2402 * need for a later MOV into the hint.
2403 */
2404 jitcClobberRegister(NATIVE_REG | hint);
2405 ret = (NativeReg)hint;
2406 jitcTouchRegister(ret);
2407
2408 asmLEA(ret, modrm, x86_mem_sib(modrm, reg1, 1, reg2, 0));
2409 }
2410
2411 return ret;
2412 }
2413
2414 static inline NativeReg FASTCALL ppc_opc_gen_helper_lvx(int rA, int rB, int hint=0)
2415 {
2416 NativeReg ret = REG_NO;
2417 byte modrm[6];
2418
2419 if (!rA) {
2420 ret = jitcGetClientRegisterMapping(PPC_GPR(rB));
2421
2422 if (ret == REG_NO) {
2423 ret = jitcGetClientRegister(PPC_GPR(rB), hint);
2424 }
2425
2426 jitcClobberRegister(NATIVE_REG | ret);
2427 jitcTouchRegister(ret);
2428
2429 return ret;
2430 }
2431
2432 if (hint & NATIVE_REG) {
2433 ret = ppc_opc_gen_helper_lvx_hint(rA, rB, hint & 0x0f);
2434
2435 if (ret != REG_NO)
2436 return ret;
2437 }
2438
2439 jitcClobberCarryAndFlags();
2440
2441 NativeReg reg1 = jitcGetClientRegisterMapping(PPC_GPR(rA));
2442 NativeReg reg2 = jitcGetClientRegisterMapping(PPC_GPR(rB));
2443
2444 if (reg2 == REG_NO) {
2445 ret = jitcGetClientRegister(PPC_GPR(rA));
2446 jitcClobberRegister(NATIVE_REG | ret);
2447
2448 asmALURegMem(X86_ADD, ret, modrm,
2449 x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rB]));
2450 } else {
2451 jitcClobberRegister(NATIVE_REG | reg2);
2452 ret = reg2;
2453
2454 if (reg1 != REG_NO) {
2455 asmALURegReg(X86_ADD, ret, reg1);
2456 } else {
2457 asmALURegMem(X86_ADD, ret, modrm,
2458 x86_mem(modrm, REG_NO, (uint32)&gCPU.gpr[rA]));
2459 }
2460 }
2461
2462 jitcTouchRegister(ret);
2463 return ret;
2464 }
2465
2466 /* lvx Load Vector Indexed
2467 * v.127
2468 */
2469 void ppc_opc_lvx()
2470 {
2471 #ifndef __VEC_EXC_OFF__
2472 if ((gCPU.msr & MSR_VEC) == 0) {
2473 ppc_exception(PPC_EXC_NO_VEC);
2474 return;
2475 }
2476 #endif
2477 VECTOR_DEBUG;
2478 int rA, vrD, rB;
2479 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
2480 Vector_t r;
2481
2482 int ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]);
2483
2484 int ret = ppc_read_effective_qword(ea, r);
2485 if (ret == PPC_MMU_OK) {
2486 gCPU.vr[vrD] = r;
2487 }
2488 }
2489 JITCFlow ppc_opc_gen_lvx()
2490 {
2491 ppc_opc_gen_check_vec();
2492 int rA, vrD, rB;
2493 PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);
2494 jitcDropClientVectorRegister(vrD);
2495 jitcAssertFlushedVectorRegister(vrD);
2496
2497 jitcClobberCarryAndFlags();
2498 NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);
2499 #if 1
2500 jitcClobberAll();
2501 if (regA != EAX) {
2502 //printf("*** hint miss r%u != r0\n", regA);
2503 asmALURegReg(X86_MOV, EAX, regA);
2504 }
2505 asmALURegImm(X86_MOV, ESI, gJITC.pc);
2506 asmALURegImm(X86_MOV, EDX, (uint32)&(gCPU.vr[vrD]));
2507
2508 if (0 && gJITC.hostCPUCaps.sse) {
2509 asmCALL((NativeAddress)ppc_read_effective_qword_sse_asm);
2510 gJITC.nativeVectorReg = vrD;
2511 } else {
2512 asmCALL((NativeAddress)ppc_read_effective_qword_asm);
2513 }
2514 #else
2515 asmALURegImm(X86_AND, regA, ~0x0f);
2516 asmMOVDMemReg((uint32)&gCPU.vtemp, regA);
2517
2518 jitcClobberAll();
2519 if (regA != EAX) {
2520 //printf("*** hint miss r%u != r0\n", regA);
2521 asmALURegReg(X86_MOV, EAX, regA);
2522 }
2523 asmALURegImm(X86_MOV, ESI, gJITC.pc);
2524
2525 asmCALL((NativeAddress)ppc_read_effective_dword_asm);
2526 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[3]), ECX);
2527 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[2]), EDX);
2528
2529 asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);
2530 asmALURegImm(X86_OR, EAX, 8);
2531
2532 asmCALL((NativeAddress)ppc_read_effective_dword_asm);
2533 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[1]), ECX);
2534 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[0]), EDX);
2535 #endif
2536
2537 return flowContinue;
2538 }
2539
2540
2541 /* lvxl Load Vector Index LRU
2542 * v.128
2543 */
2544 void ppc_opc_lvxl()
2545 {
2546 ppc_opc_lvx();
2547 /* This instruction should hint to the cache that the value won't be
2548 * needed again in memory anytime soon. We don't emulate the cache,
2549 * so this is effectively exactly the same as lvx.
2550 */
2551 }
2552 JITCFlow ppc_opc_gen_lvxl()
2553 {
2554 return ppc_opc_gen_lvx();
2555 }
2556
2557
2558 /* lvebx Load Vector Element Byte Indexed
2559 * v.119
2560 */
2561 void ppc_opc_lvebx()
2562 {
2563 #ifndef __VEC_EXC_OFF__
2564 if ((gCPU.msr & MSR_VEC) == 0) {
2565 ppc_exception(PPC_EXC_NO_VEC);
2566 return;
2567 }
2568 #endif
2569 VECTOR_DEBUG;
2570 int rA, vrD, rB;
2571 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
2572 uint32 ea;
2573 uint8 r;
2574 ea = (rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB];
2575 int ret = ppc_read_effective_byte(ea, r);
2576 if (ret == PPC_MMU_OK) {
2577 VECT_B(gCPU.vr[vrD], ea & 0x0f) = r;
2578 }
2579 }
2580 JITCFlow ppc_opc_gen_lvebx()
2581 {
2582 ppc_opc_gen_check_vec();
2583 int rA, vrD, rB;
2584 byte modrm[6];
2585 PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);
2586 jitcDropClientVectorRegister(vrD);
2587 jitcAssertFlushedVectorRegister(vrD);
2588
2589 if (vrD == gJITC.nativeVectorReg) {
2590 gJITC.nativeVectorReg = VECTREG_NO;
2591 }
2592
2593 jitcClobberCarryAndFlags();
2594 NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);
2595 asmMOVDMemReg((uint32)&gCPU.vtemp, regA);
2596
2597 jitcClobberAll();
2598 if (regA != EAX) asmALURegReg(X86_MOV, EAX, regA);
2599 asmALURegImm(X86_MOV, ESI, gJITC.pc);
2600
2601 asmCALL((NativeAddress)ppc_read_effective_byte_asm);
2602 asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);
2603 asmALURegImm(X86_AND, EAX, 0x0f);
2604 asmALUReg(X86_NOT, EAX);
2605
2606 asmALUMemReg8(X86_MOV, modrm,
2607 x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrD])+16), DL);
2608
2609 return flowContinue;
2610 }
2611
2612 /* lvehx Load Vector Element Half Word Indexed
2613 * v.121
2614 */
2615 void ppc_opc_lvehx()
2616 {
2617 #ifndef __VEC_EXC_OFF__
2618 if ((gCPU.msr & MSR_VEC) == 0) {
2619 ppc_exception(PPC_EXC_NO_VEC);
2620 return;
2621 }
2622 #endif
2623 VECTOR_DEBUG;
2624 int rA, vrD, rB;
2625 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
2626 uint32 ea;
2627 uint16 r;
2628 ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]) & ~1;
2629 int ret = ppc_read_effective_half(ea, r);
2630 if (ret == PPC_MMU_OK) {
2631 VECT_H(gCPU.vr[vrD], (ea & 0x0f) >> 1) = r;
2632 }
2633 }
2634 JITCFlow ppc_opc_gen_lvehx()
2635 {
2636 ppc_opc_gen_check_vec();
2637 int rA, vrD, rB;
2638 byte modrm[6];
2639 PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);
2640 jitcDropClientVectorRegister(vrD);
2641 jitcAssertFlushedVectorRegister(vrD);
2642
2643 if (vrD == gJITC.nativeVectorReg) {
2644 gJITC.nativeVectorReg = VECTREG_NO;
2645 }
2646
2647 jitcClobberCarryAndFlags();
2648 NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);
2649 asmMOVDMemReg((uint32)&gCPU.vtemp, regA);
2650 asmALURegImm(X86_AND, regA, ~0x01);
2651
2652 jitcClobberAll();
2653 if (regA != EAX) asmALURegReg(X86_MOV, EAX, regA);
2654 asmALURegImm(X86_MOV, ESI, gJITC.pc);
2655
2656 asmCALL((NativeAddress)ppc_read_effective_half_z_asm);
2657 asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);
2658 asmALURegImm(X86_AND, EAX, 0x0e);
2659 asmALUReg(X86_NOT, EAX);
2660
2661 asmALUMemReg8(X86_MOV, modrm,
2662 x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrD])+15), DL);
2663 asmALUMemReg8(X86_MOV, modrm,
2664 x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrD])+16), DH);
2665
2666 return flowContinue;
2667 }
2668
2669 /* lvewx Load Vector Element Word Indexed
2670 * v.122
2671 */
2672 void ppc_opc_lvewx()
2673 {
2674 #ifndef __VEC_EXC_OFF__
2675 if ((gCPU.msr & MSR_VEC) == 0) {
2676 ppc_exception(PPC_EXC_NO_VEC);
2677 return;
2678 }
2679 #endif
2680 VECTOR_DEBUG;
2681 int rA, vrD, rB;
2682 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
2683 uint32 ea;
2684 uint32 r;
2685 ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]) & ~3;
2686 int ret = ppc_read_effective_word(ea, r);
2687 if (ret == PPC_MMU_OK) {
2688 VECT_W(gCPU.vr[vrD], (ea & 0xf) >> 2) = r;
2689 }
2690 }
2691 JITCFlow ppc_opc_gen_lvewx()
2692 {
2693 ppc_opc_gen_check_vec();
2694 int rA, vrD, rB;
2695 byte modrm[6];
2696 PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);
2697 jitcDropClientVectorRegister(vrD);
2698 jitcAssertFlushedVectorRegister(vrD);
2699
2700 if (vrD == gJITC.nativeVectorReg) {
2701 gJITC.nativeVectorReg = VECTREG_NO;
2702 }
2703
2704 jitcClobberCarryAndFlags();
2705 NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB, NATIVE_REG | EAX);
2706 asmMOVDMemReg((uint32)&gCPU.vtemp, regA);
2707 asmALURegImm(X86_AND, regA, ~0x03);
2708
2709 jitcClobberAll();
2710 if (regA != EAX) asmALURegReg(X86_MOV, EAX, regA);
2711 asmALURegImm(X86_MOV, ESI, gJITC.pc);
2712
2713 asmCALL((NativeAddress)ppc_read_effective_word_asm);
2714 asmMOVRegDMem(EAX, (uint32)&gCPU.vtemp);
2715 asmALURegImm(X86_AND, EAX, 0x0c);
2716 asmALUReg(X86_NOT, EAX);
2717
2718 asmALUMemReg(X86_MOV, modrm,
2719 x86_mem(modrm, EAX, ((uint32)&gCPU.vr[vrD])+13), EDX);
2720 return flowContinue;
2721 }
2722
2723 const static byte lvsl_helper[] = {
2724 #if HOST_ENDIANESS == HOST_ENDIANESS_LE
2725 0x1F, 0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18,
2726 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
2727 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
2728 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
2729 #elif HOST_ENDIANESS == HOST_ENDIANESS_BE
2730 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
2731 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
2732 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
2733 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
2734 #else
2735 #error Endianess not supported!
2736 #endif
2737 };
2738
2739 const static uint32 lvsl_helper_full[16*4] = {
2740 VECT_BUILD(0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F),
2741 VECT_BUILD(0x01020304, 0x05060708, 0x090A0B0C, 0x0D0E0F10),
2742 VECT_BUILD(0x02030405, 0x06070809, 0x0A0B0C0D, 0x0E0F1011),
2743 VECT_BUILD(0x03040506, 0x0708090A, 0x0B0C0D0E, 0x0F101112),
2744 VECT_BUILD(0x04050607, 0x08090A0B, 0x0C0D0E0F, 0x10111213),
2745 VECT_BUILD(0x05060708, 0x090A0B0C, 0x0D0E0F10, 0x11121314),
2746 VECT_BUILD(0x06070809, 0x0A0B0C0D, 0x0E0F1011, 0x12131415),
2747 VECT_BUILD(0x0708090A, 0x0B0C0D0E, 0x0F101112, 0x13141516),
2748 VECT_BUILD(0x08090A0B, 0x0C0D0E0F, 0x10111213, 0x14151617),
2749 VECT_BUILD(0x090A0B0C, 0x0D0E0F10, 0x11121314, 0x15161718),
2750 VECT_BUILD(0x0A0B0C0D, 0x0E0F1011, 0x12131415, 0x16171819),
2751 VECT_BUILD(0x0B0C0D0E, 0x0F101112, 0x13141516, 0x1718191A),
2752 VECT_BUILD(0x0C0D0E0F, 0x10111213, 0x14151617, 0x18191A1B),
2753 VECT_BUILD(0x0D0E0F10, 0x11121314, 0x15161718, 0x191A1B1C),
2754 VECT_BUILD(0x0E0F1011, 0x12131415, 0x16171819, 0x1A1B1C1D),
2755 VECT_BUILD(0x0F101112, 0x13141516, 0x1718191A, 0x1B1C1D1E),
2756 };
2757
2758 const static uint32 lvsr_helper_full[16*4] = {
2759 VECT_BUILD(0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F),
2760 VECT_BUILD(0x0F101112, 0x13141516, 0x1718191A, 0x1B1C1D1E),
2761 VECT_BUILD(0x0E0F1011, 0x12131415, 0x16171819, 0x1A1B1C1D),
2762 VECT_BUILD(0x0D0E0F10, 0x11121314, 0x15161718, 0x191A1B1C),
2763 VECT_BUILD(0x0C0D0E0F, 0x10111213, 0x14151617, 0x18191A1B),
2764 VECT_BUILD(0x0B0C0D0E, 0x0F101112, 0x13141516, 0x1718191A),
2765 VECT_BUILD(0x0A0B0C0D, 0x0E0F1011, 0x12131415, 0x16171819),
2766 VECT_BUILD(0x090A0B0C, 0x0D0E0F10, 0x11121314, 0x15161718),
2767 VECT_BUILD(0x08090A0B, 0x0C0D0E0F, 0x10111213, 0x14151617),
2768 VECT_BUILD(0x0708090A, 0x0B0C0D0E, 0x0F101112, 0x13141516),
2769 VECT_BUILD(0x06070809, 0x0A0B0C0D, 0x0E0F1011, 0x12131415),
2770 VECT_BUILD(0x05060708, 0x090A0B0C, 0x0D0E0F10, 0x11121314),
2771 VECT_BUILD(0x04050607, 0x08090A0B, 0x0C0D0E0F, 0x10111213),
2772 VECT_BUILD(0x03040506, 0x0708090A, 0x0B0C0D0E, 0x0F101112),
2773 VECT_BUILD(0x02030405, 0x06070809, 0x0A0B0C0D, 0x0E0F1011),
2774 VECT_BUILD(0x01020304, 0x05060708, 0x090A0B0C, 0x0D0E0F10),
2775 };
2776
2777 /*
2778 * lvsl Load Vector for Shift Left
2779 * v.123
2780 */
2781 void ppc_opc_lvsl()
2782 {
2783 #ifndef __VEC_EXC_OFF__
2784 if ((gCPU.msr & MSR_VEC) == 0) {
2785 ppc_exception(PPC_EXC_NO_VEC);
2786 return;
2787 }
2788 #endif
2789 VECTOR_DEBUG;
2790 int rA, vrD, rB;
2791 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
2792 uint32 ea;
2793 ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]);
2794 #if HOST_ENDIANESS == HOST_ENDIANESS_LE
2795 memmove(&gCPU.vr[vrD], lvsl_helper+0x10-(ea & 0xf), 16);
2796 #elif HOST_ENDIANESS == HOST_ENDIANESS_BE
2797 memmove(&gCPU.vr[vrD], lvsl_helper+(ea & 0xf), 16);
2798 #else
2799 #error Endianess not supported!
2800 #endif
2801 }
2802 JITCFlow ppc_opc_gen_lvsl()
2803 {
2804 ppc_opc_gen_check_vec();
2805 int rA, vrD, rB;
2806 byte modrm[6];
2807 PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);
2808
2809 if (vrD == gJITC.nativeVectorReg) {
2810 gJITC.nativeVectorReg = VECTREG_NO;
2811 }
2812
2813 jitcClobberCarryAndFlags();
2814 NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB);
2815 asmALURegImm(X86_AND, regA, 0x0f);
2816
2817 if (gJITC.hostCPUCaps.sse) {
2818 asmShiftRegImm(X86_SHL, regA, 4);
2819
2820 NativeVectorReg reg1 = jitcMapClientVectorRegisterDirty(vrD);
2821
2822 asmALUPS(X86_MOVAPS, reg1,
2823 x86_mem2(modrm, regA, (uint32)&lvsl_helper_full));
2824 } else {
2825 asmALUReg(X86_NOT, regA);
2826 jitcDropClientVectorRegister(vrD);
2827
2828 NativeReg reg1 = jitcAllocRegister();
2829 NativeReg reg2 = jitcAllocRegister();
2830 NativeReg reg3 = jitcAllocRegister();
2831
2832 asmALURegMem(X86_MOV, reg1, modrm,
2833 x86_mem(modrm, regA, (uint32)&lvsl_helper+0x11));
2834 asmALURegMem(X86_MOV, reg2, modrm,
2835 x86_mem(modrm, regA, (uint32)&lvsl_helper+0x15));
2836 asmALURegMem(X86_MOV, reg3, modrm,
2837 x86_mem(modrm, regA, (uint32)&lvsl_helper+0x19));
2838 asmALURegMem(X86_MOV, regA, modrm,
2839 x86_mem(modrm, regA, (uint32)&lvsl_helper+0x1d));
2840
2841 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[0]), reg1);
2842 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[1]), reg2);
2843 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[2]), reg3);
2844 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[3]), regA);
2845 }
2846
2847 return flowContinue;
2848 }
2849
2850 /*
2851 * lvsr Load Vector for Shift Right
2852 * v.125
2853 */
2854 void ppc_opc_lvsr()
2855 {
2856 #ifndef __VEC_EXC_OFF__
2857 if ((gCPU.msr & MSR_VEC) == 0) {
2858 ppc_exception(PPC_EXC_NO_VEC);
2859 return;
2860 }
2861 #endif
2862 VECTOR_DEBUG;
2863 int rA, vrD, rB;
2864 PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, rA, rB);
2865 uint32 ea;
2866 ea = ((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB]);
2867 #if HOST_ENDIANESS == HOST_ENDIANESS_LE
2868 memmove(&gCPU.vr[vrD], lvsl_helper+(ea & 0xf), 16);
2869 #elif HOST_ENDIANESS == HOST_ENDIANESS_BE
2870 memmove(&gCPU.vr[vrD], lvsl_helper+0x10-(ea & 0xf), 16);
2871 #else
2872 #error Endianess not supported!
2873 #endif
2874 }
2875 JITCFlow ppc_opc_gen_lvsr()
2876 {
2877 ppc_opc_gen_check_vec();
2878 int rA, vrD, rB;
2879 byte modrm[6];
2880 PPC_OPC_TEMPL_X(gJITC.current_opc, vrD, rA, rB);
2881
2882 if (vrD == gJITC.nativeVectorReg) {
2883 gJITC.nativeVectorReg = VECTREG_NO;
2884 }
2885
2886 jitcClobberCarryAndFlags();
2887 NativeReg regA = ppc_opc_gen_helper_lvx(rA, rB);
2888 asmALURegImm(X86_AND, regA, 0x0f);
2889
2890 if (gJITC.hostCPUCaps.sse) {
2891 asmShiftRegImm(X86_SHL, regA, 4);
2892
2893 NativeVectorReg reg1 = jitcMapClientVectorRegisterDirty(vrD);
2894
2895 asmALUPS(X86_MOVAPS, reg1,
2896 x86_mem2(modrm, regA, (uint32)&lvsr_helper_full));
2897 } else {
2898 jitcDropClientVectorRegister(vrD);
2899 jitcAssertFlushedVectorRegister(vrD);
2900
2901 NativeReg reg1 = jitcAllocRegister();
2902 NativeReg reg2 = jitcAllocRegister();
2903 NativeReg reg3 = jitcAllocRegister();
2904
2905 asmALURegMem(X86_MOV, reg1, modrm,
2906 x86_mem(modrm, regA, (uint32)&lvsl_helper));
2907 asmALURegMem(X86_MOV, reg2, modrm,
2908 x86_mem(modrm, regA, (uint32)&lvsl_helper+4));
2909 asmALURegMem(X86_MOV, reg3, modrm,
2910 x86_mem(modrm, regA, (uint32)&lvsl_helper+8));
2911 asmALURegMem(X86_MOV, regA, modrm,
2912 x86_mem(modrm, regA, (uint32)&lvsl_helper+12));
2913
2914 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[0]), reg1);
2915 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[1]), reg2);
2916 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[2]), reg3);
2917 asmMOVDMemReg((uint32)&(gCPU.vr[vrD].w[3]), regA);
2918 }
2919
2920 return flowContinue;
2921 }
2922
2923 /*
2924 * dst Data Stream Touch
2925 * v.115
2926 */
2927 void ppc_opc_dst()
2928 {
2929 VECTOR_DEBUG;
2930 /* Since we are not emulating the cache, this is a nop */
2931 }
2932 JITCFlow ppc_opc_gen_dst()
2933 {
2934 /* Since we are not emulating the cache, this is a nop */
2935 return flowContinue;
2936 }
2937
2938 /*
2939 * stb Store Byte
2940 * .632
2941 */
2942 void ppc_opc_stb()
2943 {
2944 int rA, rS;
2945 uint32 imm;
2946 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rS, rA, imm);
2947 ppc_write_effective_byte((rA?gCPU.gpr[rA]:0)+imm, (uint8)gCPU.gpr[rS]) != PPC_MMU_FATAL;
2948 }
2949 JITCFlow ppc_opc_gen_stb()
2950 {
2951 int rA, rS;
2952 uint32 imm;
2953 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rS, rA, imm);
2954 ppc_opc_gen_helper_st(PPC_GPR(rA), imm, PPC_GPR(rS));
2955 asmCALL((NativeAddress)ppc_write_effective_byte_asm);
2956 return flowEndBlock;
2957 }
2958 /*
2959 * stbu Store Byte with Update
2960 * .633
2961 */
2962 void ppc_opc_stbu()
2963 {
2964 int rA, rS;
2965 uint32 imm;
2966 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, rS, rA, imm);
2967 // FIXME: check rA!=0
2968 int ret = ppc_write_effective_byte(gCPU.gpr[rA]+imm, (uint8)gCPU.gpr[rS]);
2969 if (ret == PPC_MMU_OK) {
2970 gCPU.gpr[rA] += imm;
2971 }
2972 }
2973 JITCFlow ppc_opc_gen_stbu()
2974 {
2975 int rA, rS;
2976 uint32 imm;
2977 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, rS, rA, imm);
2978 // FIXME: check rA!=0
2979 ppc_opc_gen_helper_stu(PPC_GPR(rA), imm, PPC_GPR(rS));
2980 asmCALL((NativeAddress)ppc_write_effective_byte_asm);
2981 if (imm) {
2982 NativeReg r = jitcGetClientRegisterDirty(PPC_GPR(rA));
2983 asmALURegImm(X86_ADD, r, imm);
2984 }
2985 return flowContinue;
2986 }
2987 /*
2988 * stbux Store Byte with Update Indexed
2989 * .634
2990 */
2991 void ppc_opc_stbux()
2992 {
2993 int rA, rS, rB;
2994 PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
2995 // FIXME: check rA!=0
2996 int ret = ppc_write_effective_byte(gCPU.gpr[rA]+gCPU.gpr[rB], (uint8)gCPU.gpr[rS]);
2997 if (ret == PPC_MMU_OK) {
2998 gCPU.gpr[rA] += gCPU.gpr[rB];
2999 }
3000 }
3001 JITCFlow ppc_opc_gen_stbux()
3002 {
3003 int rA, rS, rB;
3004 PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
3005 ppc_opc_gen_helper_stux(PPC_GPR(rA), PPC_GPR(rB), PPC_GPR(rS));
3006 asmCALL((NativeAddress)ppc_write_effective_byte_asm);
3007 NativeReg a = jitcGetClientRegisterDirty(PPC_GPR(rA));
3008 NativeReg b = jitcGetClientRegister(PPC_GPR(rB));
3009 asmALURegReg(X86_ADD, a, b);
3010 return flowContinue;
3011 }
3012 /*
3013 * stbx Store Byte Indexed
3014 * .635
3015 */
3016 void ppc_opc_stbx()
3017 {
3018 int rA, rS, rB;
3019 PPC_OPC_TEMPL_X(gCPU.current_opc, rS, rA, rB);
3020 ppc_write_effective_byte((rA?gCPU.gpr[rA]:0)+gCPU.gpr[rB], (uint8)gCPU.gpr[rS]) != PPC_MMU_FATAL;
3021 }
3022 JITCFlow ppc_opc_gen_stbx()
3023 {
3024 int rA, rS, rB;
3025 PPC_OPC_TEMPL_X(gJITC.current_opc, rS, rA, rB);
3026 ppc_opc_gen_helper_stx(PPC_GPR(rA), PPC_GPR(rB), PPC_GPR(rS));
3027 asmCALL((NativeAddress)ppc_write_effective_byte_asm);
3028 return flowEndBlock;
3029 }
3030 /*
3031 * stfd Store Floating-Point Double
3032 * .642
3033 */
3034 void ppc_opc_stfd()
3035 {
3036 if ((gCPU.msr & MSR_FP) == 0) {
3037 ppc_exception(PPC_EXC_NO_FPU);
3038 return;
3039 }
3040 int rA, frS;
3041 uint32 imm;
3042 PPC_OPC_TEMPL_D_SImm(gCPU.current_opc, frS, rA, imm);
3043 ppc_write_effective_dword((rA?gCPU.gpr[rA]:0)+imm, gCPU.fpr[frS]) != PPC_MMU_FATAL;
3044 }
3045 JITCFlow ppc_opc_gen_stfd()
3046 {
3047 ppc_opc_gen_check_fpu();
3048 int rA, frS;
3049 uint32 imm;
3050 PPC_OPC_TEMPL_D_SImm(gJITC.current_opc, frS, rA, imm);
3051 jitcFloatRegisterClobberAll();
3052 jitcFlushRegister();
3053 jitcClobberCarryAndFlags();
3054 jitcGetClientRegister(PPC_FPR_U(frS), NATIVE_REG | ECX);
3055 jitcGetClientRegister(PPC_FPR_L(frS), NATIVE_REG | EDX);
3056 if (rA) {
3057 jitcGetClientRegister(PPC_GPR(rA), NATIVE_REG