/[gxemul]/upstream/0.3.1/src/bintrans_i386.c
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /upstream/0.3.1/src/bintrans_i386.c

Parent Directory | Revision Log


Revision 3 - (show annotations)
Mon Oct 8 16:17:52 2007 UTC (16 years, 6 months ago) by dpavlin
File MIME type: text/plain
File size: 83884 byte(s)
0.3.1
1 /*
2 * Copyright (C) 2004-2005 Anders Gavare. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * 3. The name of the author may not be used to endorse or promote products
13 * derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 *
28 * $Id: bintrans_i386.c,v 1.75 2005/03/22 09:12:04 debug Exp $
29 *
30 * i386 specific code for dynamic binary translation.
31 * See bintrans.c for more information. Included from bintrans.c.
32 *
33 * Translated code uses the following conventions at all times:
34 *
35 * esi points to the cpu struct
36 * edi lowest 32 bits of cpu->pc
37 * ebp contains cpu->bintrans_instructions_executed
38 */
39
40
41 struct cpu dummy_cpu; /* its address is the base for the struct cpu member offsets computed in this file */
42 struct mips_coproc dummy_coproc; /* its address is the base for struct mips_coproc member offsets (e.g. reg[]) */
43 struct vth32_table dummy_vth32_table; /* its address is the base for the ofs_chunks offset */
44
45
/*
 * bintrans_host_cacheinvalidate():
 *
 * Invalidate the host's instruction cache. No such step is needed on
 * i386, so this function ignores both of its arguments and does nothing.
 */
static void bintrans_host_cacheinvalidate(unsigned char *p, size_t len)
{
	(void)p;
	(void)len;
}
56
57
/*
 * Byte offsets of selected members, computed from the addresses of the
 * dummy objects above.
 *
 * offsetof (from <stddef.h>) could possibly be used instead, but it is
 * not clear that it would take care of the compiler problems...
 */
#define BINTRANS_MEMBER_OFS(obj, member) \
	(((size_t)&(obj).member) - ((size_t)&(obj)))

#define ofs_i		BINTRANS_MEMBER_OFS(dummy_cpu, cd.mips.bintrans_instructions_executed)
#define ofs_pc		BINTRANS_MEMBER_OFS(dummy_cpu, pc)
#define ofs_pc_last	BINTRANS_MEMBER_OFS(dummy_cpu, cd.mips.pc_last)
#define ofs_tabl0	BINTRANS_MEMBER_OFS(dummy_cpu, cd.mips.vaddr_to_hostaddr_table0)
#define ofs_chunks	BINTRANS_MEMBER_OFS(dummy_vth32_table, bintrans_chunks[0])
#define ofs_chunkbase	BINTRANS_MEMBER_OFS(dummy_cpu, cd.mips.chunk_base_address)
67
68
69 static void (*bintrans_runchunk)(struct cpu *, unsigned char *); /* file-local function pointer; no initializer here. NOTE(review): presumably pointed at generated host code elsewhere -- confirm */
70 static void (*bintrans_jump_to_32bit_pc)(struct cpu *); /* file-local function pointer taking only the cpu; not assigned in this part of the file */
71 static void (*bintrans_loadstore_32bit)(struct cpu *); /* file-local function pointer taking only the cpu; not assigned in this part of the file */
72
73
74 /*
75 * bintrans_write_quickjump():
76 */
77 static void bintrans_write_quickjump(struct memory *mem,
78 unsigned char *quickjump_code, uint32_t chunkoffset)
79 {
80 uint32_t i386_addr;
81 unsigned char *a = quickjump_code;
82
83 i386_addr = chunkoffset + (size_t)mem->translation_code_chunk_space;
84 i386_addr = i386_addr - ((size_t)a + 5);
85
86 /* printf("chunkoffset=%i, %08x %08x %i\n",
87 chunkoffset, i386_addr, a, ofs); */
88
89 *a++ = 0xe9;
90 *a++ = i386_addr;
91 *a++ = i386_addr >> 8;
92 *a++ = i386_addr >> 16;
93 *a++ = i386_addr >> 24;
94 }
95
96
/*
 * bintrans_write_chunkreturn():
 *
 * Appends a single i386 "ret" (0xc3) at *addrp and advances *addrp
 * past it.
 */
static void bintrans_write_chunkreturn(unsigned char **addrp)
{
	unsigned char *out = *addrp;

	out[0] = 0xc3;		/* ret */
	*addrp = out + 1;
}
106
107
/*
 * bintrans_write_chunkreturn_fail():
 *
 * Appends code at *addrp which ORs 0x1000000 into %ebp and then returns,
 * and advances *addrp past the 7 bytes written.
 */
static void bintrans_write_chunkreturn_fail(unsigned char **addrp)
{
	static const unsigned char code[] = {
		/* 81 cd 00 00 00 01    orl $0x1000000,%ebp */
		/* TODO: not hardcoded */
		0x81, 0xcd, 0x00, 0x00, 0x00, 0x01,
		/* c3                   ret */
		0xc3
	};
	unsigned char *out = *addrp;
	size_t i;

	for (i = 0; i < sizeof(code); i++)
		out[i] = code[i];

	*addrp = out + sizeof(code);
}
122
123
/*
 * bintrans_write_pc_inc():
 *
 * Appends code at *addrp which adds 4 to %edi (the lowest 32 bits of
 * cpu->pc) and increments %ebp (the executed-instructions counter), then
 * advances *addrp past the 4 bytes written.
 *
 * No carry into the upper half of cpu->pc is emitted; the adcl-based
 * variant for that is not enabled.
 */
static void bintrans_write_pc_inc(unsigned char **addrp)
{
	static const unsigned char code[] = {
		0x83, 0xc7, 0x04,	/* add $0x4,%edi */
		0x45			/* inc %ebp */
	};
	unsigned char *out = *addrp;
	size_t i;

	for (i = 0; i < sizeof(code); i++)
		*out++ = code[i];

	*addrp = out;
}
154
155
156 /*
157 * load_pc_into_eax_edx():
158 */
159 static void load_pc_into_eax_edx(unsigned char **addrp)
160 {
161 unsigned char *a;
162 a = *addrp;
163
164 /* 89 f8 mov %edi,%eax */
165 *a++ = 0x89; *a++ = 0xf8;
166
167 #if 0
168 if (bintrans_32bit_only) {
169 /* 99 cltd */
170 *a++ = 0x99;
171 } else
172 #endif
173 {
174 int ofs = ((size_t)&dummy_cpu.pc) - (size_t)&dummy_cpu;
175 /* 8b 96 3c 30 00 00 mov 0x303c(%esi),%edx */
176 ofs += 4;
177 *a++ = 0x8b; *a++ = 0x96;
178 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
179 }
180
181 *addrp = a;
182 }
183
184
185 /*
186 * store_eax_edx_into_pc():
187 */
188 static void store_eax_edx_into_pc(unsigned char **addrp)
189 {
190 unsigned char *a;
191 int ofs = ((size_t)&dummy_cpu.pc) - (size_t)&dummy_cpu;
192 a = *addrp;
193
194 /* 89 c7 mov %eax,%edi */
195 *a++ = 0x89; *a++ = 0xc7;
196
197 /* 89 96 3c 30 00 00 mov %edx,0x303c(%esi) */
198 ofs += 4;
199 *a++ = 0x89; *a++ = 0x96;
200 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
201
202 *addrp = a;
203 }
204
205
206 /*
207 * load_into_eax_edx():
208 *
209 * Usage: load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rs]); etc.
210 */
211 static void load_into_eax_edx(unsigned char **addrp, void *p)
212 {
213 unsigned char *a;
214 int ofs = (size_t)p - (size_t)&dummy_cpu;
215 a = *addrp;
216
217 /* 8b 86 38 30 00 00 mov 0x3038(%esi),%eax */
218 *a++ = 0x8b; *a++ = 0x86;
219 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
220
221 #if 0
222 if (bintrans_32bit_only) {
223 /* 99 cltd */
224 *a++ = 0x99;
225 } else
226 #endif
227 {
228 /* 8b 96 3c 30 00 00 mov 0x303c(%esi),%edx */
229 ofs += 4;
230 *a++ = 0x8b; *a++ = 0x96;
231 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
232 }
233
234 *addrp = a;
235 }
236
237
238 /*
239 * load_into_eax_and_sign_extend_into_edx():
240 *
241 * Usage: load_into_eax_and_sign_extend_into_edx(&a, &dummy_cpu.cd.mips.gpr[rs]); etc.
242 */
243 static void load_into_eax_and_sign_extend_into_edx(unsigned char **addrp, void *p)
244 {
245 unsigned char *a;
246 int ofs = (size_t)p - (size_t)&dummy_cpu;
247 a = *addrp;
248
249 /* 8b 86 38 30 00 00 mov 0x3038(%esi),%eax */
250 *a++ = 0x8b; *a++ = 0x86;
251 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
252
253 /* 99 cltd */
254 *a++ = 0x99;
255
256 *addrp = a;
257 }
258
259
260 /*
261 * load_into_eax_dont_care_about_edx():
262 *
263 * Usage: load_into_eax_dont_care_about_edx(&a, &dummy_cpu.cd.mips.gpr[rs]); etc.
264 */
265 static void load_into_eax_dont_care_about_edx(unsigned char **addrp, void *p)
266 {
267 unsigned char *a;
268 int ofs = (size_t)p - (size_t)&dummy_cpu;
269 a = *addrp;
270
271 /* 8b 86 38 30 00 00 mov 0x3038(%esi),%eax */
272 *a++ = 0x8b; *a++ = 0x86;
273 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
274
275 *addrp = a;
276 }
277
278
279 /*
280 * store_eax_edx():
281 *
282 * Usage: store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rs]); etc.
283 */
284 static void store_eax_edx(unsigned char **addrp, void *p)
285 {
286 unsigned char *a;
287 int ofs = (size_t)p - (size_t)&dummy_cpu;
288 a = *addrp;
289
290 /* 89 86 38 30 00 00 mov %eax,0x3038(%esi) */
291 *a++ = 0x89; *a++ = 0x86;
292 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
293
294 /* 89 96 3c 30 00 00 mov %edx,0x303c(%esi) */
295 ofs += 4;
296 *a++ = 0x89; *a++ = 0x96;
297 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
298
299 *addrp = a;
300 }
301
302
303 /*
304 * bintrans_write_instruction__lui():
305 */
306 static int bintrans_write_instruction__lui(unsigned char **addrp, int rt, int imm)
307 {
308 unsigned char *a;
309
310 a = *addrp;
311 if (rt == 0)
312 goto rt0;
313
314 /* b8 00 00 dc fe mov $0xfedc0000,%eax */
315 *a++ = 0xb8; *a++ = 0; *a++ = 0;
316 *a++ = imm & 255; *a++ = imm >> 8;
317
318 /* 99 cltd */
319 *a++ = 0x99;
320
321 store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
322 *addrp = a;
323
324 rt0:
325 bintrans_write_pc_inc(addrp);
326 return 1;
327 }
328
329
330 /*
331 * bintrans_write_instruction__jr():
332 */
333 static int bintrans_write_instruction__jr(unsigned char **addrp,
334 int rs, int rd, int special)
335 {
336 unsigned char *a;
337 int ofs;
338
339 a = *addrp;
340
341 /*
342 * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
343 * and cpu->delay_jmpaddr = gpr[rs].
344 */
345
346 /* c7 86 38 30 00 00 01 00 00 00 movl $0x1,0x3038(%esi) */
347 ofs = ((size_t)&dummy_cpu.cd.mips.delay_slot) - (size_t)&dummy_cpu;
348 *a++ = 0xc7; *a++ = 0x86;
349 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
350 *a++ = TO_BE_DELAYED; *a++ = 0; *a++ = 0; *a++ = 0;
351
352 #if 0
353 if (bintrans_32bit_only)
354 load_into_eax_and_sign_extend_into_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
355 else
356 #endif
357 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
358
359 store_eax_edx(&a, &dummy_cpu.cd.mips.delay_jmpaddr);
360
361 if (special == SPECIAL_JALR && rd != 0) {
362 /* gpr[rd] = retaddr (pc + 8) */
363
364 #if 0
365 if (bintrans_32bit_only) {
366 load_pc_into_eax_edx(&a);
367 /* 83 c0 08 add $0x8,%eax */
368 *a++ = 0x83; *a++ = 0xc0; *a++ = 0x08;
369 } else
370 #endif
371 {
372 load_pc_into_eax_edx(&a);
373 /* 83 c0 08 add $0x8,%eax */
374 /* 83 d2 00 adc $0x0,%edx */
375 *a++ = 0x83; *a++ = 0xc0; *a++ = 0x08;
376 *a++ = 0x83; *a++ = 0xd2; *a++ = 0x00;
377 }
378
379 store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rd]);
380 }
381
382 *addrp = a;
383 bintrans_write_pc_inc(addrp);
384 return 1;
385 }
386
387
388 /*
389 * bintrans_write_instruction__mfmthilo():
390 */
391 static int bintrans_write_instruction__mfmthilo(unsigned char **addrp,
392 int rd, int from_flag, int hi_flag)
393 {
394 unsigned char *a;
395
396 a = *addrp;
397
398 if (from_flag) {
399 if (rd != 0) {
400 /* mfhi or mflo */
401 if (hi_flag)
402 load_into_eax_edx(&a, &dummy_cpu.cd.mips.hi);
403 else
404 load_into_eax_edx(&a, &dummy_cpu.cd.mips.lo);
405 store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rd]);
406 }
407 } else {
408 /* mthi or mtlo */
409 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rd]);
410 if (hi_flag)
411 store_eax_edx(&a, &dummy_cpu.cd.mips.hi);
412 else
413 store_eax_edx(&a, &dummy_cpu.cd.mips.lo);
414 }
415
416 *addrp = a;
417 bintrans_write_pc_inc(addrp);
418 return 1;
419 }
420
421
422 /*
423 * bintrans_write_instruction__addiu_etc():
424 */
425 static int bintrans_write_instruction__addiu_etc(unsigned char **addrp,
426 int rt, int rs, int imm, int instruction_type)
427 {
428 unsigned char *a;
429 unsigned int uimm;
430
431 /* TODO: overflow detection for ADDI and DADDI */
432 switch (instruction_type) {
433 case HI6_ADDI:
434 case HI6_DADDI:
435 return 0;
436 }
437
438 a = *addrp;
439
440 if (rt == 0)
441 goto rt0;
442
443 uimm = imm & 0xffff;
444
445 if (uimm == 0 && (instruction_type == HI6_ADDIU ||
446 instruction_type == HI6_ADDI)) {
447 load_into_eax_and_sign_extend_into_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
448 store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
449 goto rt0;
450 }
451
452 if (uimm == 0 && (instruction_type == HI6_DADDIU ||
453 instruction_type == HI6_DADDI || instruction_type == HI6_ORI)) {
454 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
455 store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
456 goto rt0;
457 }
458
459 #if 0
460 if (bintrans_32bit_only)
461 load_into_eax_and_sign_extend_into_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
462 else
463 #endif
464 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
465
466 switch (instruction_type) {
467 case HI6_ADDIU:
468 case HI6_DADDIU:
469 case HI6_ADDI:
470 case HI6_DADDI:
471 if (imm & 0x8000) {
472 /* 05 39 fd ff ff add $0xfffffd39,%eax */
473 /* 83 d2 ff adc $0xffffffff,%edx */
474 *a++ = 0x05; *a++ = uimm; *a++ = uimm >> 8; *a++ = 0xff; *a++ = 0xff;
475 if (instruction_type == HI6_DADDIU) {
476 *a++ = 0x83; *a++ = 0xd2; *a++ = 0xff;
477 }
478 } else {
479 /* 05 c7 02 00 00 add $0x2c7,%eax */
480 /* 83 d2 00 adc $0x0,%edx */
481 *a++ = 0x05; *a++ = uimm; *a++ = uimm >> 8; *a++ = 0; *a++ = 0;
482 if (instruction_type == HI6_DADDIU) {
483 *a++ = 0x83; *a++ = 0xd2; *a++ = 0;
484 }
485 }
486 if (instruction_type == HI6_ADDIU) {
487 /* 99 cltd */
488 *a++ = 0x99;
489 }
490 break;
491 case HI6_ANDI:
492 /* 25 34 12 00 00 and $0x1234,%eax */
493 /* 31 d2 xor %edx,%edx */
494 *a++ = 0x25; *a++ = uimm; *a++ = uimm >> 8; *a++ = 0; *a++ = 0;
495 *a++ = 0x31; *a++ = 0xd2;
496 break;
497 case HI6_ORI:
498 /* 0d 34 12 00 00 or $0x1234,%eax */
499 *a++ = 0xd; *a++ = uimm; *a++ = uimm >> 8; *a++ = 0; *a++ = 0;
500 break;
501 case HI6_XORI:
502 /* 35 34 12 00 00 xor $0x1234,%eax */
503 *a++ = 0x35; *a++ = uimm; *a++ = uimm >> 8; *a++ = 0; *a++ = 0;
504 break;
505 case HI6_SLTIU:
506 /* set if less than, unsigned. (compare edx:eax to ecx:ebx) */
507 /* ecx:ebx = the immediate value */
508 /* bb dc fe ff ff mov $0xfffffedc,%ebx */
509 /* b9 ff ff ff ff mov $0xffffffff,%ecx */
510 /* or */
511 /* 29 c9 sub %ecx,%ecx */
512 #if 0
513 if (bintrans_32bit_only) {
514 /* 99 cltd */
515 *a++ = 0x99;
516 }
517 #endif
518 *a++ = 0xbb; *a++ = uimm; *a++ = uimm >> 8;
519 if (uimm & 0x8000) {
520 *a++ = 0xff; *a++ = 0xff;
521 *a++ = 0xb9; *a++ = 0xff; *a++ = 0xff; *a++ = 0xff; *a++ = 0xff;
522 } else {
523 *a++ = 0; *a++ = 0;
524 *a++ = 0x29; *a++ = 0xc9;
525 }
526
527 /* if edx <= ecx and eax < ebx then 1, else 0. */
528 /* 39 ca cmp %ecx,%edx */
529 /* 77 0b ja <ret0> */
530 /* 39 d8 cmp %ebx,%eax */
531 /* 73 07 jae 58 <ret0> */
532 *a++ = 0x39; *a++ = 0xca;
533 *a++ = 0x77; *a++ = 0x0b;
534 *a++ = 0x39; *a++ = 0xd8;
535 *a++ = 0x73; *a++ = 0x07;
536
537 /* b8 01 00 00 00 mov $0x1,%eax */
538 /* eb 02 jmp <common> */
539 *a++ = 0xb8; *a++ = 1; *a++ = 0; *a++ = 0; *a++ = 0;
540 *a++ = 0xeb; *a++ = 0x02;
541
542 /* ret0: */
543 /* 29 c0 sub %eax,%eax */
544 *a++ = 0x29; *a++ = 0xc0;
545
546 /* common: */
547 /* 99 cltd */
548 *a++ = 0x99;
549 break;
550 case HI6_SLTI:
551 /* set if less than, signed. (compare edx:eax to ecx:ebx) */
552 /* ecx:ebx = the immediate value */
553 /* bb dc fe ff ff mov $0xfffffedc,%ebx */
554 /* b9 ff ff ff ff mov $0xffffffff,%ecx */
555 /* or */
556 /* 29 c9 sub %ecx,%ecx */
557 #if 0
558 if (bintrans_32bit_only) {
559 /* 99 cltd */
560 *a++ = 0x99;
561 }
562 #endif
563 *a++ = 0xbb; *a++ = uimm; *a++ = uimm >> 8;
564 if (uimm & 0x8000) {
565 *a++ = 0xff; *a++ = 0xff;
566 *a++ = 0xb9; *a++ = 0xff; *a++ = 0xff; *a++ = 0xff; *a++ = 0xff;
567 } else {
568 *a++ = 0; *a++ = 0;
569 *a++ = 0x29; *a++ = 0xc9;
570 }
571
572 /* if edx > ecx then 0. */
573 /* if edx < ecx then 1. */
574 /* if eax < ebx then 1, else 0. */
575 /* 39 ca cmp %ecx,%edx */
576 /* 7c 0a jl <ret1> */
577 /* 7f 04 jg <ret0> */
578 /* 39 d8 cmp %ebx,%eax */
579 /* 7c 04 jl <ret1> */
580 *a++ = 0x39; *a++ = 0xca;
581 *a++ = 0x7c; *a++ = 0x0a;
582 *a++ = 0x7f; *a++ = 0x04;
583 *a++ = 0x39; *a++ = 0xd8;
584 *a++ = 0x7c; *a++ = 0x04;
585
586 /* ret0: */
587 /* 29 c0 sub %eax,%eax */
588 /* eb 05 jmp <common> */
589 *a++ = 0x29; *a++ = 0xc0;
590 *a++ = 0xeb; *a++ = 0x05;
591
592 /* ret1: */
593 /* b8 01 00 00 00 mov $0x1,%eax */
594 *a++ = 0xb8; *a++ = 1; *a++ = 0; *a++ = 0; *a++ = 0;
595
596 /* common: */
597 /* 99 cltd */
598 *a++ = 0x99;
599 break;
600 }
601
602 store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
603
604 rt0:
605 *addrp = a;
606 bintrans_write_pc_inc(addrp);
607 return 1;
608 }
609
610
611 /*
612 * bintrans_write_instruction__jal():
613 */
614 static int bintrans_write_instruction__jal(unsigned char **addrp,
615 int imm, int link)
616 {
617 unsigned char *a;
618 uint32_t subimm;
619 int ofs;
620
621 a = *addrp;
622
623 load_pc_into_eax_edx(&a);
624
625 if (link) {
626 /* gpr[31] = pc + 8 */
627 #if 0
628 if (bintrans_32bit_only) {
629 /* 50 push %eax */
630 /* 83 c0 08 add $0x8,%eax */
631 *a++ = 0x50;
632 *a++ = 0x83; *a++ = 0xc0; *a++ = 0x08;
633 } else
634 #endif
635 {
636 /* 50 push %eax */
637 /* 52 push %edx */
638 /* 83 c0 08 add $0x8,%eax */
639 /* 83 d2 00 adc $0x0,%edx */
640 *a++ = 0x50;
641 *a++ = 0x52;
642 *a++ = 0x83; *a++ = 0xc0; *a++ = 0x08;
643 *a++ = 0x83; *a++ = 0xd2; *a++ = 0x00;
644 }
645 store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[31]);
646 #if 0
647 if (bintrans_32bit_only) {
648 /* 58 pop %eax */
649 *a++ = 0x58;
650 } else
651 #endif
652 {
653 /* 5a pop %edx */
654 /* 58 pop %eax */
655 *a++ = 0x5a;
656 *a++ = 0x58;
657 }
658 }
659
660 /* delay_jmpaddr = top 36 bits of pc together with lowest 28 bits of imm*4: */
661 imm *= 4;
662
663 /* Add 4, because the jump is from the delay slot: */
664 /* 83 c0 04 add $0x4,%eax */
665 /* 83 d2 00 adc $0x0,%edx */
666 *a++ = 0x83; *a++ = 0xc0; *a++ = 0x04;
667 *a++ = 0x83; *a++ = 0xd2; *a++ = 0x00;
668
669 /* c1 e8 1c shr $0x1c,%eax */
670 /* c1 e0 1c shl $0x1c,%eax */
671 *a++ = 0xc1; *a++ = 0xe8; *a++ = 0x1c;
672 *a++ = 0xc1; *a++ = 0xe0; *a++ = 0x1c;
673
674 subimm = imm;
675 subimm &= 0x0fffffff;
676
677 /* 0d 78 56 34 12 or $0x12345678,%eax */
678 *a++ = 0x0d; *a++ = subimm; *a++ = subimm >> 8;
679 *a++ = subimm >> 16; *a++ = subimm >> 24;
680
681 store_eax_edx(&a, &dummy_cpu.cd.mips.delay_jmpaddr);
682
683 /* c7 86 38 30 00 00 01 00 00 00 movl $0x1,0x3038(%esi) */
684 ofs = ((size_t)&dummy_cpu.cd.mips.delay_slot) - (size_t)&dummy_cpu;
685 *a++ = 0xc7; *a++ = 0x86;
686 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
687 *a++ = TO_BE_DELAYED; *a++ = 0; *a++ = 0; *a++ = 0;
688
689 *addrp = a;
690 bintrans_write_pc_inc(addrp);
691 return 1;
692 }
693
694
695 /*
696 * bintrans_write_instruction__addu_etc():
697 */
698 static int bintrans_write_instruction__addu_etc(unsigned char **addrp,
699 int rd, int rs, int rt, int sa, int instruction_type)
700 {
701 unsigned char *a;
702 int load64 = 0, do_store = 1;
703
704 /* TODO: Not yet */
705 switch (instruction_type) {
706 case SPECIAL_MULT:
707 case SPECIAL_MULTU:
708 case SPECIAL_DIV:
709 case SPECIAL_DIVU:
710 if (rd != 0)
711 return 0;
712 break;
713 case SPECIAL_DSLL:
714 case SPECIAL_DSLL32:
715 case SPECIAL_DSRA:
716 case SPECIAL_DSRA32:
717 case SPECIAL_DSRL:
718 case SPECIAL_DSRL32:
719 case SPECIAL_MOVZ:
720 case SPECIAL_MOVN:
721 bintrans_write_chunkreturn_fail(addrp);
722 return 0;
723 }
724
725 switch (instruction_type) {
726 case SPECIAL_DADDU:
727 case SPECIAL_DSUBU:
728 case SPECIAL_OR:
729 case SPECIAL_AND:
730 case SPECIAL_NOR:
731 case SPECIAL_XOR:
732 case SPECIAL_DSLL:
733 case SPECIAL_DSRL:
734 case SPECIAL_DSRA:
735 case SPECIAL_DSLL32:
736 case SPECIAL_DSRL32:
737 case SPECIAL_DSRA32:
738 case SPECIAL_SLT:
739 case SPECIAL_SLTU:
740 load64 = 1;
741 }
742
743 switch (instruction_type) {
744 case SPECIAL_MULT:
745 case SPECIAL_MULTU:
746 case SPECIAL_DIV:
747 case SPECIAL_DIVU:
748 break;
749 default:
750 if (rd == 0)
751 goto rd0;
752 }
753
754 a = *addrp;
755
756 if ((instruction_type == SPECIAL_ADDU || instruction_type == SPECIAL_DADDU
757 || instruction_type == SPECIAL_OR) && rt == 0) {
758 if (load64)
759 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
760 else
761 load_into_eax_and_sign_extend_into_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
762 store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rd]);
763 *addrp = a;
764 goto rd0;
765 }
766
767 /* edx:eax = rs, ecx:ebx = rt */
768 if (load64) {
769 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
770 /* 89 c3 mov %eax,%ebx */
771 /* 89 d1 mov %edx,%ecx */
772 *a++ = 0x89; *a++ = 0xc3; *a++ = 0x89; *a++ = 0xd1;
773 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
774 } else {
775 load_into_eax_and_sign_extend_into_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
776 /* 89 c3 mov %eax,%ebx */
777 /* 89 d1 mov %edx,%ecx */
778 *a++ = 0x89; *a++ = 0xc3; *a++ = 0x89; *a++ = 0xd1;
779 load_into_eax_and_sign_extend_into_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
780 }
781
782 switch (instruction_type) {
783 case SPECIAL_ADDU:
784 /* 01 d8 add %ebx,%eax */
785 /* 99 cltd */
786 *a++ = 0x01; *a++ = 0xd8;
787 *a++ = 0x99;
788 break;
789 case SPECIAL_DADDU:
790 /* 01 d8 add %ebx,%eax */
791 /* 11 ca adc %ecx,%edx */
792 *a++ = 0x01; *a++ = 0xd8;
793 *a++ = 0x11; *a++ = 0xca;
794 break;
795 case SPECIAL_SUBU:
796 /* 29 d8 sub %ebx,%eax */
797 /* 99 cltd */
798 *a++ = 0x29; *a++ = 0xd8;
799 *a++ = 0x99;
800 break;
801 case SPECIAL_DSUBU:
802 /* 29 d8 sub %ebx,%eax */
803 /* 19 ca sbb %ecx,%edx */
804 *a++ = 0x29; *a++ = 0xd8;
805 *a++ = 0x19; *a++ = 0xca;
806 break;
807 case SPECIAL_AND:
808 /* 21 d8 and %ebx,%eax */
809 /* 21 ca and %ecx,%edx */
810 *a++ = 0x21; *a++ = 0xd8;
811 *a++ = 0x21; *a++ = 0xca;
812 break;
813 case SPECIAL_OR:
814 /* 09 d8 or %ebx,%eax */
815 /* 09 ca or %ecx,%edx */
816 *a++ = 0x09; *a++ = 0xd8;
817 *a++ = 0x09; *a++ = 0xca;
818 break;
819 case SPECIAL_NOR:
820 /* 09 d8 or %ebx,%eax */
821 /* 09 ca or %ecx,%edx */
822 /* f7 d0 not %eax */
823 /* f7 d2 not %edx */
824 *a++ = 0x09; *a++ = 0xd8;
825 *a++ = 0x09; *a++ = 0xca;
826 *a++ = 0xf7; *a++ = 0xd0;
827 *a++ = 0xf7; *a++ = 0xd2;
828 break;
829 case SPECIAL_XOR:
830 /* 31 d8 xor %ebx,%eax */
831 /* 31 ca xor %ecx,%edx */
832 *a++ = 0x31; *a++ = 0xd8;
833 *a++ = 0x31; *a++ = 0xca;
834 break;
835 case SPECIAL_SLL:
836 /* 89 d8 mov %ebx,%eax */
837 /* c1 e0 1f shl $0x1f,%eax */
838 /* 99 cltd */
839 *a++ = 0x89; *a++ = 0xd8;
840 if (sa == 1) {
841 *a++ = 0xd1; *a++ = 0xe0;
842 } else {
843 *a++ = 0xc1; *a++ = 0xe0; *a++ = sa;
844 }
845 *a++ = 0x99;
846 break;
847 case SPECIAL_SRA:
848 /* 89 d8 mov %ebx,%eax */
849 /* c1 f8 1f sar $0x1f,%eax */
850 /* 99 cltd */
851 *a++ = 0x89; *a++ = 0xd8;
852 if (sa == 1) {
853 *a++ = 0xd1; *a++ = 0xf8;
854 } else {
855 *a++ = 0xc1; *a++ = 0xf8; *a++ = sa;
856 }
857 *a++ = 0x99;
858 break;
859 case SPECIAL_SRL:
860 /* 89 d8 mov %ebx,%eax */
861 /* c1 e8 1f shr $0x1f,%eax */
862 /* 99 cltd */
863 *a++ = 0x89; *a++ = 0xd8;
864 if (sa == 1) {
865 *a++ = 0xd1; *a++ = 0xe8;
866 } else {
867 *a++ = 0xc1; *a++ = 0xe8; *a++ = sa;
868 }
869 *a++ = 0x99;
870 break;
871 case SPECIAL_SLTU:
872 /* set if less than, unsigned. (compare edx:eax to ecx:ebx) */
873 /* if edx <= ecx and eax < ebx then 1, else 0. */
874 /* 39 ca cmp %ecx,%edx */
875 /* 77 0b ja <ret0> */
876 /* 39 d8 cmp %ebx,%eax */
877 /* 73 07 jae 58 <ret0> */
878 *a++ = 0x39; *a++ = 0xca;
879 *a++ = 0x77; *a++ = 0x0b;
880 *a++ = 0x39; *a++ = 0xd8;
881 *a++ = 0x73; *a++ = 0x07;
882
883 /* b8 01 00 00 00 mov $0x1,%eax */
884 /* eb 02 jmp <common> */
885 *a++ = 0xb8; *a++ = 1; *a++ = 0; *a++ = 0; *a++ = 0;
886 *a++ = 0xeb; *a++ = 0x02;
887
888 /* ret0: */
889 /* 29 c0 sub %eax,%eax */
890 *a++ = 0x29; *a++ = 0xc0;
891
892 /* common: */
893 /* 99 cltd */
894 *a++ = 0x99;
895 break;
896 case SPECIAL_SLT:
897 /* set if less than, signed. (compare edx:eax to ecx:ebx) */
898 /* if edx > ecx then 0. */
899 /* if edx < ecx then 1. */
900 /* if eax < ebx then 1, else 0. */
901 /* 39 ca cmp %ecx,%edx */
902 /* 7c 0a jl <ret1> */
903 /* 7f 04 jg <ret0> */
904 /* 39 d8 cmp %ebx,%eax */
905 /* 7c 04 jl <ret1> */
906 *a++ = 0x39; *a++ = 0xca;
907 *a++ = 0x7c; *a++ = 0x0a;
908 *a++ = 0x7f; *a++ = 0x04;
909 *a++ = 0x39; *a++ = 0xd8;
910 *a++ = 0x7c; *a++ = 0x04;
911
912 /* ret0: */
913 /* 29 c0 sub %eax,%eax */
914 /* eb 05 jmp <common> */
915 *a++ = 0x29; *a++ = 0xc0;
916 *a++ = 0xeb; *a++ = 0x05;
917
918 /* ret1: */
919 /* b8 01 00 00 00 mov $0x1,%eax */
920 *a++ = 0xb8; *a++ = 1; *a++ = 0; *a++ = 0; *a++ = 0;
921
922 /* common: */
923 /* 99 cltd */
924 *a++ = 0x99;
925 break;
926 case SPECIAL_SLLV:
927 /* rd = rt << (rs&31) (logical) eax = ebx << (eax&31) */
928 /* xchg ebx,eax, then we can do eax = eax << (ebx&31) */
929 /* 93 xchg %eax,%ebx */
930 /* 89 d9 mov %ebx,%ecx */
931 /* 83 e1 1f and $0x1f,%ecx */
932 /* d3 e0 shl %cl,%eax */
933 *a++ = 0x93;
934 *a++ = 0x89; *a++ = 0xd9;
935 *a++ = 0x83; *a++ = 0xe1; *a++ = 0x1f;
936 *a++ = 0xd3; *a++ = 0xe0;
937 /* 99 cltd */
938 *a++ = 0x99;
939 break;
940 case SPECIAL_SRLV:
941 /* rd = rt >> (rs&31) (logical) eax = ebx >> (eax&31) */
942 /* xchg ebx,eax, then we can do eax = eax >> (ebx&31) */
943 /* 93 xchg %eax,%ebx */
944 /* 89 d9 mov %ebx,%ecx */
945 /* 83 e1 1f and $0x1f,%ecx */
946 /* d3 e8 shr %cl,%eax */
947 *a++ = 0x93;
948 *a++ = 0x89; *a++ = 0xd9;
949 *a++ = 0x83; *a++ = 0xe1; *a++ = 0x1f;
950 *a++ = 0xd3; *a++ = 0xe8;
951 /* 99 cltd */
952 *a++ = 0x99;
953 break;
954 case SPECIAL_SRAV:
955 /* rd = rt >> (rs&31) (arithmetic) eax = ebx >> (eax&31) */
956 /* xchg ebx,eax, then we can do eax = eax >> (ebx&31) */
957 /* 93 xchg %eax,%ebx */
958 /* 89 d9 mov %ebx,%ecx */
959 /* 83 e1 1f and $0x1f,%ecx */
960 /* d3 f8 sar %cl,%eax */
961 *a++ = 0x93;
962 *a++ = 0x89; *a++ = 0xd9;
963 *a++ = 0x83; *a++ = 0xe1; *a++ = 0x1f;
964 *a++ = 0xd3; *a++ = 0xf8;
965 /* 99 cltd */
966 *a++ = 0x99;
967 break;
968 case SPECIAL_MULT:
969 case SPECIAL_MULTU:
970 /* 57 push %edi */
971 *a++ = 0x57;
972 if (instruction_type == SPECIAL_MULT) {
973 /* f7 eb imul %ebx */
974 *a++ = 0xf7; *a++ = 0xeb;
975 } else {
976 /* f7 e3 mul %ebx */
977 *a++ = 0xf7; *a++ = 0xe3;
978 }
979 /* here: edx:eax = hi:lo */
980 /* 89 d7 mov %edx,%edi */
981 /* 99 cltd */
982 *a++ = 0x89; *a++ = 0xd7;
983 *a++ = 0x99;
984 /* here: edi=hi, edx:eax = sign-extended lo */
985 store_eax_edx(&a, &dummy_cpu.cd.mips.lo);
986 /* 89 f8 mov %edi,%eax */
987 /* 99 cltd */
988 *a++ = 0x89; *a++ = 0xf8;
989 *a++ = 0x99;
990 /* here: edx:eax = sign-extended hi */
991 store_eax_edx(&a, &dummy_cpu.cd.mips.hi);
992 /* 5f pop %edi */
993 *a++ = 0x5f;
994 do_store = 0;
995 break;
996 case SPECIAL_DIV:
997 case SPECIAL_DIVU:
998 /*
999 * In: edx:eax = rs, ecx:ebx = rt
1000 * Out: LO = rs / rt, HI = rs % rt
1001 */
1002 /* Division by zero on MIPS is undefined, but on
1003 i386 it causes an exception, so we'll try to
1004 avoid that. */
1005 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x00; /* cmp $0x0,%ebx */
1006 *a++ = 0x75; *a++ = 0x01; /* jne skip_inc */
1007 *a++ = 0x43; /* inc %ebx */
1008
1009 /* 57 push %edi */
1010 *a++ = 0x57;
1011 if (instruction_type == SPECIAL_DIV) {
1012 *a++ = 0x99; /* cltd */
1013 *a++ = 0xf7; *a++ = 0xfb; /* idiv %ebx */
1014 } else {
1015 *a++ = 0x29; *a++ = 0xd2; /* sub %edx,%edx */
1016 *a++ = 0xf7; *a++ = 0xf3; /* div %ebx */
1017 }
1018 /* here: edx:eax = hi:lo */
1019 /* 89 d7 mov %edx,%edi */
1020 /* 99 cltd */
1021 *a++ = 0x89; *a++ = 0xd7;
1022 *a++ = 0x99;
1023 /* here: edi=hi, edx:eax = sign-extended lo */
1024 store_eax_edx(&a, &dummy_cpu.cd.mips.lo);
1025 /* 89 f8 mov %edi,%eax */
1026 /* 99 cltd */
1027 *a++ = 0x89; *a++ = 0xf8;
1028 *a++ = 0x99;
1029 /* here: edx:eax = sign-extended hi */
1030 store_eax_edx(&a, &dummy_cpu.cd.mips.hi);
1031 /* 5f pop %edi */
1032 *a++ = 0x5f;
1033 do_store = 0;
1034 break;
1035 #if 0
1036 /* TODO: These are from bintrans_alpha.c. Translate them to i386. */
1037
1038 case SPECIAL_DSLL:
1039 *a++ = 0x21; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
1040 break;
1041 case SPECIAL_DSLL32:
1042 sa += 32;
1043 *a++ = 0x21; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
1044 break;
1045 case SPECIAL_DSRA:
1046 *a++ = 0x81; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
1047 break;
1048 case SPECIAL_DSRA32:
1049 sa += 32;
1050 *a++ = 0x81; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
1051 break;
1052 case SPECIAL_DSRL:
1053 /* Note: bits of sa are distributed among two different bytes. */
1054 *a++ = 0x81; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
1055 break;
1056 case SPECIAL_DSRL32:
1057 /* Note: bits of sa are distributed among two different bytes. */
1058 sa += 32;
1059 *a++ = 0x81; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
1060 break;
1061 #endif
1062 }
1063
1064 if (do_store)
1065 store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rd]);
1066
1067 *addrp = a;
1068 rd0:
1069 bintrans_write_pc_inc(addrp);
1070 return 1;
1071 }
1072
1073
1074 /*
1075 * bintrans_write_instruction__mfc_mtc():
1076 */
1077 static int bintrans_write_instruction__mfc_mtc(struct memory *mem,
1078 unsigned char **addrp, int coproc_nr, int flag64bit, int rt,
1079 int rd, int mtcflag)
1080 {
1081 unsigned char *a, *failskip;
1082 int ofs;
1083
1084 if (mtcflag && flag64bit) {
1085 /* dmtc */
1086 return 0;
1087 }
1088
1089 /*
1090 * NOTE: Only a few registers are readable without side effects.
1091 */
1092 if (rt == 0 && !mtcflag)
1093 return 0;
1094
1095 if (coproc_nr >= 1)
1096 return 0;
1097
1098 if (rd == COP0_RANDOM || rd == COP0_COUNT)
1099 return 0;
1100
1101 a = *addrp;
1102
1103 /*************************************************************
1104 *
1105 * TODO: Check for kernel mode, or Coproc X usability bit!
1106 *
1107 *************************************************************/
1108
1109 /* 8b 96 3c 30 00 00 mov 0x303c(%esi),%edx */
1110 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
1111 *a++ = 0x8b; *a++ = 0x96;
1112 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1113
1114 /* here, edx = cpu->coproc[0] */
1115
1116 if (mtcflag) {
1117 /* mtc */
1118
1119 /* TODO: This code only works for mtc0, not dmtc0 */
1120
1121 /* 8b 9a 38 30 00 00 mov 0x3038(%edx),%ebx */
1122 ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
1123 *a++ = 0x8b; *a++ = 0x9a;
1124 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1125
1126 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
1127
1128 /*
1129 * Here: eax contains the value in register rt,
1130 * ebx contains the coproc register rd value.
1131 *
1132 * In the general case, only allow mtc if it does not
1133 * change the coprocessor register!
1134 */
1135
1136 switch (rd) {
1137
1138 case COP0_INDEX:
1139 break;
1140
1141 case COP0_ENTRYLO0:
1142 case COP0_ENTRYLO1:
1143 /* TODO: Not all bits are writable! */
1144 break;
1145
1146 case COP0_EPC:
1147 break;
1148
1149 case COP0_STATUS:
1150 /* Only allow updates to the status register if
1151 the interrupt enable bits were changed, but no
1152 other bits! */
1153 /* 89 c1 mov %eax,%ecx */
1154 /* 89 da mov %ebx,%edx */
1155 /* 81 e1 00 00 e7 0f and $0x0fe70000,%ecx */
1156 /* 81 e2 00 00 e7 0f and $0x0fe70000,%edx */
1157 /* 39 ca cmp %ecx,%edx */
1158 /* 74 01 je <ok> */
1159 *a++ = 0x89; *a++ = 0xc1;
1160 *a++ = 0x89; *a++ = 0xda;
1161 *a++ = 0x81; *a++ = 0xe1; *a++ = 0x00; *a++ = 0x00;
1162 if (mem->bintrans_32bit_only) {
1163 *a++ = 0xe7; *a++ = 0x0f;
1164 } else {
1165 *a++ = 0xff; *a++ = 0xff;
1166 }
1167 *a++ = 0x81; *a++ = 0xe2; *a++ = 0x00; *a++ = 0x00;
1168 if (mem->bintrans_32bit_only) {
1169 *a++ = 0xe7; *a++ = 0x0f;
1170 } else {
1171 *a++ = 0xff; *a++ = 0xff;
1172 }
1173 *a++ = 0x39; *a++ = 0xca;
1174 *a++ = 0x74; failskip = a; *a++ = 0x00;
1175 bintrans_write_chunkreturn_fail(&a);
1176 *failskip = (size_t)a - (size_t)failskip - 1;
1177
1178 /* Only allow the update if it would NOT cause
1179 an interrupt exception: */
1180
1181 /* 8b 96 3c 30 00 00 mov 0x303c(%esi),%edx */
1182 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
1183 *a++ = 0x8b; *a++ = 0x96;
1184 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1185
1186 /* 8b 9a 38 30 00 00 mov 0x3038(%edx),%ebx */
1187 ofs = ((size_t)&dummy_coproc.reg[COP0_CAUSE]) - (size_t)&dummy_coproc;
1188 *a++ = 0x8b; *a++ = 0x9a;
1189 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1190
1191 /* 21 c3 and %eax,%ebx */
1192 /* 81 e3 00 ff 00 00 and $0xff00,%ebx */
1193 /* 83 fb 00 cmp $0x0,%ebx */
1194 /* 74 01 je <ok> */
1195 *a++ = 0x21; *a++ = 0xc3;
1196 *a++ = 0x81; *a++ = 0xe3; *a++ = 0x00;
1197 *a++ = 0xff; *a++ = 0x00; *a++ = 0x00;
1198 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x00;
1199 *a++ = 0x74; failskip = a; *a++ = 0x00;
1200 bintrans_write_chunkreturn_fail(&a);
1201 *failskip = (size_t)a - (size_t)failskip - 1;
1202
1203 break;
1204
1205 default:
1206 /* 39 d8 cmp %ebx,%eax */
1207 /* 74 01 je <ok> */
1208 *a++ = 0x39; *a++ = 0xd8;
1209 *a++ = 0x74; failskip = a; *a++ = 0x00;
1210 bintrans_write_chunkreturn_fail(&a);
1211 *failskip = (size_t)a - (size_t)failskip - 1;
1212 }
1213
1214 /* 8b 96 3c 30 00 00 mov 0x303c(%esi),%edx */
1215 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
1216 *a++ = 0x8b; *a++ = 0x96;
1217 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1218
1219 /* 8d 9a 38 30 00 00 lea 0x3038(%edx),%ebx */
1220 ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
1221 *a++ = 0x8d; *a++ = 0x9a;
1222 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1223
1224 /* Sign-extend eax into edx:eax, and store it in
1225 coprocessor register rd: */
1226 /* 99 cltd */
1227 *a++ = 0x99;
1228
1229 /* 89 03 mov %eax,(%ebx) */
1230 /* 89 53 04 mov %edx,0x4(%ebx) */
1231 *a++ = 0x89; *a++ = 0x03;
1232 *a++ = 0x89; *a++ = 0x53; *a++ = 0x04;
1233 } else {
1234 /* mfc */
1235
1236 /* 8b 82 38 30 00 00 mov 0x3038(%edx),%eax */
1237 ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
1238 *a++ = 0x8b; *a++ = 0x82;
1239 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1240
1241 if (flag64bit) {
1242 /* Load high 32 bits: (note: edx gets overwritten) */
1243 /* 8b 92 3c 30 00 00 mov 0x303c(%edx),%edx */
1244 ofs += 4;
1245 *a++ = 0x8b; *a++ = 0x92;
1246 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1247 } else {
1248 /* 99 cltd */
1249 *a++ = 0x99;
1250 }
1251
1252 store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
1253 }
1254
1255 *addrp = a;
1256 bintrans_write_pc_inc(addrp);
1257 return 1;
1258 }
1259
1260
1261 /*
1262 * bintrans_write_instruction__branch():
1263 */
1264 static int bintrans_write_instruction__branch(unsigned char **addrp,
1265 int instruction_type, int regimm_type, int rt, int rs, int imm)
1266 {
1267 unsigned char *a;
1268 unsigned char *skip1 = NULL, *skip2 = NULL;
1269 int ofs, likely = 0;
1270
1271 switch (instruction_type) {
1272 case HI6_BEQL:
1273 case HI6_BNEL:
1274 case HI6_BLEZL:
1275 case HI6_BGTZL:
1276 likely = 1;
1277 }
1278
1279 /* TODO: See the Alpha backend on how these could be implemented: */
1280 if (likely)
1281 return 0;
1282
1283 a = *addrp;
1284
1285 /*
1286 * edx:eax = gpr[rs]; ecx:ebx = gpr[rt];
1287 *
1288 * Compare for equality (BEQ).
1289 * If the result was zero, then it means equality; perform the
1290 * delayed jump. Otherwise: skip.
1291 */
1292
1293 switch (instruction_type) {
1294 case HI6_BEQ:
1295 case HI6_BNE:
1296 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
1297 /* 89 c3 mov %eax,%ebx */
1298 /* 89 d1 mov %edx,%ecx */
1299 *a++ = 0x89; *a++ = 0xc3; *a++ = 0x89; *a++ = 0xd1;
1300 }
1301 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
1302
1303 if (instruction_type == HI6_BEQ && rt != rs) {
1304 /* If rt != rs, then skip. */
1305 /* 39 c3 cmp %eax,%ebx */
1306 /* 75 05 jne 155 <skip> */
1307 /* 39 d1 cmp %edx,%ecx */
1308 /* 75 01 jne 155 <skip> */
1309 *a++ = 0x39; *a++ = 0xc3;
1310 *a++ = 0x75; skip1 = a; *a++ = 0x00;
1311 #if 0
1312 if (!bintrans_32bit_only)
1313 #endif
1314 {
1315 *a++ = 0x39; *a++ = 0xd1;
1316 *a++ = 0x75; skip2 = a; *a++ = 0x00;
1317 }
1318 }
1319
1320 if (instruction_type == HI6_BNE) {
1321 /* If rt != rs, then ok. Otherwise skip. */
1322 #if 0
1323 if (bintrans_32bit_only) {
1324 /* 39 c3 cmp %eax,%ebx */
1325 /* 74 xx je <skip> */
1326 *a++ = 0x39; *a++ = 0xc3;
1327 *a++ = 0x74; skip2 = a; *a++ = 0x00;
1328 } else
1329 #endif
1330 {
1331 /* 39 c3 cmp %eax,%ebx */
1332 /* 75 06 jne 156 <bra> */
1333 /* 39 d1 cmp %edx,%ecx */
1334 /* 75 02 jne 156 <bra> */
1335 /* eb 01 jmp 157 <skip> */
1336 *a++ = 0x39; *a++ = 0xc3;
1337 *a++ = 0x75; *a++ = 0x06;
1338 *a++ = 0x39; *a++ = 0xd1;
1339 *a++ = 0x75; *a++ = 0x02;
1340 *a++ = 0xeb; skip2 = a; *a++ = 0x00;
1341 }
1342 }
1343
1344 if (instruction_type == HI6_BLEZ) {
1345 /* If both eax and edx are zero, then do the branch. */
1346 /* 83 f8 00 cmp $0x0,%eax */
1347 /* 75 07 jne <nott> */
1348 /* 83 fa 00 cmp $0x0,%edx */
1349 /* 75 02 jne 23d <nott> */
1350 /* eb 01 jmp <branch> */
1351 *a++ = 0x83; *a++ = 0xf8; *a++ = 0x00;
1352 *a++ = 0x75; *a++ = 0x07;
1353 *a++ = 0x83; *a++ = 0xfa; *a++ = 0x00;
1354 *a++ = 0x75; *a++ = 0x02;
1355 *a++ = 0xeb; skip1 = a; *a++ = 0x00;
1356
1357 /* If high bit of edx is set, then rs < 0. */
1358 /* f7 c2 00 00 00 80 test $0x80000000,%edx */
1359 /* 74 00 jz skip */
1360 *a++ = 0xf7; *a++ = 0xc2; *a++ = 0; *a++ = 0; *a++ = 0; *a++ = 0x80;
1361 *a++ = 0x74; skip2 = a; *a++ = 0x00;
1362
1363 if (skip1 != NULL)
1364 *skip1 = (size_t)a - (size_t)skip1 - 1;
1365 skip1 = NULL;
1366 }
1367 if (instruction_type == HI6_BGTZ) {
1368 /* If both eax and edx are zero, then skip the branch. */
1369 /* 83 f8 00 cmp $0x0,%eax */
1370 /* 75 07 jne <nott> */
1371 /* 83 fa 00 cmp $0x0,%edx */
1372 /* 75 02 jne 23d <nott> */
1373 /* eb 01 jmp <skip> */
1374 *a++ = 0x83; *a++ = 0xf8; *a++ = 0x00;
1375 *a++ = 0x75; *a++ = 0x07;
1376 *a++ = 0x83; *a++ = 0xfa; *a++ = 0x00;
1377 *a++ = 0x75; *a++ = 0x02;
1378 *a++ = 0xeb; skip1 = a; *a++ = 0x00;
1379
1380 /* If high bit of edx is set, then rs < 0. */
1381 /* f7 c2 00 00 00 80 test $0x80000000,%edx */
1382 /* 75 00 jnz skip */
1383 *a++ = 0xf7; *a++ = 0xc2; *a++ = 0; *a++ = 0; *a++ = 0; *a++ = 0x80;
1384 *a++ = 0x75; skip2 = a; *a++ = 0x00;
1385 }
1386 if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BLTZ) {
1387 /* If high bit of edx is set, then rs < 0. */
1388 /* f7 c2 00 00 00 80 test $0x80000000,%edx */
1389 /* 74 00 jz skip */
1390 *a++ = 0xf7; *a++ = 0xc2; *a++ = 0; *a++ = 0; *a++ = 0; *a++ = 0x80;
1391 *a++ = 0x74; skip2 = a; *a++ = 0x00;
1392 }
1393 if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BGEZ) {
1394 /* If high bit of edx is not set, then rs >= 0. */
1395 /* f7 c2 00 00 00 80 test $0x80000000,%edx */
1396 /* 75 00 jnz skip */
1397 *a++ = 0xf7; *a++ = 0xc2; *a++ = 0; *a++ = 0; *a++ = 0; *a++ = 0x80;
1398 *a++ = 0x75; skip2 = a; *a++ = 0x00;
1399 }
1400
1401 /*
1402 * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
1403 * and cpu->delay_jmpaddr = pc + 4 + (imm << 2).
1404 */
1405
1406 /* c7 86 38 30 00 00 01 00 00 00 movl $0x1,0x3038(%esi) */
1407 ofs = ((size_t)&dummy_cpu.cd.mips.delay_slot) - (size_t)&dummy_cpu;
1408 *a++ = 0xc7; *a++ = 0x86;
1409 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1410 *a++ = TO_BE_DELAYED; *a++ = 0; *a++ = 0; *a++ = 0;
1411
1412 load_pc_into_eax_edx(&a);
1413
1414 /* 05 78 56 34 12 add $0x12345678,%eax */
1415 /* 83 d2 00 adc $0x0,%edx */
1416 /* or */
1417 /* 83 d2 ff adc $0xffffffff,%edx */
1418 imm = (imm << 2) + 4;
1419 *a++ = 0x05; *a++ = imm; *a++ = imm >> 8; *a++ = imm >> 16; *a++ = imm >> 24;
1420 if (imm >= 0) {
1421 *a++ = 0x83; *a++ = 0xd2; *a++ = 0x00;
1422 } else {
1423 *a++ = 0x83; *a++ = 0xd2; *a++ = 0xff;
1424 }
1425 store_eax_edx(&a, &dummy_cpu.cd.mips.delay_jmpaddr);
1426
1427 if (skip1 != NULL)
1428 *skip1 = (size_t)a - (size_t)skip1 - 1;
1429 if (skip2 != NULL)
1430 *skip2 = (size_t)a - (size_t)skip2 - 1;
1431
1432 *addrp = a;
1433 bintrans_write_pc_inc(addrp);
1434 return 1;
1435 }
1436
1437
1438 /*
1439 * bintrans_write_instruction__delayedbranch():
1440 */
1441 static int bintrans_write_instruction__delayedbranch(struct memory *mem,
1442 unsigned char **addrp, uint32_t *potential_chunk_p, uint32_t *chunks,
1443 int only_care_about_chunk_p, int p, int forward)
1444 {
1445 unsigned char *a, *skip=NULL, *failskip;
1446 int ofs;
1447 uint32_t i386_addr;
1448
1449 a = *addrp;
1450
1451 if (only_care_about_chunk_p)
1452 goto try_chunk_p;
1453
1454 /* Skip all of this if there is no branch: */
1455 ofs = ((size_t)&dummy_cpu.cd.mips.delay_slot) - (size_t)&dummy_cpu;
1456
1457 /* 8b 86 38 30 00 00 mov 0x3038(%esi),%eax */
1458 *a++ = 0x8b; *a++ = 0x86;
1459 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1460
1461 /* 83 f8 00 cmp $0x0,%eax */
1462 /* 74 01 je 16b <skippa> */
1463 *a++ = 0x83; *a++ = 0xf8; *a++ = 0x00;
1464 *a++ = 0x74; skip = a; *a++ = 0;
1465
1466 /*
1467 * Perform the jump by setting cpu->delay_slot = 0
1468 * and pc = cpu->delay_jmpaddr.
1469 */
1470
1471 /* c7 86 38 30 00 00 00 00 00 00 movl $0x0,0x3038(%esi) */
1472 ofs = ((size_t)&dummy_cpu.cd.mips.delay_slot) - (size_t)&dummy_cpu;
1473 *a++ = 0xc7; *a++ = 0x86;
1474 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1475 *a++ = 0; *a++ = 0; *a++ = 0; *a++ = 0;
1476
1477 /* REMEMBER old pc: */
1478 load_pc_into_eax_edx(&a);
1479 /* 89 c3 mov %eax,%ebx */
1480 /* 89 d1 mov %edx,%ecx */
1481 *a++ = 0x89; *a++ = 0xc3;
1482 *a++ = 0x89; *a++ = 0xd1;
1483 load_into_eax_edx(&a, &dummy_cpu.cd.mips.delay_jmpaddr);
1484 store_eax_edx_into_pc(&a);
1485
1486 try_chunk_p:
1487
1488 if (potential_chunk_p == NULL) {
1489 if (mem->bintrans_32bit_only) {
1490 #if 1
1491 /* 8b 86 78 56 34 12 mov 0x12345678(%esi),%eax */
1492 /* ff e0 jmp *%eax */
1493 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc) - (size_t)&dummy_cpu;
1494 *a++ = 0x8b; *a++ = 0x86;
1495 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1496 *a++ = 0xff; *a++ = 0xe0;
1497
1498 #else
1499 /* Don't execute too many instructions. */
1500 /* 81 fd f0 1f 00 00 cmpl $0x1ff0,%ebp */
1501 /* 7c 01 jl <okk> */
1502 /* c3 ret */
1503 *a++ = 0x81; *a++ = 0xfd;
1504 *a++ = (N_SAFE_BINTRANS_LIMIT-1) & 255;
1505 *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8) & 255; *a++ = 0; *a++ = 0;
1506 *a++ = 0x7c; failskip = a; *a++ = 0x01;
1507 bintrans_write_chunkreturn_fail(&a);
1508 *failskip = (size_t)a - (size_t)failskip - 1;
1509
1510 /*
1511 * ebx = ((vaddr >> 22) & 1023) * sizeof(void *)
1512 *
1513 * 89 c3 mov %eax,%ebx
1514 * c1 eb 14 shr $20,%ebx
1515 * 81 e3 fc 0f 00 00 and $0xffc,%ebx
1516 */
1517 *a++ = 0x89; *a++ = 0xc3;
1518 *a++ = 0xc1; *a++ = 0xeb; *a++ = 0x14;
1519 *a++ = 0x81; *a++ = 0xe3; *a++ = 0xfc; *a++ = 0x0f; *a++ = 0; *a++ = 0;
1520
1521 /*
1522 * ecx = vaddr_to_hostaddr_table0
1523 *
1524 * 8b 8e 34 12 00 00 mov 0x1234(%esi),%ecx
1525 */
1526 ofs = ((size_t)&dummy_cpu.cd.mips.vaddr_to_hostaddr_table0) - (size_t)&dummy_cpu;
1527 *a++ = 0x8b; *a++ = 0x8e;
1528 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1529
1530 /*
1531 * ecx = vaddr_to_hostaddr_table0[a]
1532 *
1533 * 8b 0c 19 mov (%ecx,%ebx),%ecx
1534 */
1535 *a++ = 0x8b; *a++ = 0x0c; *a++ = 0x19;
1536
1537 /*
1538 * ebx = ((vaddr >> 12) & 1023) * sizeof(void *)
1539 *
1540 * 89 c3 mov %eax,%ebx
1541 * c1 eb 0a shr $10,%ebx
1542 * 81 e3 fc 0f 00 00 and $0xffc,%ebx
1543 */
1544 *a++ = 0x89; *a++ = 0xc3;
1545 *a++ = 0xc1; *a++ = 0xeb; *a++ = 0x0a;
1546 *a++ = 0x81; *a++ = 0xe3; *a++ = 0xfc; *a++ = 0x0f; *a++ = 0; *a++ = 0;
1547
1548 /*
1549 * ecx = vaddr_to_hostaddr_table0[a][b].cd.mips.chunks
1550 *
1551 * 8b 8c 19 56 34 12 00 mov 0x123456(%ecx,%ebx,1),%ecx
1552 */
1553 ofs = (size_t)&dummy_vth32_table.cd.mips.bintrans_chunks[0]
1554 - (size_t)&dummy_vth32_table;
1555
1556 *a++ = 0x8b; *a++ = 0x8c; *a++ = 0x19;
1557 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1558
1559 /*
1560 * ecx = NULL? Then return with failure.
1561 *
1562 * 83 f9 00 cmp $0x0,%ecx
1563 * 75 01 jne <okzzz>
1564 */
1565 *a++ = 0x83; *a++ = 0xf9; *a++ = 0x00;
1566 *a++ = 0x75; fail = a; *a++ = 0x00;
1567 bintrans_write_chunkreturn(&a);
1568 *fail = (size_t)a - (size_t)fail - 1;
1569
1570 /*
1571 * 25 fc 0f 00 00 and $0xffc,%eax
1572 * 01 c1 add %eax,%ecx
1573 *
1574 * 8b 01 mov (%ecx),%eax
1575 *
1576 * 83 f8 00 cmp $0x0,%eax
1577 * 75 01 jne <ok>
1578 * c3 ret
1579 */
1580 *a++ = 0x25; *a++ = 0xfc; *a++ = 0x0f; *a++ = 0; *a++ = 0;
1581 *a++ = 0x01; *a++ = 0xc1;
1582
1583 *a++ = 0x8b; *a++ = 0x01;
1584
1585 *a++ = 0x83; *a++ = 0xf8; *a++ = 0x00;
1586 *a++ = 0x75; fail = a; *a++ = 0x01;
1587 bintrans_write_chunkreturn(&a);
1588 *fail = (size_t)a - (size_t)fail - 1;
1589
1590 /* 03 86 78 56 34 12 add 0x12345678(%esi),%eax */
1591 /* ff e0 jmp *%eax */
1592 ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1593 *a++ = 0x03; *a++ = 0x86;
1594 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1595 *a++ = 0xff; *a++ = 0xe0;
1596 #endif
1597 } else {
1598 /* Not much we can do here if this wasn't to the same physical page... */
1599
1600 /* Don't execute too many instructions. */
1601 /* 81 fd f0 1f 00 00 cmpl $0x1ff0,%ebp */
1602 /* 7c 01 jl <okk> */
1603 /* c3 ret */
1604 *a++ = 0x81; *a++ = 0xfd;
1605 *a++ = (N_SAFE_BINTRANS_LIMIT-1) & 255;
1606 *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8) & 255; *a++ = 0; *a++ = 0;
1607 *a++ = 0x7c; failskip = a; *a++ = 0x01;
1608 bintrans_write_chunkreturn_fail(&a);
1609 *failskip = (size_t)a - (size_t)failskip - 1;
1610
1611 /*
1612 * Compare the old pc (ecx:ebx) and the new pc (edx:eax). If they are on the
1613 * same virtual page (which means that they are on the same physical
1614 * page), then we can check the right chunk pointer, and if it
1615 * is non-NULL, then we can jump there. Otherwise just return.
1616 */
1617
1618 /* Subtract 4 from the old pc first. (This is where the jump originated from.) */
1619 /* 83 eb 04 sub $0x4,%ebx */
1620 /* 83 d9 00 sbb $0x0,%ecx */
1621 *a++ = 0x83; *a++ = 0xeb; *a++ = 0x04;
1622 *a++ = 0x83; *a++ = 0xd9; *a++ = 0x00;
1623
1624 /* 39 d1 cmp %edx,%ecx */
1625 /* 74 01 je 1b9 <ok2> */
1626 /* c3 ret */
1627 *a++ = 0x39; *a++ = 0xd1;
1628 *a++ = 0x74; *a++ = 0x01;
1629 *a++ = 0xc3;
1630
1631 /* Remember new pc: */
1632 /* 89 c1 mov %eax,%ecx */
1633 *a++ = 0x89; *a++ = 0xc1;
1634
1635 /* 81 e3 00 f0 ff ff and $0xfffff000,%ebx */
1636 /* 25 00 f0 ff ff and $0xfffff000,%eax */
1637 *a++ = 0x81; *a++ = 0xe3; *a++ = 0x00; *a++ = 0xf0; *a++ = 0xff; *a++ = 0xff;
1638 *a++ = 0x25; *a++ = 0x00; *a++ = 0xf0; *a++ = 0xff; *a++ = 0xff;
1639
1640 /* 39 c3 cmp %eax,%ebx */
1641 /* 74 01 je <ok1> */
1642 /* c3 ret */
1643 *a++ = 0x39; *a++ = 0xc3;
1644 *a++ = 0x74; *a++ = 0x01;
1645 *a++ = 0xc3;
1646
1647 /* 81 e1 ff 0f 00 00 and $0xfff,%ecx */
1648 *a++ = 0x81; *a++ = 0xe1; *a++ = 0xff; *a++ = 0x0f; *a++ = 0; *a++ = 0;
1649
1650 /* 8b 81 78 56 34 12 mov 0x12345678(%ecx),%eax */
1651 ofs = (size_t)chunks;
1652 *a++ = 0x8b; *a++ = 0x81; *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1653
1654 /* 83 f8 00 cmp $0x0,%eax */
1655 /* 75 01 jne 1cd <okjump> */
1656 /* c3 ret */
1657 *a++ = 0x83; *a++ = 0xf8; *a++ = 0x00;
1658 *a++ = 0x75; *a++ = 0x01;
1659 *a++ = 0xc3;
1660
1661 /* 03 86 78 56 34 12 add 0x12345678(%esi),%eax */
1662 /* ff e0 jmp *%eax */
1663 ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1664 *a++ = 0x03; *a++ = 0x86;
1665 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1666 *a++ = 0xff; *a++ = 0xe0;
1667 }
1668 } else {
1669 /*
1670 * Just to make sure that we don't become too unreliant
1671 * on the main program loop, we need to return every once
1672 * in a while (interrupts etc).
1673 *
1674 * Load the "nr of instructions executed" (which is an int)
1675 * and see if it is below a certain threshold. If so, then
1676 * we go on with the fast path (bintrans), otherwise we
1677 * abort by returning.
1678 */
1679 /* 81 fd f0 1f 00 00 cmpl $0x1ff0,%ebp */
1680 /* 7c 01 jl <okk> */
1681 /* c3 ret */
1682 if (!only_care_about_chunk_p && !forward) {
1683 *a++ = 0x81; *a++ = 0xfd;
1684 *a++ = (N_SAFE_BINTRANS_LIMIT-1) & 255;
1685 *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8) & 255; *a++ = 0; *a++ = 0;
1686 *a++ = 0x7c; failskip = a; *a++ = 0x01;
1687 bintrans_write_chunkreturn_fail(&a);
1688 *failskip = (size_t)a - (size_t)failskip - 1;
1689 }
1690
1691 /*
1692 * potential_chunk_p points to an "uint32_t".
1693 * If this value is non-NULL, then it is a piece of i386
1694 * machine language code corresponding to the address
1695 * we're jumping to. Otherwise, those instructions haven't
1696 * been translated yet, so we have to return to the main
1697 * loop. (Actually, we have to add cpu->chunk_base_address.)
1698 *
1699 * Case 1: The value is non-NULL already at translation
1700 * time. Then we can make a direct (fast) native
1701 * i386 jump to the code chunk.
1702 *
1703 * Case 2: The value was NULL at translation time, then we
1704 * have to check during runtime.
1705 */
1706
1707 /* Case 1: */
1708 /* printf("%08x ", *potential_chunk_p); */
1709 i386_addr = *potential_chunk_p +
1710 (size_t)mem->translation_code_chunk_space;
1711 i386_addr = i386_addr - ((size_t)a + 5);
1712 if ((*potential_chunk_p) != 0) {
1713 *a++ = 0xe9;
1714 *a++ = i386_addr;
1715 *a++ = i386_addr >> 8;
1716 *a++ = i386_addr >> 16;
1717 *a++ = i386_addr >> 24;
1718 } else {
1719 /* Case 2: */
1720
1721 bintrans_register_potential_quick_jump(mem, a, p);
1722
1723 i386_addr = (size_t)potential_chunk_p;
1724
1725 /*
1726 * Load the chunk pointer into eax.
1727 * If it is NULL (zero), then skip the following jump.
1728 * Add chunk_base_address to eax, and jump to eax.
1729 */
1730
1731 /* a1 78 56 34 12 mov 0x12345678,%eax */
1732 /* 83 f8 00 cmp $0x0,%eax */
1733 /* 75 01 jne <okaa> */
1734 /* c3 ret */
1735 *a++ = 0xa1;
1736 *a++ = i386_addr; *a++ = i386_addr >> 8;
1737 *a++ = i386_addr >> 16; *a++ = i386_addr >> 24;
1738 *a++ = 0x83; *a++ = 0xf8; *a++ = 0x00;
1739 *a++ = 0x75; *a++ = 0x01;
1740 *a++ = 0xc3;
1741
1742 /* 03 86 78 56 34 12 add 0x12345678(%esi),%eax */
1743 /* ff e0 jmp *%eax */
1744 ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1745 *a++ = 0x03; *a++ = 0x86;
1746 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
1747 *a++ = 0xff; *a++ = 0xe0;
1748 }
1749 }
1750
1751 if (skip != NULL)
1752 *skip = (size_t)a - (size_t)skip - 1;
1753
1754 *addrp = a;
1755 return 1;
1756 }
1757
1758
1759 /*
1760 * bintrans_write_instruction__loadstore():
1761 */
1762 static int bintrans_write_instruction__loadstore(struct memory *mem,
1763 unsigned char **addrp, int rt, int imm, int rs,
1764 int instruction_type, int bigendian)
1765 {
1766 unsigned char *a, *retfail, *generic64bit, *doloadstore,
1767 *okret0, *okret1, *okret2, *skip;
1768 int ofs, alignment, load=0, unaligned=0;
1769
1770 /* TODO: Not yet: */
1771 if (instruction_type == HI6_LQ_MDMX || instruction_type == HI6_SQ)
1772 return 0;
1773
1774 /* TODO: Not yet: */
1775 if (bigendian)
1776 return 0;
1777
1778 switch (instruction_type) {
1779 case HI6_LQ_MDMX:
1780 case HI6_LDL:
1781 case HI6_LDR:
1782 case HI6_LD:
1783 case HI6_LWU:
1784 case HI6_LWL:
1785 case HI6_LWR:
1786 case HI6_LW:
1787 case HI6_LHU:
1788 case HI6_LH:
1789 case HI6_LBU:
1790 case HI6_LB:
1791 load = 1;
1792 if (rt == 0)
1793 return 0;
1794 }
1795
1796 switch (instruction_type) {
1797 case HI6_LWL:
1798 case HI6_LWR:
1799 case HI6_LDL:
1800 case HI6_LDR:
1801 case HI6_SWL:
1802 case HI6_SWR:
1803 case HI6_SDL:
1804 case HI6_SDR:
1805 unaligned = 1;
1806 }
1807
1808 a = *addrp;
1809
1810 if (mem->bintrans_32bit_only)
1811 load_into_eax_dont_care_about_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
1812 else
1813 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
1814
1815 if (imm & 0x8000) {
1816 /* 05 34 f2 ff ff add $0xfffff234,%eax */
1817 /* 83 d2 ff adc $0xffffffff,%edx */
1818 *a++ = 5;
1819 *a++ = imm; *a++ = imm >> 8; *a++ = 0xff; *a++ = 0xff;
1820 if (!mem->bintrans_32bit_only) {
1821 *a++ = 0x83; *a++ = 0xd2; *a++ = 0xff;
1822 }
1823 } else {
1824 /* 05 34 12 00 00 add $0x1234,%eax */
1825 /* 83 d2 00 adc $0x0,%edx */
1826 *a++ = 5;
1827 *a++ = imm; *a++ = imm >> 8; *a++ = 0; *a++ = 0;
1828 if (!mem->bintrans_32bit_only) {
1829 *a++ = 0x83; *a++ = 0xd2; *a++ = 0;
1830 }
1831 }
1832
1833 alignment = 0;
1834 switch (instruction_type) {
1835 case HI6_LQ_MDMX:
1836 case HI6_SQ:
1837 alignment = 15;
1838 break;
1839 case HI6_LD:
1840 case HI6_LDL:
1841 case HI6_LDR:
1842 case HI6_SD:
1843 case HI6_SDL:
1844 case HI6_SDR:
1845 alignment = 7;
1846 break;
1847 case HI6_LW:
1848 case HI6_LWL:
1849 case HI6_LWR:
1850 case HI6_LWU:
1851 case HI6_SW:
1852 case HI6_SWL:
1853 case HI6_SWR:
1854 alignment = 3;
1855 break;
1856 case HI6_LH:
1857 case HI6_LHU:
1858 case HI6_SH:
1859 alignment = 1;
1860 break;
1861 }
1862
1863 if (unaligned) {
1864 /*
1865 * Perform the actual load/store from an
1866 * aligned address.
1867 *
1868 * 83 e0 fc and $0xfffffffc,%eax
1869 */
1870 *a++ = 0x83; *a++ = 0xe0; *a++ = 0xff - alignment;
1871 } else if (alignment > 0) {
1872 unsigned char *alignskip;
1873 /*
1874 * Check alignment:
1875 *
1876 * 89 c3 mov %eax,%ebx
1877 * 83 e3 01 and $0x1,%ebx
1878 * 74 01 jz <ok>
1879 * c3 ret
1880 */
1881 *a++ = 0x89; *a++ = 0xc3;
1882 *a++ = 0x83; *a++ = 0xe3; *a++ = alignment;
1883 *a++ = 0x74; alignskip = a; *a++ = 0x00;
1884 bintrans_write_chunkreturn_fail(&a);
1885 *alignskip = (size_t)a - (size_t)alignskip - 1;
1886 }
1887
1888
1889 /* Here, edx:eax = vaddr */
1890
1891 if (mem->bintrans_32bit_only) {
1892 /* Call the quick lookup routine: */
1893 ofs = (size_t)bintrans_loadstore_32bit;
1894 ofs = ofs - ((size_t)a + 5);
1895 *a++ = 0xe8; *a++ = ofs; *a++ = ofs >> 8;
1896 *a++ = ofs >> 16; *a++ = ofs >> 24;
1897
1898 /*
1899 * ecx = NULL? Then return with failure.
1900 *
1901 * 83 f9 00 cmp $0x0,%ecx
1902 * 75 01 jne <okzzz>
1903 */
1904 *a++ = 0x83; *a++ = 0xf9; *a++ = 0x00;
1905 *a++ = 0x75; retfail = a; *a++ = 0x00;
1906 bintrans_write_chunkreturn_fail(&a); /* ret (and fail) */
1907 *retfail = (size_t)a - (size_t)retfail - 1;
1908
1909 /*
1910 * If the lowest bit is zero, and we're storing, then fail.
1911 */
1912 if (!load) {
1913 /*
1914 * f7 c1 01 00 00 00 test $0x1,%ecx
1915 * 75 01 jne <ok>
1916 */
1917 *a++ = 0xf7; *a++ = 0xc1; *a++ = 1; *a++ = 0; *a++ = 0; *a++ = 0;
1918 *a++ = 0x75; retfail = a; *a++ = 0x00;
1919 bintrans_write_chunkreturn_fail(&a); /* ret (and fail) */
1920 *retfail = (size_t)a - (size_t)retfail - 1;
1921 }
1922
1923 /*
1924 * eax = offset within page = vaddr & 0xfff
1925 *
1926 * 25 ff 0f 00 00 and $0xfff,%eax
1927 */
1928 *a++ = 0x25; *a++ = 0xff; *a++ = 0x0f; *a++ = 0; *a++ = 0;
1929
1930 /*
1931 * ecx = host address ( = host page + offset)
1932 *
1933 * 83 e1 fe and $0xfffffffe,%ecx clear the lowest bit
1934 * 01 c1 add %eax,%ecx
1935 */
1936 *a++ = 0x83; *a++ = 0xe1; *a++ = 0xfe;
1937 *a++ = 0x01; *a++ = 0xc1;
1938 } else {
1939 /*
1940 * If the load/store address has the top 32 bits set to
1941 * 0x00000000 or 0xffffffff, then we can use the 32-bit
1942 * lookup tables:
1943 *
1944
1945 TODO: top 33 bits!!!!!!!
1946
1947 * 83 fa 00 cmp $0x0,%edx
1948 * 74 05 je <ok32>
1949 * 83 fa ff cmp $0xffffffff,%edx
1950 * 75 01 jne <not32>
1951 */
1952 *a++ = 0x83; *a++ = 0xfa; *a++ = 0x00;
1953 *a++ = 0x74; *a++ = 0x05;
1954 *a++ = 0x83; *a++ = 0xfa; *a++ = 0xff;
1955 *a++ = 0x75; generic64bit = a; *a++ = 0x01;
1956
1957 /* Call the quick lookup routine: */
1958 ofs = (size_t)bintrans_loadstore_32bit;
1959 ofs = ofs - ((size_t)a + 5);
1960 *a++ = 0xe8; *a++ = ofs; *a++ = ofs >> 8;
1961 *a++ = ofs >> 16; *a++ = ofs >> 24;
1962
1963 /*
1964 * ecx = NULL? Then return with failure.
1965 *
1966 * 83 f9 00 cmp $0x0,%ecx
1967 * 75 01 jne <okzzz>
1968 */
1969 *a++ = 0x83; *a++ = 0xf9; *a++ = 0x00;
1970 *a++ = 0x75; retfail = a; *a++ = 0x00;
1971 bintrans_write_chunkreturn_fail(&a); /* ret (and fail) */
1972 *retfail = (size_t)a - (size_t)retfail - 1;
1973
1974 /*
1975 * If the lowest bit is zero, and we're storing, then fail.
1976 */
1977 if (!load) {
1978 /*
1979 * f7 c1 01 00 00 00 test $0x1,%ecx
1980 * 75 01 jne <ok>
1981 */
1982 *a++ = 0xf7; *a++ = 0xc1; *a++ = 1; *a++ = 0; *a++ = 0; *a++ = 0;
1983 *a++ = 0x75; retfail = a; *a++ = 0x00;
1984 bintrans_write_chunkreturn_fail(&a); /* ret (and fail) */
1985 *retfail = (size_t)a - (size_t)retfail - 1;
1986 }
1987
1988 /*
1989 * eax = offset within page = vaddr & 0xfff
1990 *
1991 * 25 ff 0f 00 00 and $0xfff,%eax
1992 */
1993 *a++ = 0x25; *a++ = 0xff; *a++ = 0x0f; *a++ = 0; *a++ = 0;
1994
1995 /*
1996 * ecx = host address ( = host page + offset)
1997 *
1998 * 83 e1 fe and $0xfffffffe,%ecx clear the lowest bit
1999 * 01 c1 add %eax,%ecx
2000 */
2001 *a++ = 0x83; *a++ = 0xe1; *a++ = 0xfe;
2002 *a++ = 0x01; *a++ = 0xc1;
2003
2004 *a++ = 0xeb; doloadstore = a; *a++ = 0x01;
2005
2006
2007 /* TODO: The stuff above is so similar to the pure 32-bit
2008 case that it should be factored out. */
2009
2010
2011 *generic64bit = (size_t)a - (size_t)generic64bit - 1;
2012
2013 /*
2014 * 64-bit generic case:
2015 */
2016
2017 /* push writeflag */
2018 *a++ = 0x6a; *a++ = load? 0 : 1;
2019
2020 /* push vaddr (edx:eax) */
2021 *a++ = 0x52; *a++ = 0x50;
2022
2023 /* push cpu (esi) */
2024 *a++ = 0x56;
2025
2026 /* eax = points to the right function */
2027 ofs = ((size_t)&dummy_cpu.cd.mips.fast_vaddr_to_hostaddr) - (size_t)&dummy_cpu;
2028 *a++ = 0x8b; *a++ = 0x86;
2029 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
2030
2031 /* ff d0 call *%eax */
2032 *a++ = 0xff; *a++ = 0xd0;
2033
2034 /* 83 c4 08 add $0x10,%esp */
2035 *a++ = 0x83; *a++ = 0xc4; *a++ = 0x10;
2036
2037 /* If eax is NULL, then return. */
2038 /* 83 f8 00 cmp $0x0,%eax */
2039 /* 75 01 jne 1cd <okjump> */
2040 /* c3 ret */
2041 *a++ = 0x83; *a++ = 0xf8; *a++ = 0x00;
2042 *a++ = 0x75; retfail = a; *a++ = 0x00;
2043 bintrans_write_chunkreturn_fail(&a); /* ret (and fail) */
2044 *retfail = (size_t)a - (size_t)retfail - 1;
2045
2046 /* 89 c1 mov %eax,%ecx */
2047 *a++ = 0x89; *a++ = 0xc1;
2048
2049 *doloadstore = (size_t)a - (size_t)doloadstore - 1;
2050 }
2051
2052
2053 if (!load) {
2054 if (alignment >= 7)
2055 load_into_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
2056 else
2057 load_into_eax_dont_care_about_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
2058 }
2059
2060 switch (instruction_type) {
2061 case HI6_LD:
2062 /* 8b 01 mov (%ecx),%eax */
2063 /* 8b 51 04 mov 0x4(%ecx),%edx */
2064 *a++ = 0x8b; *a++ = 0x01;
2065 *a++ = 0x8b; *a++ = 0x51; *a++ = 0x04;
2066 break;
2067 case HI6_LWU:
2068 /* 8b 01 mov (%ecx),%eax */
2069 /* 31 d2 xor %edx,%edx */
2070 *a++ = 0x8b; *a++ = 0x01;
2071 *a++ = 0x31; *a++ = 0xd2;
2072 break;
2073 case HI6_LW:
2074 /* 8b 01 mov (%ecx),%eax */
2075 /* 99 cltd */
2076 *a++ = 0x8b; *a++ = 0x01;
2077 *a++ = 0x99;
2078 break;
2079 case HI6_LHU:
2080 /* 31 c0 xor %eax,%eax */
2081 /* 66 8b 01 mov (%ecx),%ax */
2082 /* 99 cltd */
2083 *a++ = 0x31; *a++ = 0xc0;
2084 *a++ = 0x66; *a++ = 0x8b; *a++ = 0x01;
2085 *a++ = 0x99;
2086 break;
2087 case HI6_LH:
2088 /* 66 8b 01 mov (%ecx),%ax */
2089 /* 98 cwtl */
2090 /* 99 cltd */
2091 *a++ = 0x66; *a++ = 0x8b; *a++ = 0x01;
2092 *a++ = 0x98;
2093 *a++ = 0x99;
2094 break;
2095 case HI6_LBU:
2096 /* 31 c0 xor %eax,%eax */
2097 /* 8a 01 mov (%ecx),%al */
2098 /* 99 cltd */
2099 *a++ = 0x31; *a++ = 0xc0;
2100 *a++ = 0x8a; *a++ = 0x01;
2101 *a++ = 0x99;
2102 break;
2103 case HI6_LB:
2104 /* 8a 01 mov (%ecx),%al */
2105 /* 66 98 cbtw */
2106 /* 98 cwtl */
2107 /* 99 cltd */
2108 *a++ = 0x8a; *a++ = 0x01;
2109 *a++ = 0x66; *a++ = 0x98;
2110 *a++ = 0x98;
2111 *a++ = 0x99;
2112 break;
2113
2114 case HI6_LWL:
2115 load_into_eax_dont_care_about_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
2116 /* 05 34 f2 ff ff add $0xfffff234,%eax */
2117 *a++ = 5;
2118 *a++ = imm; *a++ = imm >> 8; *a++ = 0xff; *a++ = 0xff;
2119 /* 83 e0 03 and $0x03,%eax */
2120 *a++ = 0x83; *a++ = 0xe0; *a++ = alignment;
2121 /* 89 c3 mov %eax,%ebx */
2122 *a++ = 0x89; *a++ = 0xc3;
2123
2124 load_into_eax_dont_care_about_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
2125
2126 /* ALIGNED LOAD: */
2127 /* 8b 11 mov (%ecx),%edx */
2128 *a++ = 0x8b; *a++ = 0x11;
2129
2130 /*
2131 * CASE 0:
2132 * memory = 0x12 0x34 0x56 0x78
2133 * register after lwl: 0x12 0x.. 0x.. 0x..
2134 */
2135 /* 83 fb 00 cmp $0x0,%ebx */
2136 /* 75 01 jne <skip> */
2137 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x00;
2138 *a++ = 0x75; skip = a; *a++ = 0x01;
2139
2140 /* c1 e2 18 shl $0x18,%edx */
2141 /* 25 ff ff ff 00 and $0xffffff,%eax */
2142 /* 09 d0 or %edx,%eax */
2143 *a++ = 0xc1; *a++ = 0xe2; *a++ = 0x18;
2144 *a++ = 0x25; *a++ = 0xff; *a++ = 0xff; *a++ = 0xff; *a++ = 0x00;
2145 *a++ = 0x09; *a++ = 0xd0;
2146
2147 /* eb 00 jmp <okret> */
2148 *a++ = 0xeb; okret0 = a; *a++ = 0;
2149
2150 *skip = (size_t)a - (size_t)skip - 1;
2151
2152 /*
2153 * CASE 1:
2154 * memory = 0x12 0x34 0x56 0x78
2155 * register after lwl: 0x34 0x12 0x.. 0x..
2156 */
2157 /* 83 fb 01 cmp $0x1,%ebx */
2158 /* 75 01 jne <skip> */
2159 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x01;
2160 *a++ = 0x75; skip = a; *a++ = 0x01;
2161
2162 /* c1 e2 10 shl $0x10,%edx */
2163 /* 25 ff ff 00 00 and $0xffff,%eax */
2164 /* 09 d0 or %edx,%eax */
2165 *a++ = 0xc1; *a++ = 0xe2; *a++ = 0x10;
2166 *a++ = 0x25; *a++ = 0xff; *a++ = 0xff; *a++ = 0x00; *a++ = 0x00;
2167 *a++ = 0x09; *a++ = 0xd0;
2168
2169 /* eb 00 jmp <okret> */
2170 *a++ = 0xeb; okret1 = a; *a++ = 0;
2171
2172 *skip = (size_t)a - (size_t)skip - 1;
2173
2174 /*
2175 * CASE 2:
2176 * memory = 0x12 0x34 0x56 0x78
2177 * register after lwl: 0x56 0x34 0x12 0x..
2178 */
2179 /* 83 fb 02 cmp $0x2,%ebx */
2180 /* 75 01 jne <skip> */
2181 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x02;
2182 *a++ = 0x75; skip = a; *a++ = 0x01;
2183
2184 /* c1 e2 08 shl $0x08,%edx */
2185 /* 25 ff 00 00 00 and $0xff,%eax */
2186 /* 09 d0 or %edx,%eax */
2187 *a++ = 0xc1; *a++ = 0xe2; *a++ = 0x08;
2188 *a++ = 0x25; *a++ = 0xff; *a++ = 0x00; *a++ = 0x00; *a++ = 0x00;
2189 *a++ = 0x09; *a++ = 0xd0;
2190
2191 /* eb 00 jmp <okret> */
2192 *a++ = 0xeb; okret2 = a; *a++ = 0;
2193
2194 *skip = (size_t)a - (size_t)skip - 1;
2195
2196 /*
2197 * CASE 3:
2198 * memory = 0x12 0x34 0x56 0x78
2199 * register after lwl: 0x78 0x56 0x34 0x12
2200 */
2201 /* 89 d0 mov %edx,%eax */
2202 *a++ = 0x89; *a++ = 0xd0;
2203
2204 /* okret: */
2205 *okret0 = (size_t)a - (size_t)okret0 - 1;
2206 *okret1 = (size_t)a - (size_t)okret1 - 1;
2207 *okret2 = (size_t)a - (size_t)okret2 - 1;
2208
2209 /* 99 cltd */
2210 *a++ = 0x99;
2211 break;
2212
2213 case HI6_LWR:
2214 load_into_eax_dont_care_about_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
2215 /* 05 34 f2 ff ff add $0xfffff234,%eax */
2216 *a++ = 5;
2217 *a++ = imm; *a++ = imm >> 8; *a++ = 0xff; *a++ = 0xff;
2218 /* 83 e0 03 and $0x03,%eax */
2219 *a++ = 0x83; *a++ = 0xe0; *a++ = alignment;
2220 /* 89 c3 mov %eax,%ebx */
2221 *a++ = 0x89; *a++ = 0xc3;
2222
2223 load_into_eax_dont_care_about_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
2224
2225 /* ALIGNED LOAD: */
2226 /* 8b 11 mov (%ecx),%edx */
2227 *a++ = 0x8b; *a++ = 0x11;
2228
2229 /*
2230 * CASE 0:
2231 * memory = 0x12 0x34 0x56 0x78
2232 * register after lwr: 0x78 0x56 0x34 0x12
2233 */
2234 /* 83 fb 00 cmp $0x0,%ebx */
2235 /* 75 01 jne <skip> */
2236 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x00;
2237 *a++ = 0x75; skip = a; *a++ = 0x01;
2238
2239 /* 89 d0 mov %edx,%eax */
2240 *a++ = 0x89; *a++ = 0xd0;
2241
2242 /* eb 00 jmp <okret> */
2243 *a++ = 0xeb; okret0 = a; *a++ = 0;
2244
2245 *skip = (size_t)a - (size_t)skip - 1;
2246
2247 /*
2248 * CASE 1:
2249 * memory = 0x12 0x34 0x56 0x78
2250 * register after lwr: 0x.. 0x78 0x56 0x34
2251 */
2252 /* 83 fb 01 cmp $0x1,%ebx */
2253 /* 75 01 jne <skip> */
2254 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x01;
2255 *a++ = 0x75; skip = a; *a++ = 0x01;
2256
2257 /* c1 ea 08 shr $0x8,%edx */
2258 /* 25 00 00 00 ff and $0xff000000,%eax */
2259 /* 09 d0 or %edx,%eax */
2260 *a++ = 0xc1; *a++ = 0xea; *a++ = 0x08;
2261 *a++ = 0x25; *a++ = 0x00; *a++ = 0x00; *a++ = 0x00; *a++ = 0xff;
2262 *a++ = 0x09; *a++ = 0xd0;
2263
2264 /* eb 00 jmp <okret> */
2265 *a++ = 0xeb; okret1 = a; *a++ = 0;
2266
2267 *skip = (size_t)a - (size_t)skip - 1;
2268
2269 /*
2270 * CASE 2:
2271 * memory = 0x12 0x34 0x56 0x78
2272 * register after lwr: 0x.. 0x.. 0x78 0x56
2273 */
2274 /* 83 fb 02 cmp $0x2,%ebx */
2275 /* 75 01 jne <skip> */
2276 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x02;
2277 *a++ = 0x75; skip = a; *a++ = 0x01;
2278
2279 /* c1 ea 10 shr $0x10,%edx */
2280 /* 25 00 00 ff ff and $0xffff0000,%eax */
2281 /* 09 d0 or %edx,%eax */
2282 *a++ = 0xc1; *a++ = 0xea; *a++ = 0x10;
2283 *a++ = 0x25; *a++ = 0x00; *a++ = 0x00; *a++ = 0xff; *a++ = 0xff;
2284 *a++ = 0x09; *a++ = 0xd0;
2285
2286 /* eb 00 jmp <okret> */
2287 *a++ = 0xeb; okret2 = a; *a++ = 0;
2288
2289 *skip = (size_t)a - (size_t)skip - 1;
2290
2291 /*
2292 * CASE 3:
2293 * memory = 0x12 0x34 0x56 0x78
2294 * register after lwr: 0x.. 0x.. 0x.. 0x78
2295 */
2296 /* c1 ea 18 shr $0x18,%edx */
2297 /* 25 00 ff ff ff and $0xffffff00,%eax */
2298 /* 09 d0 or %edx,%eax */
2299 *a++ = 0xc1; *a++ = 0xea; *a++ = 0x18;
2300 *a++ = 0x25; *a++ = 0x00; *a++ = 0xff; *a++ = 0xff; *a++ = 0xff;
2301 *a++ = 0x09; *a++ = 0xd0;
2302
2303 /* okret: */
2304 *okret0 = (size_t)a - (size_t)okret0 - 1;
2305 *okret1 = (size_t)a - (size_t)okret1 - 1;
2306 *okret2 = (size_t)a - (size_t)okret2 - 1;
2307
2308 /* 99 cltd */
2309 *a++ = 0x99;
2310 break;
2311
2312 case HI6_SD:
2313 /* 89 01 mov %eax,(%ecx) */
2314 /* 89 51 04 mov %edx,0x4(%ecx) */
2315 *a++ = 0x89; *a++ = 0x01;
2316 *a++ = 0x89; *a++ = 0x51; *a++ = 0x04;
2317 break;
2318 case HI6_SW:
2319 /* 89 01 mov %eax,(%ecx) */
2320 *a++ = 0x89; *a++ = 0x01;
2321 break;
2322 case HI6_SH:
2323 /* 66 89 01 mov %ax,(%ecx) */
2324 *a++ = 0x66; *a++ = 0x89; *a++ = 0x01;
2325 break;
2326 case HI6_SB:
2327 /* 88 01 mov %al,(%ecx) */
2328 *a++ = 0x88; *a++ = 0x01;
2329 break;
2330
2331 case HI6_SWL:
2332 load_into_eax_dont_care_about_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
2333 /* 05 34 f2 ff ff add $0xfffff234,%eax */
2334 *a++ = 5;
2335 *a++ = imm; *a++ = imm >> 8; *a++ = 0xff; *a++ = 0xff;
2336 /* 83 e0 03 and $0x03,%eax */
2337 *a++ = 0x83; *a++ = 0xe0; *a++ = alignment;
2338 /* 89 c3 mov %eax,%ebx */
2339 *a++ = 0x89; *a++ = 0xc3;
2340
2341 load_into_eax_dont_care_about_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
2342
2343 /* ALIGNED LOAD: */
2344 /* 8b 11 mov (%ecx),%edx */
2345 *a++ = 0x8b; *a++ = 0x11;
2346
2347 /*
2348 * CASE 0:
2349 * memory (edx): 0x12 0x34 0x56 0x78
2350 * register (eax): 0x89abcdef
2351 * mem after swl: 0x89 0x.. 0x.. 0x..
2352 */
2353 /* 83 fb 00 cmp $0x0,%ebx */
2354 /* 75 01 jne <skip> */
2355 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x00;
2356 *a++ = 0x75; skip = a; *a++ = 0x01;
2357
2358 /* 81 e2 00 ff ff ff and $0xffffff00,%edx */
2359 /* c1 e8 18 shr $0x18,%eax */
2360 /* 09 d0 or %edx,%eax */
2361 *a++ = 0x81; *a++ = 0xe2; *a++ = 0x00; *a++ = 0xff; *a++ = 0xff; *a++ = 0xff;
2362 *a++ = 0xc1; *a++ = 0xe8; *a++ = 0x18;
2363 *a++ = 0x09; *a++ = 0xd0;
2364
2365 /* eb 00 jmp <okret> */
2366 *a++ = 0xeb; okret0 = a; *a++ = 0;
2367
2368 *skip = (size_t)a - (size_t)skip - 1;
2369
2370 /*
2371 * CASE 1:
2372 * memory (edx): 0x12 0x34 0x56 0x78
2373 * register (eax): 0x89abcdef
2374 * mem after swl: 0xab 0x89 0x.. 0x..
2375 */
2376 /* 83 fb 01 cmp $0x1,%ebx */
2377 /* 75 01 jne <skip> */
2378 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x01;
2379 *a++ = 0x75; skip = a; *a++ = 0x01;
2380
2381 /* 81 e2 00 00 ff ff and $0xffff0000,%edx */
2382 /* c1 e8 10 shr $0x10,%eax */
2383 /* 09 d0 or %edx,%eax */
2384 *a++ = 0x81; *a++ = 0xe2; *a++ = 0x00; *a++ = 0x00; *a++ = 0xff; *a++ = 0xff;
2385 *a++ = 0xc1; *a++ = 0xe8; *a++ = 0x10;
2386 *a++ = 0x09; *a++ = 0xd0;
2387
2388 /* eb 00 jmp <okret> */
2389 *a++ = 0xeb; okret1 = a; *a++ = 0;
2390
2391 *skip = (size_t)a - (size_t)skip - 1;
2392
2393 /*
2394 * CASE 2:
2395 * memory (edx): 0x12 0x34 0x56 0x78
2396 * register (eax): 0x89abcdef
2397 * mem after swl: 0xcd 0xab 0x89 0x..
2398 */
2399 /* 83 fb 02 cmp $0x2,%ebx */
2400 /* 75 01 jne <skip> */
2401 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x02;
2402 *a++ = 0x75; skip = a; *a++ = 0x01;
2403
2404 /* 81 e2 00 00 00 ff and $0xff000000,%edx */
2405 /* c1 e8 08 shr $0x08,%eax */
2406 /* 09 d0 or %edx,%eax */
2407 *a++ = 0x81; *a++ = 0xe2; *a++ = 0x00; *a++ = 0x00; *a++ = 0x00; *a++ = 0xff;
2408 *a++ = 0xc1; *a++ = 0xe8; *a++ = 0x08;
2409 *a++ = 0x09; *a++ = 0xd0;
2410
2411 /* eb 00 jmp <okret> */
2412 *a++ = 0xeb; okret2 = a; *a++ = 0;
2413
2414 *skip = (size_t)a - (size_t)skip - 1;
2415
2416 /*
2417 * CASE 3:
2418 * memory (edx): 0x12 0x34 0x56 0x78
2419 * register (eax): 0x89abcdef
2420 * mem after swl: 0xef 0xcd 0xab 0x89
2421 */
2422 /* eax = eax :-) */
2423
2424 /* okret: */
2425 *okret0 = (size_t)a - (size_t)okret0 - 1;
2426 *okret1 = (size_t)a - (size_t)okret1 - 1;
2427 *okret2 = (size_t)a - (size_t)okret2 - 1;
2428
2429 /* Store back to memory: */
2430 /* 89 01 mov %eax,(%ecx) */
2431 *a++ = 0x89; *a++ = 0x01;
2432 break;
2433
2434 case HI6_SWR:
2435 load_into_eax_dont_care_about_edx(&a, &dummy_cpu.cd.mips.gpr[rs]);
2436 /* 05 34 f2 ff ff add $0xfffff234,%eax */
2437 *a++ = 5;
2438 *a++ = imm; *a++ = imm >> 8; *a++ = 0xff; *a++ = 0xff;
2439 /* 83 e0 03 and $0x03,%eax */
2440 *a++ = 0x83; *a++ = 0xe0; *a++ = alignment;
2441 /* 89 c3 mov %eax,%ebx */
2442 *a++ = 0x89; *a++ = 0xc3;
2443
2444 load_into_eax_dont_care_about_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
2445
2446 /* ALIGNED LOAD: */
2447 /* 8b 11 mov (%ecx),%edx */
2448 *a++ = 0x8b; *a++ = 0x11;
2449
2450 /*
2451 * CASE 0:
2452 * memory (edx): 0x12 0x34 0x56 0x78
2453 * register (eax): 0x89abcdef
2454 * mem after swr: 0xef 0xcd 0xab 0x89
2455 */
2456 /* 83 fb 00 cmp $0x0,%ebx */
2457 /* 75 01 jne <skip> */
2458 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x00;
2459 *a++ = 0x75; skip = a; *a++ = 0x01;
2460
2461 /* eax = eax, so do nothing */
2462
2463 /* eb 00 jmp <okret> */
2464 *a++ = 0xeb; okret0 = a; *a++ = 0;
2465
2466 *skip = (size_t)a - (size_t)skip - 1;
2467
2468 /*
2469 * CASE 1:
2470 * memory (edx): 0x12 0x34 0x56 0x78
2471 * register (eax): 0x89abcdef
2472 * mem after swr: 0x12 0xef 0xcd 0xab
2473 */
2474 /* 83 fb 01 cmp $0x1,%ebx */
2475 /* 75 01 jne <skip> */
2476 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x01;
2477 *a++ = 0x75; skip = a; *a++ = 0x01;
2478
2479 /* 81 e2 ff 00 00 00 and $0x000000ff,%edx */
2480 /* c1 e0 08 shl $0x08,%eax */
2481 /* 09 d0 or %edx,%eax */
2482 *a++ = 0x81; *a++ = 0xe2; *a++ = 0xff; *a++ = 0x00; *a++ = 0x00; *a++ = 0x00;
2483 *a++ = 0xc1; *a++ = 0xe0; *a++ = 0x08;
2484 *a++ = 0x09; *a++ = 0xd0;
2485
2486 /* eb 00 jmp <okret> */
2487 *a++ = 0xeb; okret1 = a; *a++ = 0;
2488
2489 *skip = (size_t)a - (size_t)skip - 1;
2490
2491 /*
2492 * CASE 2:
2493 * memory (edx): 0x12 0x34 0x56 0x78
2494 * register (eax): 0x89abcdef
2495 * mem after swr: 0x12 0x34 0xef 0xcd
2496 */
2497 /* 83 fb 02 cmp $0x2,%ebx */
2498 /* 75 01 jne <skip> */
2499 *a++ = 0x83; *a++ = 0xfb; *a++ = 0x02;
2500 *a++ = 0x75; skip = a; *a++ = 0x01;
2501
2502 /* 81 e2 ff ff 00 00 and $0x0000ffff,%edx */
2503 /* c1 e0 10 shl $0x10,%eax */
2504 /* 09 d0 or %edx,%eax */
2505 *a++ = 0x81; *a++ = 0xe2; *a++ = 0xff; *a++ = 0xff; *a++ = 0x00; *a++ = 0x00;
2506 *a++ = 0xc1; *a++ = 0xe0; *a++ = 0x10;
2507 *a++ = 0x09; *a++ = 0xd0;
2508
2509 /* eb 00 jmp <okret> */
2510 *a++ = 0xeb; okret2 = a; *a++ = 0;
2511
2512 *skip = (size_t)a - (size_t)skip - 1;
2513
2514 /*
2515 * CASE 3:
2516 * memory (edx): 0x12 0x34 0x56 0x78
2517 * register (eax): 0x89abcdef
2518 * mem after swr: 0x12 0x34 0x56 0xef
2519 */
2520 /* 81 e2 ff ff ff 00 and $0x00ffffff,%edx */
2521 /* c1 e0 18 shl $0x18,%eax */
2522 /* 09 d0 or %edx,%eax */
2523 *a++ = 0x81; *a++ = 0xe2; *a++ = 0xff; *a++ = 0xff; *a++ = 0xff; *a++ = 0x00;
2524 *a++ = 0xc1; *a++ = 0xe0; *a++ = 0x18;
2525 *a++ = 0x09; *a++ = 0xd0;
2526
2527
2528 /* okret: */
2529 *okret0 = (size_t)a - (size_t)okret0 - 1;
2530 *okret1 = (size_t)a - (size_t)okret1 - 1;
2531 *okret2 = (size_t)a - (size_t)okret2 - 1;
2532
2533 /* Store back to memory: */
2534 /* 89 01 mov %eax,(%ecx) */
2535 *a++ = 0x89; *a++ = 0x01;
2536 break;
2537
2538 default:
2539 bintrans_write_chunkreturn_fail(&a); /* ret (and fail) */
2540 }
2541
2542 if (load && rt != 0)
2543 store_eax_edx(&a, &dummy_cpu.cd.mips.gpr[rt]);
2544
2545 *addrp = a;
2546 bintrans_write_pc_inc(addrp);
2547 return 1;
2548 }
2549
2550
2551 /*
2552 * bintrans_write_instruction__tlb_rfe_etc():
2553 */
2554 static int bintrans_write_instruction__tlb_rfe_etc(unsigned char **addrp,
2555 int itype)
2556 {
2557 unsigned char *a;
2558 int ofs;
2559
2560 switch (itype) {
2561 case CALL_TLBP:
2562 case CALL_TLBR:
2563 case CALL_TLBWR:
2564 case CALL_TLBWI:
2565 case CALL_RFE:
2566 case CALL_ERET:
2567 case CALL_SYSCALL:
2568 case CALL_BREAK:
2569 break;
2570 default:
2571 return 0;
2572 }
2573
2574 a = *addrp;
2575
2576 /* Put back PC into the cpu struct, both as pc and pc_last */
2577 *a++ = 0x89; *a++ = 0xbe; *a++ = ofs_pc&255;
2578 *a++ = (ofs_pc>>8)&255; *a++ = (ofs_pc>>16)&255;
2579 *a++ = (ofs_pc>>24)&255; /* mov %edi,pc(%esi) */
2580
2581 *a++ = 0x89; *a++ = 0xbe; *a++ = ofs_pc_last&255;
2582 *a++ = (ofs_pc_last>>8)&255; *a++ = (ofs_pc_last>>16)&255;
2583 *a++ = (ofs_pc_last>>24)&255; /* mov %edi,pc_last(%esi) */
2584
2585 /* ... and make sure that the high 32 bits are ALSO in pc_last: */
2586 /* 8b 86 38 12 00 00 mov 0x1238(%esi),%eax */
2587 ofs = ofs_pc + 4;
2588 *a++ = 0x8b; *a++ = 0x86; *a++ = ofs&255;
2589 *a++ = (ofs>>8)&255; *a++ = (ofs>>16)&255;
2590 *a++ = (ofs>>24)&255; /* mov %edi,pc(%esi) */
2591
2592 /* 89 86 34 12 00 00 mov %eax,0x1234(%esi) */
2593 ofs = ofs_pc_last + 4;
2594 *a++ = 0x89; *a++ = 0x86; *a++ = ofs&255;
2595 *a++ = (ofs>>8)&255; *a++ = (ofs>>16)&255;
2596 *a++ = (ofs>>24)&255; /* mov %edi,pc(%esi) */
2597
2598 switch (itype) {
2599 case CALL_TLBP:
2600 case CALL_TLBR:
2601 /* push readflag */
2602 *a++ = 0x6a; *a++ = (itype == CALL_TLBR);
2603 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbpr) - (size_t)&dummy_cpu;
2604 break;
2605 case CALL_TLBWR:
2606 case CALL_TLBWI:
2607 /* push randomflag */
2608 *a++ = 0x6a; *a++ = (itype == CALL_TLBWR);
2609 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbwri) - (size_t)&dummy_cpu;
2610 break;
2611 case CALL_SYSCALL:
2612 case CALL_BREAK:
2613 /* push randomflag */
2614 *a++ = 0x6a; *a++ = (itype == CALL_BREAK? EXCEPTION_BP : EXCEPTION_SYS);
2615 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_simple_exception) - (size_t)&dummy_cpu;
2616 break;
2617 case CALL_RFE:
2618 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_rfe) - (size_t)&dummy_cpu;
2619 break;
2620 case CALL_ERET:
2621 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_eret) - (size_t)&dummy_cpu;
2622 break;
2623 }
2624
2625 /* push cpu (esi) */
2626 *a++ = 0x56;
2627
2628 /* eax = points to the right function */
2629 *a++ = 0x8b; *a++ = 0x86;
2630 *a++ = ofs; *a++ = ofs >> 8; *a++ = ofs >> 16; *a++ = ofs >> 24;
2631
2632 /* ff d0 call *%eax */
2633 *a++ = 0xff; *a++ = 0xd0;
2634
2635 switch (itype) {
2636 case CALL_RFE:
2637 case CALL_ERET:
2638 /* 83 c4 04 add $4,%esp */
2639 *a++ = 0x83; *a++ = 0xc4; *a++ = 4;
2640 break;
2641 default:
2642 /* 83 c4 08 add $8,%esp */
2643 *a++ = 0x83; *a++ = 0xc4; *a++ = 8;
2644 break;
2645 }
2646
2647 /* Load PC from the cpu struct. */
2648 *a++ = 0x8b; *a++ = 0xbe; *a++ = ofs_pc&255;
2649 *a++ = (ofs_pc>>8)&255; *a++ = (ofs_pc>>16)&255;
2650 *a++ = (ofs_pc>>24)&255; /* mov pc(%esi),%edi */
2651
2652 *addrp = a;
2653
2654 switch (itype) {
2655 case CALL_ERET:
2656 case CALL_SYSCALL:
2657 case CALL_BREAK:
2658 break;
2659 default:
2660 bintrans_write_pc_inc(addrp);
2661 }
2662
2663 return 1;
2664 }
2665
2666
2667 /*
2668 * bintrans_backend_init():
2669 *
2670 * This is neccessary for broken GCC 2.x. (For GCC 3.x, this wouldn't be
2671 * neccessary, and the old code would have worked.)
2672 */
2673 static void bintrans_backend_init(void)
2674 {
2675 int size;
2676 unsigned char *p;
2677
2678
2679 /* "runchunk": */
2680 size = 64; /* NOTE: This MUST be enough, or we fail */
2681 p = (unsigned char *)mmap(NULL, size, PROT_READ | PROT_WRITE |
2682 PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
2683
2684 /* If mmap() failed, try malloc(): */
2685 if (p == NULL) {
2686 p = malloc(size);
2687 if (p == NULL) {
2688 fprintf(stderr, "bintrans_backend_init():"
2689 " out of memory\n");
2690 exit(1);
2691 }
2692 }
2693
2694 bintrans_runchunk = (void *)p;
2695
2696 *p++ = 0x57; /* push %edi */
2697 *p++ = 0x56; /* push %esi */
2698 *p++ = 0x55; /* push %ebp */
2699 *p++ = 0x53; /* push %ebx */
2700
2701 /*
2702 * In all translated code, esi points to the cpu struct, and
2703 * ebp is the nr of executed (translated) instructions.
2704 */
2705
2706 /* 0=ebx, 4=ebp, 8=esi, 0xc=edi, 0x10=retaddr, 0x14=arg0, 0x18=arg1 */
2707
2708 /* mov 0x8(%esp,1),%esi */
2709 *p++ = 0x8b; *p++ = 0x74; *p++ = 0x24; *p++ = 0x14;
2710
2711 /* mov nr_instr(%esi),%ebp */
2712 *p++ = 0x8b; *p++ = 0xae; *p++ = ofs_i&255; *p++ = (ofs_i>>8)&255;
2713 *p++ = (ofs_i>>16)&255; *p++ = (ofs_i>>24)&255;
2714
2715 /* mov pc(%esi),%edi */
2716 *p++ = 0x8b; *p++ = 0xbe; *p++ = ofs_pc&255; *p++ = (ofs_pc>>8)&255;
2717 *p++ = (ofs_pc>>16)&255; *p++ = (ofs_pc>>24)&255;
2718
2719 /* call *0x18(%esp,1) */
2720 *p++ = 0xff; *p++ = 0x54; *p++ = 0x24; *p++ = 0x18;
2721
2722 /* mov %ebp,0x1234(%esi) */
2723 *p++ = 0x89; *p++ = 0xae; *p++ = ofs_i&255; *p++ = (ofs_i>>8)&255;
2724 *p++ = (ofs_i>>16)&255; *p++ = (ofs_i>>24)&255;
2725
2726 /* mov %edi,pc(%esi) */
2727 *p++ = 0x89; *p++ = 0xbe; *p++ = ofs_pc&255; *p++ = (ofs_pc>>8)&255;
2728 *p++ = (ofs_pc>>16)&255; *p++ = (ofs_pc>>24)&255;
2729
2730 *p++ = 0x5b; /* pop %ebx */
2731 *p++ = 0x5d; /* pop %ebp */
2732 *p++ = 0x5e; /* pop %esi */
2733 *p++ = 0x5f; /* pop %edi */
2734 *p++ = 0xc3; /* ret */
2735
2736
2737
2738 /* "jump_to_32bit_pc": */
2739 size = 128; /* NOTE: This MUST be enough, or we fail */
2740 p = (unsigned char *)mmap(NULL, size, PROT_READ | PROT_WRITE |
2741 PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
2742
2743 /* If mmap() failed, try malloc(): */
2744 if (p == NULL) {
2745 p = malloc(size);
2746 if (p == NULL) {
2747 fprintf(stderr, "bintrans_backend_init():"
2748 " out of memory\n");
2749 exit(1);
2750 }
2751 }
2752
2753 bintrans_jump_to_32bit_pc = (void *)p;
2754
2755 /* Don't execute too many instructions. */
2756 /* 81 fd f0 1f 00 00 cmpl $0x1ff0,%ebp */
2757 /* 7c 01 jl <okk> */
2758 /* c3 ret */
2759 *p++ = 0x81; *p++ = 0xfd; *p++ = (N_SAFE_BINTRANS_LIMIT-1) & 255;
2760 *p++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8) & 255; *p++ = 0; *p++ = 0;
2761 *p++ = 0x7c; *p++ = 0x01;
2762 *p++ = 0xc3;
2763
2764 /*
2765 * ebx = ((vaddr >> 22) & 1023) * sizeof(void *)
2766 *
2767 * 89 c3 mov %eax,%ebx
2768 * c1 eb 14 shr $20,%ebx
2769 * 81 e3 fc 0f 00 00 and $0xffc,%ebx
2770 */
2771 *p++ = 0x89; *p++ = 0xc3;
2772 *p++ = 0xc1; *p++ = 0xeb; *p++ = 0x14;
2773 *p++ = 0x81; *p++ = 0xe3; *p++ = 0xfc; *p++ = 0x0f; *p++ = 0; *p++ = 0;
2774
2775 /*
2776 * ecx = vaddr_to_hostaddr_table0
2777 *
2778 * 8b 8e 34 12 00 00 mov 0x1234(%esi),%ecx
2779 */
2780 *p++ = 0x8b; *p++ = 0x8e;
2781 *p++ = ofs_tabl0 & 255; *p++ = (ofs_tabl0 >> 8) & 255;
2782 *p++ = (ofs_tabl0 >> 16) & 255; *p++ = (ofs_tabl0 >> 24) & 255;
2783
2784 /*
2785 * ecx = vaddr_to_hostaddr_table0[a]
2786 *
2787 * 8b 0c 19 mov (%ecx,%ebx),%ecx
2788 */
2789 *p++ = 0x8b; *p++ = 0x0c; *p++ = 0x19;
2790
2791 /*
2792 * ebx = ((vaddr >> 12) & 1023) * sizeof(void *)
2793 *
2794 * 89 c3 mov %eax,%ebx
2795 * c1 eb 0a shr $10,%ebx
2796 * 81 e3 fc 0f 00 00 and $0xffc,%ebx
2797 */
2798 *p++ = 0x89; *p++ = 0xc3;
2799 *p++ = 0xc1; *p++ = 0xeb; *p++ = 0x0a;
2800 *p++ = 0x81; *p++ = 0xe3; *p++ = 0xfc; *p++ = 0x0f; *p++ = 0; *p++ = 0;
2801
2802 /*
2803 * ecx = vaddr_to_hostaddr_table0[a][b].cd.mips.chunks
2804 *
2805 * 8b 8c 19 56 34 12 00 mov 0x123456(%ecx,%ebx,1),%ecx
2806 */
2807 *p++ = 0x8b; *p++ = 0x8c; *p++ = 0x19; *p++ = ofs_chunks & 255;
2808 *p++ = (ofs_chunks >> 8) & 255; *p++ = (ofs_chunks >> 16) & 255;
2809 *p++ = (ofs_chunks >> 24) & 255;
2810
2811 /*
2812 * ecx = NULL? Then return with failure.
2813 *
2814 * 83 f9 00 cmp $0x0,%ecx
2815 * 75 01 jne <okzzz>
2816 */
2817 *p++ = 0x83; *p++ = 0xf9; *p++ = 0x00;
2818 *p++ = 0x75; *p++ = 0x01;
2819 *p++ = 0xc3; /* TODO: failure? */
2820
2821 /*
2822 * 25 fc 0f 00 00 and $0xffc,%eax
2823 * 01 c1 add %eax,%ecx
2824 *
2825 * 8b 01 mov (%ecx),%eax
2826 *
2827 * 83 f8 00 cmp $0x0,%eax
2828 * 75 01 jne <ok>
2829 * c3 ret
2830 */
2831 *p++ = 0x25; *p++ = 0xfc; *p++ = 0x0f; *p++ = 0; *p++ = 0;
2832 *p++ = 0x01; *p++ = 0xc1;
2833
2834 *p++ = 0x8b; *p++ = 0x01;
2835
2836 *p++ = 0x83; *p++ = 0xf8; *p++ = 0x00;
2837 *p++ = 0x75; *p++ = 0x01;
2838 *p++ = 0xc3; /* TODO: failure? */
2839
2840 /* 03 86 78 56 34 12 add 0x12345678(%esi),%eax */
2841 /* ff e0 jmp *%eax */
2842 *p++ = 0x03; *p++ = 0x86; *p++ = ofs_chunkbase & 255;
2843 *p++ = (ofs_chunkbase >> 8) & 255; *p++ = (ofs_chunkbase >> 16) & 255;
2844 *p++ = (ofs_chunkbase >> 24) & 255;
2845 *p++ = 0xff; *p++ = 0xe0;
2846
2847
2848
2849 /* "loadstore_32bit": */
2850 size = 48; /* NOTE: This MUST be enough, or we fail */
2851 p = (unsigned char *)mmap(NULL, size, PROT_READ | PROT_WRITE |
2852 PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
2853
2854 /* If mmap() failed, try malloc(): */
2855 if (p == NULL) {
2856 p = malloc(size);
2857 if (p == NULL) {
2858 fprintf(stderr, "bintrans_backend_init():"
2859 " out of memory\n");
2860 exit(1);
2861 }
2862 }
2863
2864 bintrans_loadstore_32bit = (void *)p;
2865
2866 /*
2867 * ebx = ((vaddr >> 22) & 1023) * sizeof(void *)
2868 *
2869 * 89 c3 mov %eax,%ebx
2870 * c1 eb 14 shr $20,%ebx
2871 * 81 e3 fc 0f 00 00 and $0xffc,%ebx
2872 */
2873 *p++ = 0x89; *p++ = 0xc3;
2874 *p++ = 0xc1; *p++ = 0xeb; *p++ = 0x14;
2875 *p++ = 0x81; *p++ = 0xe3; *p++ = 0xfc; *p++ = 0x0f; *p++ = 0; *p++ = 0;
2876
2877 /*
2878 * ecx = vaddr_to_hostaddr_table0
2879 *
2880 * 8b 8e 34 12 00 00 mov 0x1234(%esi),%ecx
2881 */
2882 *p++ = 0x8b; *p++ = 0x8e; *p++ = ofs_tabl0 & 255;
2883 *p++ = (ofs_tabl0 >> 8) & 255;
2884 *p++ = (ofs_tabl0 >> 16) & 255; *p++ = (ofs_tabl0 >> 24) & 255;
2885
2886 /*
2887 * ecx = vaddr_to_hostaddr_table0[a]
2888 *
2889 * 8b 0c 19 mov (%ecx,%ebx),%ecx
2890 */
2891 *p++ = 0x8b; *p++ = 0x0c; *p++ = 0x19;
2892
2893 /*
2894 * ebx = ((vaddr >> 12) & 1023) * sizeof(void *)
2895 *
2896 * 89 c3 mov %eax,%ebx
2897 * c1 eb 0a shr $10,%ebx
2898 * 81 e3 fc 0f 00 00 and $0xffc,%ebx
2899 */
2900 *p++ = 0x89; *p++ = 0xc3;
2901 *p++ = 0xc1; *p++ = 0xeb; *p++ = 0x0a;
2902 *p++ = 0x81; *p++ = 0xe3; *p++ = 0xfc; *p++ = 0x0f; *p++ = 0; *p++ = 0;
2903
2904 /*
2905 * ecx = vaddr_to_hostaddr_table0[a][b]
2906 *
2907 * 8b 0c 19 mov (%ecx,%ebx,1),%ecx
2908 */
2909 *p++ = 0x8b; *p++ = 0x0c; *p++ = 0x19;
2910
2911 /* ret */
2912 *p++ = 0xc3;
2913 }
2914

  ViewVC Help
Powered by ViewVC 1.1.26