/[gxemul]/upstream/0.3.4/src/bintrans_alpha.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /upstream/0.3.4/src/bintrans_alpha.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 11 - (show annotations)
Mon Oct 8 16:18:31 2007 UTC (16 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 84728 byte(s)
0.3.4
1 /*
2 * Copyright (C) 2004-2005 Anders Gavare. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * 3. The name of the author may not be used to endorse or promote products
13 * derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 *
28 * $Id: bintrans_alpha.c,v 1.119 2005/06/22 10:12:25 debug Exp $
29 *
30 * Alpha specific code for dynamic binary translation.
31 *
32 * See bintrans.c for more information. Included from bintrans.c.
33 *
34 *
35 * Some Alpha registers that are reasonable to use:
36 *
37 * t5..t7 6..8 3
38 * s0..s6 9..15 7
39 * a1..a5 17..21 5
40 * t8..t11 22..25 4
41 *
42 * These can be "mapped" to MIPS registers in the translated code, except a0
43 * which points to the cpu struct, and t0..t4 (or so) which are used by the
44 * translated code as temporaries.
45 *
46 * 3 + 7 + 5 + 4 = 19 available registers. Of course, all (except s0..s6) must
47 * be saved when calling external functions, such as when calling tlbp and
48 * other external functions.
49 *
50 * Which are the 19 most commonly used MIPS registers? (This will include the
51 * pc, and the "current number of executed translated instructions.)
52 *
53 * The current allocation is as follows:
54 *
55 * Alpha: MIPS:
56 * ------ -----
57 *
58 * t5 pc (64-bit)
59 * t6 bintrans_instructions_executed (32-bit int)
60 * t7 a0 (mips register 4) (64-bit)
61 * t8 a1 (mips register 5) (64-bit)
62 * t9 s0 (mips register 16) (64-bit)
63 * t10 table0 cached (for load/store)
64 * t11 v0 (mips register 2) (64-bit)
65 * s0 delay_slot (32-bit int)
66 * s1 delay_jmpaddr (64-bit)
67 * s2 sp (mips register 29) (64-bit)
68 * s3 ra (mips register 31) (64-bit)
69 * s4 t0 (mips register 8) (64-bit)
70 * s5 t1 (mips register 9) (64-bit)
71 * s6 t2 (mips register 10) (64-bit)
72 */
73
74 #define MIPSREG_PC -3
75 #define MIPSREG_DELAY_SLOT -2
76 #define MIPSREG_DELAY_JMPADDR -1
77
78 #define ALPHA_T0 1
79 #define ALPHA_T1 2
80 #define ALPHA_T2 3
81 #define ALPHA_T3 4
82 #define ALPHA_T4 5
83 #define ALPHA_T5 6
84 #define ALPHA_T6 7
85 #define ALPHA_T7 8
86 #define ALPHA_S0 9
87 #define ALPHA_S1 10
88 #define ALPHA_S2 11
89 #define ALPHA_S3 12
90 #define ALPHA_S4 13
91 #define ALPHA_S5 14
92 #define ALPHA_S6 15
93 #define ALPHA_A0 16
94 #define ALPHA_A1 17
95 #define ALPHA_A2 18
96 #define ALPHA_A3 19
97 #define ALPHA_A4 20
98 #define ALPHA_A5 21
99 #define ALPHA_T8 22
100 #define ALPHA_T9 23
101 #define ALPHA_T10 24
102 #define ALPHA_T11 25
103 #define ALPHA_ZERO 31
104
105 static int map_MIPS_to_Alpha[32] = {
106 ALPHA_ZERO, -1, ALPHA_T11, -1, /* 0 .. 3 */
107 ALPHA_T7, ALPHA_T8, -1, -1, /* 4 .. 7 */
108 ALPHA_S4, ALPHA_S5, ALPHA_S6, -1, /* 8 .. 11 */
109 -1, -1, -1, -1, /* 12 .. 15 */
110 ALPHA_T9, -1, -1, -1, /* 16 .. 19 */
111 -1, -1, -1, -1, /* 20 .. 23 */
112 -1, -1, -1, -1, /* 24 .. 27 */
113 -1, ALPHA_S2, -1, ALPHA_S3, /* 28 .. 31 */
114 };
115
116
117 struct cpu dummy_cpu;
118 struct mips_coproc dummy_coproc;
119 struct vth32_table dummy_vth32_table;
120
121 unsigned char bintrans_alpha_imb[32] = {
122 0x86, 0x00, 0x00, 0x00, /* imb */
123 0x01, 0x80, 0xfa, 0x6b, /* ret */
124 0x1f, 0x04, 0xff, 0x47, /* nop */
125 0x00, 0x00, 0xfe, 0x2e, /* unop */
126 0x1f, 0x04, 0xff, 0x47, /* nop */
127 0x00, 0x00, 0xfe, 0x2e, /* unop */
128 0x1f, 0x04, 0xff, 0x47, /* nop */
129 0x00, 0x00, 0xfe, 0x2e /* unop */
130 };
131
132
133 /*
134 * bintrans_host_cacheinvalidate()
135 *
136 * Invalidate the host's instruction cache. On Alpha, we do this by
137 * executing an imb instruction.
138 *
139 * NOTE: A simple asm("imb"); would be enough here, but not all
140 * compilers have such simple constructs, so an entire function has to
141 * be written as bintrans_alpha_imb[] above.
142 */
143 static void bintrans_host_cacheinvalidate(unsigned char *p, size_t len)
144 {
145 /* Long form of ``asm("imb");'' */
146
147 void (*f)(void);
148 f = (void *)&bintrans_alpha_imb[0];
149 f();
150 }
151
152
153 /*
154 * lda sp,-128(sp) some margin
155 * stq ra,0(sp)
156 * stq s0,8(sp)
157 * stq s1,16(sp)
158 * stq s2,24(sp)
159 * stq s3,32(sp)
160 * stq s4,40(sp)
161 * stq s5,48(sp)
162 * stq s6,56(sp)
163 *
164 * jsr ra,(a1),<back>
165 * back:
166 *
167 * ldq ra,0(sp)
168 * ldq s0,8(sp)
169 * ldq s1,16(sp)
170 * ldq s2,24(sp)
171 * ldq s3,32(sp)
172 * ldq s4,40(sp)
173 * ldq s5,48(sp)
174 * ldq s6,56(sp)
175 * lda sp,128(sp)
176 * ret
177 */
178 /* note: offsetof (in stdarg.h) could possibly be used, but I'm not sure
179 if it will take care of the compiler problems... */
180 #define ofs_pc (((size_t)&dummy_cpu.pc) - ((size_t)&dummy_cpu))
181 #define ofs_pc_last (((size_t)&dummy_cpu.cd.mips.pc_last) - ((size_t)&dummy_cpu))
182 #define ofs_n (((size_t)&dummy_cpu.cd.mips.bintrans_instructions_executed) - ((size_t)&dummy_cpu))
183 #define ofs_ds (((size_t)&dummy_cpu.cd.mips.delay_slot) - ((size_t)&dummy_cpu))
184 #define ofs_ja (((size_t)&dummy_cpu.cd.mips.delay_jmpaddr) - ((size_t)&dummy_cpu))
185 #define ofs_sp (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_SP]) - ((size_t)&dummy_cpu))
186 #define ofs_ra (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_RA]) - ((size_t)&dummy_cpu))
187 #define ofs_a0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A0]) - ((size_t)&dummy_cpu))
188 #define ofs_a1 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A1]) - ((size_t)&dummy_cpu))
189 #define ofs_t0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T0]) - ((size_t)&dummy_cpu))
190 #define ofs_t1 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T1]) - ((size_t)&dummy_cpu))
191 #define ofs_t2 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T2]) - ((size_t)&dummy_cpu))
192 #define ofs_v0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_V0]) - ((size_t)&dummy_cpu))
193 #define ofs_s0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_S0]) - ((size_t)&dummy_cpu))
194 #define ofs_tbl0 (((size_t)&dummy_cpu.cd.mips.vaddr_to_hostaddr_table0) - ((size_t)&dummy_cpu))
195 #define ofs_c0 ((size_t)&dummy_vth32_table.bintrans_chunks[0] - (size_t)&dummy_vth32_table)
196 #define ofs_cb (((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu)
197
198
199 static uint32_t bintrans_alpha_load_32bit[18] = {
200 /*
201 * t1 = 1023;
202 * t2 = ((a1 >> 22) & t1) * sizeof(void *);
203 * t3 = ((a1 >> 12) & t1) * sizeof(void *);
204 * t1 = a1 & 4095;
205 *
206 * f8 1f 5f 20 lda t1,1023 * 8
207 * 83 76 22 4a srl a1,19,t2
208 * 84 36 21 4a srl a1, 9,t3
209 * 03 00 62 44 and t2,t1,t2
210 */
211 0x205f1ff8,
212 0x4a227683,
213 0x4a213684,
214 0x44620003,
215
216 /*
217 * t10 is vaddr_to_hostaddr_table0
218 *
219 * a3 = tbl0[t2] (load entry from tbl0)
220 * 12 04 03 43 addq t10,t2,a2
221 */
222 0x43030412,
223
224 /* 04 00 82 44 and t3,t1,t3 */
225 0x44820004,
226
227 /* 00 00 72 a6 ldq a3,0(a2) */
228 0xa6720000,
229
230 /* 24 37 80 48 sll t3,0x1,t3 */
231 0x48803724,
232
233 /* ff 0f 5f 20 lda t1,4095 */
234 0x205f0fff,
235
236 /*
237 * a3 = tbl1[t3] (load entry from tbl1 (which is a3))
238 * 13 04 64 42 addq a3,t3,a3
239 */
240 0x42640413,
241
242 /* 02 00 22 46 and a1,t1,t1 */
243 0x46220002,
244
245 /* 00 00 73 a6 ldq a3,0(a3) */
246 0xa6730000,
247
248 /* NULL? Then return failure at once. */
249 /* beq a3, return */
250 0xe6600002,
251
252 /* The rest of the load/store code was written with t3 as the address. */
253
254 /* Add the offset within the page: */
255 /* 04 04 62 42 addq a3,t1,t3 */
256 0x42620404,
257
258 0x6be50000, /* jmp (t4) */
259
260 /* return: */
261 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */
262 0x44270407, /* or t0,t6,t6 */
263 0x6bfa8001 /* ret */
264 };
265
266 static uint32_t bintrans_alpha_store_32bit[18] = {
267 /*
268 * t1 = 1023;
269 * t2 = ((a1 >> 22) & t1) * sizeof(void *);
270 * t3 = ((a1 >> 12) & t1) * sizeof(void *);
271 * t1 = a1 & 4095;
272 *
273 * f8 1f 5f 20 lda t1,1023 * 8
274 * 83 76 22 4a srl a1,19,t2
275 * 84 36 21 4a srl a1, 9,t3
276 * 03 00 62 44 and t2,t1,t2
277 */
278 0x205f1ff8,
279 0x4a227683,
280 0x4a213684,
281 0x44620003,
282
283 /*
284 * t10 is vaddr_to_hostaddr_table0
285 *
286 * a3 = tbl0[t2] (load entry from tbl0)
287 * 12 04 03 43 addq t10,t2,a2
288 */
289 0x43030412,
290
291 /* 04 00 82 44 and t3,t1,t3 */
292 0x44820004,
293
294 /* 00 00 72 a6 ldq a3,0(a2) */
295 0xa6720000,
296
297 /* 24 37 80 48 sll t3,0x1,t3 */
298 0x48803724,
299
300 /* ff 0f 5f 20 lda t1,4095 */
301 0x205f0fff,
302
303 /*
304 * a3 = tbl1[t3] (load entry from tbl1 (which is a3))
305 * 13 04 64 42 addq a3,t3,a3
306 */
307 0x42640413,
308
309 /* 02 00 22 46 and a1,t1,t1 */
310 0x46220002,
311
312 /* 00 00 73 a6 ldq a3,8(a3) */
313 0xa6730008,
314
315 /* NULL? Then return failure at once. */
316 /* beq a3, return */
317 0xe6600002,
318
319 /* The rest of the load/store code was written with t3 as the address. */
320
321 /* Add the offset within the page: */
322 /* 04 04 62 42 addq a3,t1,t3 */
323 0x42620404,
324
325 0x6be50000, /* jmp (t4) */
326
327 /* return: */
328 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */
329 0x44270407, /* or t0,t6,t6 */
330 0x6bfa8001 /* ret */
331 };
332
333 static void (*bintrans_runchunk)(struct cpu *, unsigned char *);
334
335 static void (*bintrans_jump_to_32bit_pc)(struct cpu *);
336
337 static void (*bintrans_load_32bit)
338 (struct cpu *) = (void *)bintrans_alpha_load_32bit;
339
340 static void (*bintrans_store_32bit)
341 (struct cpu *) = (void *)bintrans_alpha_store_32bit;
342
343
344 /*
345 * bintrans_write_quickjump():
346 */
347 static void bintrans_write_quickjump(struct memory *mem,
348 unsigned char *quickjump_code, uint32_t chunkoffset)
349 {
350 int ofs;
351 uint64_t alpha_addr = chunkoffset +
352 (size_t)mem->translation_code_chunk_space;
353 uint32_t *a = (uint32_t *)quickjump_code;
354
355 ofs = (alpha_addr - ((size_t)a+4)) / 4;
356
357 /* printf("chunkoffset=%i, %016llx %016llx %i\n",
358 chunkoffset, (long long)alpha_addr, (long long)a, ofs); */
359
360 if (ofs > -0xfffff && ofs < 0xfffff) {
361 *a++ = 0xc3e00000 | (ofs & 0x1fffff); /* br <chunk> */
362 }
363 }
364
365
366 /*
367 * bintrans_write_chunkreturn():
368 */
369 static void bintrans_write_chunkreturn(unsigned char **addrp)
370 {
371 uint32_t *a = (uint32_t *) *addrp;
372 *a++ = 0x6bfa8001; /* ret */
373 *addrp = (unsigned char *) a;
374 }
375
376
377 /*
378 * bintrans_write_chunkreturn_fail():
379 */
380 static void bintrans_write_chunkreturn_fail(unsigned char **addrp)
381 {
382 uint32_t *a = (uint32_t *) *addrp;
383 /* 00 01 3f 24 ldah t0,256 */
384 /* 07 04 27 44 or t0,t6,t6 */
385 *a++ = 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16);
386 *a++ = 0x44270407;
387 *a++ = 0x6bfa8001; /* ret */
388 *addrp = (unsigned char *) a;
389 }
390
391
392 /*
393 * bintrans_move_MIPS_reg_into_Alpha_reg():
394 */
395 static void bintrans_move_MIPS_reg_into_Alpha_reg(unsigned char **addrp, int mipsreg, int alphareg)
396 {
397 uint32_t *a = (uint32_t *) *addrp;
398 int ofs, alpha_mips_reg;
399
400 switch (mipsreg) {
401 case MIPSREG_PC:
402 /* addq t5,0,alphareg */
403 *a++ = 0x40c01400 | alphareg;
404 break;
405 case MIPSREG_DELAY_SLOT:
406 /* addq s0,0,alphareg */
407 *a++ = 0x41201400 | alphareg;
408 break;
409 case MIPSREG_DELAY_JMPADDR:
410 /* addq s1,0,alphareg */
411 *a++ = 0x41401400 | alphareg;
412 break;
413 default:
414 alpha_mips_reg = map_MIPS_to_Alpha[mipsreg];
415 if (alpha_mips_reg < 0) {
416 ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu;
417 /* ldq alphareg,gpr[mipsreg](a0) */
418 *a++ = 0xa4100000 | (alphareg << 21) | ofs;
419 } else {
420 /* addq alpha_mips_reg,0,alphareg */
421 *a++ = 0x40001400 | (alpha_mips_reg << 21) | alphareg;
422 }
423 }
424 *addrp = (unsigned char *) a;
425 }
426
427
428 /*
429 * bintrans_move_Alpha_reg_into_MIPS_reg():
430 */
431 static void bintrans_move_Alpha_reg_into_MIPS_reg(unsigned char **addrp, int alphareg, int mipsreg)
432 {
433 uint32_t *a = (uint32_t *) *addrp;
434 int ofs, alpha_mips_reg;
435
436 switch (mipsreg) {
437 case MIPSREG_PC:
438 /* addq alphareg,0,t5 */
439 *a++ = 0x40001406 | (alphareg << 21);
440 break;
441 case MIPSREG_DELAY_SLOT:
442 /* addq alphareg,0,s0 */
443 *a++ = 0x40001409 | (alphareg << 21);
444 break;
445 case MIPSREG_DELAY_JMPADDR:
446 /* addq alphareg,0,s1 */
447 *a++ = 0x4000140a | (alphareg << 21);
448 break;
449 case 0: /* the zero register */
450 break;
451 default:
452 alpha_mips_reg = map_MIPS_to_Alpha[mipsreg];
453 if (alpha_mips_reg < 0) {
454 /* stq alphareg,gpr[mipsreg](a0) */
455 ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu;
456 *a++ = 0xb4100000 | (alphareg << 21) | ofs;
457 } else {
458 /* addq alphareg,0,alpha_mips_reg */
459 *a++ = 0x40001400 | (alphareg << 21) | alpha_mips_reg;
460 }
461 }
462 *addrp = (unsigned char *) a;
463 }
464
465
466 /*
467 * bintrans_write_pc_inc():
468 */
469 static void bintrans_write_pc_inc(unsigned char **addrp)
470 {
471 uint32_t *a = (uint32_t *) *addrp;
472
473 /* lda t6,1(t6) */
474 *a++ = 0x20e70001;
475
476 /* lda t5,4(t5) */
477 *a++ = 0x20c60004;
478
479 *addrp = (unsigned char *) a;
480 }
481
482
483 /*
484 * bintrans_write_instruction__addiu_etc():
485 */
486 static int bintrans_write_instruction__addiu_etc(unsigned char **addrp,
487 int rt, int rs, int imm, int instruction_type)
488 {
489 uint32_t *a;
490 unsigned int uimm;
491 int alpha_rs, alpha_rt;
492
493 /* TODO: overflow detection for ADDI and DADDI */
494 switch (instruction_type) {
495 case HI6_ADDI:
496 case HI6_DADDI:
497 return 0;
498 }
499
500 a = (uint32_t *) *addrp;
501
502 if (rt == 0)
503 goto rt0;
504
505 uimm = imm & 0xffff;
506
507 alpha_rs = map_MIPS_to_Alpha[rs];
508 alpha_rt = map_MIPS_to_Alpha[rt];
509
510 if (uimm == 0 && (instruction_type == HI6_ADDI ||
511 instruction_type == HI6_ADDIU || instruction_type == HI6_DADDI ||
512 instruction_type == HI6_DADDIU || instruction_type == HI6_ORI)) {
513 if (alpha_rs >= 0 && alpha_rt >= 0) {
514 /* addq rs,0,rt */
515 *a++ = 0x40001400 | (alpha_rs << 21) | alpha_rt;
516 } else {
517 *addrp = (unsigned char *) a;
518 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0);
519 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
520 a = (uint32_t *) *addrp;
521 }
522 goto rt0;
523 }
524
525 if (alpha_rs < 0) {
526 /* ldq t0,"rs"(a0) */
527 *addrp = (unsigned char *) a;
528 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0);
529 a = (uint32_t *) *addrp;
530 alpha_rs = ALPHA_T0;
531 }
532
533 if (alpha_rt < 0)
534 alpha_rt = ALPHA_T0;
535
536 /* Place the result of the calculation in alpha_rt: */
537
538 switch (instruction_type) {
539 case HI6_ADDIU:
540 case HI6_DADDIU:
541 case HI6_ADDI:
542 case HI6_DADDI:
543 if (uimm < 256) {
544 if (instruction_type == HI6_ADDI ||
545 instruction_type == HI6_ADDIU) {
546 /* addl rs,uimm,rt */
547 *a++ = 0x40001000 | (alpha_rs << 21)
548 | (uimm << 13) | alpha_rt;
549 } else {
550 /* addq rs,uimm,rt */
551 *a++ = 0x40001400 | (alpha_rs << 21)
552 | (uimm << 13) | alpha_rt;
553 }
554 } else {
555 /* lda rt,imm(rs) */
556 *a++ = 0x20000000 | (alpha_rt << 21) | (alpha_rs << 16) | uimm;
557 if (instruction_type == HI6_ADDI ||
558 instruction_type == HI6_ADDIU) {
559 /* sign extend, 32->64 bits: addl t0,zero,t0 */
560 *a++ = 0x40001000 | (alpha_rt << 21) | alpha_rt;
561 }
562 }
563 break;
564 case HI6_ANDI:
565 case HI6_ORI:
566 case HI6_XORI:
567 if (uimm >= 256) {
568 /* lda t1,4660 */
569 *a++ = 0x205f0000 | uimm;
570 if (uimm & 0x8000) {
571 /* 01 00 42 24 ldah t1,1(t1) <-- if negative only */
572 *a++ = 0x24420001;
573 }
574 }
575
576 switch (instruction_type) {
577 case HI6_ANDI:
578 if (uimm < 256) {
579 /* and rs,uimm,rt */
580 *a++ = 0x44001000 | (alpha_rs << 21)
581 | (uimm << 13) | alpha_rt;
582 } else {
583 /* and rs,t1,rt */
584 *a++ = 0x44020000 | (alpha_rs << 21) | alpha_rt;
585 }
586 break;
587 case HI6_ORI:
588 if (uimm < 256) {
589 /* or rs,uimm,rt */
590 *a++ = 0x44001400 | (alpha_rs << 21)
591 | (uimm << 13) | alpha_rt;
592 } else {
593 /* or rs,t1,rt */
594 *a++ = 0x44020400 | (alpha_rs << 21) | alpha_rt;
595 }
596 break;
597 case HI6_XORI:
598 if (uimm < 256) {
599 /* xor rs,uimm,rt */
600 *a++ = 0x44001800 | (alpha_rs << 21)
601 | (uimm << 13) | alpha_rt;
602 } else {
603 /* xor rs,t1,rt */
604 *a++ = 0x44020800 | (alpha_rs << 21) | alpha_rt;
605 }
606 break;
607 }
608 break;
609 case HI6_SLTI:
610 case HI6_SLTIU:
611 /* lda t1,4660 */
612 *a++ = 0x205f0000 | uimm;
613
614 switch (instruction_type) {
615 case HI6_SLTI:
616 /* cmplt rs,t1,rt */
617 *a++ = 0x400209a0 | (alpha_rs << 21) | alpha_rt;
618 break;
619 case HI6_SLTIU:
620 /* cmpult rs,t1,rt */
621 *a++ = 0x400203a0 | (alpha_rs << 21) | alpha_rt;
622 break;
623 }
624 break;
625 }
626
627 if (alpha_rt == ALPHA_T0) {
628 *a++ = 0x5fff041f; /* fnop */
629 *addrp = (unsigned char *) a;
630 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
631 a = (uint32_t *) *addrp;
632 }
633
634 rt0:
635 *addrp = (unsigned char *) a;
636 bintrans_write_pc_inc(addrp);
637 return 1;
638 }
639
640
641 /*
642 * bintrans_write_instruction__addu_etc():
643 */
644 static int bintrans_write_instruction__addu_etc(unsigned char **addrp,
645 int rd, int rs, int rt, int sa, int instruction_type)
646 {
647 unsigned char *a, *unmodified = NULL;
648 int load64 = 0, store = 1, ofs, alpha_rd = ALPHA_T0;
649
650 alpha_rd = map_MIPS_to_Alpha[rd];
651 if (alpha_rd < 0)
652 alpha_rd = ALPHA_T0;
653
654 switch (instruction_type) {
655 case SPECIAL_DIV:
656 case SPECIAL_DIVU:
657 return 0;
658 }
659
660 switch (instruction_type) {
661 case SPECIAL_DADDU:
662 case SPECIAL_DSUBU:
663 case SPECIAL_OR:
664 case SPECIAL_AND:
665 case SPECIAL_NOR:
666 case SPECIAL_XOR:
667 case SPECIAL_DSLL:
668 case SPECIAL_DSRL:
669 case SPECIAL_DSRA:
670 case SPECIAL_DSLL32:
671 case SPECIAL_DSRL32:
672 case SPECIAL_DSRA32:
673 case SPECIAL_SLT:
674 case SPECIAL_SLTU:
675 case SPECIAL_MOVZ:
676 case SPECIAL_MOVN:
677 load64 = 1;
678 }
679
680 switch (instruction_type) {
681 case SPECIAL_MULT:
682 case SPECIAL_MULTU:
683 if (rd != 0)
684 return 0;
685 store = 0;
686 break;
687 default:
688 if (rd == 0)
689 goto rd0;
690 }
691
692 a = *addrp;
693
694 if ((instruction_type == SPECIAL_ADDU || instruction_type == SPECIAL_DADDU
695 || instruction_type == SPECIAL_OR) && rt == 0) {
696 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
697 if (!load64) {
698 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
699 }
700 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
701 *addrp = a;
702 goto rd0;
703 }
704
705 /* t0 = rs, t1 = rt */
706 if (load64) {
707 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
708 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1);
709 } else {
710 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
711 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
712 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1);
713 *a++ = 0x02; *a++ = 0x10; *a++ = 0x40; *a++ = 0x40; /* addl t1,0,t1 */
714 }
715
716 switch (instruction_type) {
717 case SPECIAL_ADDU:
718 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x40; /* addl t0,t1,rd */
719 break;
720 case SPECIAL_DADDU:
721 *a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq t0,t1,rd */
722 break;
723 case SPECIAL_SUBU:
724 *a++ = 0x20 + alpha_rd; *a++ = 0x01; *a++ = 0x22; *a++ = 0x40; /* subl t0,t1,t0 */
725 break;
726 case SPECIAL_DSUBU:
727 *a++ = 0x20 + alpha_rd; *a++ = 0x05; *a++ = 0x22; *a++ = 0x40; /* subq t0,t1,t0 */
728 break;
729 case SPECIAL_AND:
730 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and t0,t1,t0 */
731 break;
732 case SPECIAL_OR:
733 *a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */
734 break;
735 case SPECIAL_NOR:
736 *a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */
737 *a++ = alpha_rd; *a++ = 0x05; *a++ = 0xe1; *a++ = 0x47; /* not t0,t0 */
738 break;
739 case SPECIAL_XOR:
740 *a++ = alpha_rd; *a++ = 0x08; *a++ = 0x22; *a++ = 0x44; /* xor t0,t1,t0 */
741 break;
742 case SPECIAL_SLL:
743 *a++ = 0x21; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
744 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
745 break;
746 case SPECIAL_SLLV:
747 /* rd = rt << (rs&31) (logical) t0 = t1 << (t0&31) */
748 *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
749 *a++ = 0x21; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sll t1,t0,t0 */
750 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
751 break;
752 case SPECIAL_DSLL:
753 *a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
754 break;
755 case SPECIAL_DSLL32:
756 sa += 32;
757 *a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
758 break;
759 case SPECIAL_SRA:
760 *a++ = 0x81; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
761 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
762 break;
763 case SPECIAL_SRAV:
764 /* rd = rt >> (rs&31) (arithmetic) t0 = t1 >> (t0&31) */
765 *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
766 *a++ = 0x81; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sra t1,t0,t0 */
767 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
768 break;
769 case SPECIAL_DSRA:
770 *a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
771 break;
772 case SPECIAL_DSRA32:
773 sa += 32;
774 *a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
775 break;
776 case SPECIAL_SRL:
777 *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */
778 /* Note: bits of sa are distributed among two different bytes. */
779 *a++ = 0x81; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
780 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl */
781 break;
782 case SPECIAL_SRLV:
783 /* rd = rt >> (rs&31) (logical) t0 = t1 >> (t0&31) */
784 *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */
785 *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
786 *a++ = 0x81; *a++ = 0x06; *a++ = 0x41; *a++ = 0x48; /* srl t1,t0,t0 */
787 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
788 break;
789 case SPECIAL_DSRL:
790 /* Note: bits of sa are distributed among two different bytes. */
791 *a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
792 break;
793 case SPECIAL_DSRL32:
794 /* Note: bits of sa are distributed among two different bytes. */
795 sa += 32;
796 *a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
797 break;
798 case SPECIAL_SLT:
799 *a++ = 0xa0 + alpha_rd; *a++ = 0x09; *a++ = 0x22; *a++ = 0x40; /* cmplt t0,t1,t0 */
800 break;
801 case SPECIAL_SLTU:
802 *a++ = 0xa0 + alpha_rd; *a++ = 0x03; *a++ = 0x22; *a++ = 0x40; /* cmpult t0,t1,t0 */
803 break;
804 case SPECIAL_MULT:
805 case SPECIAL_MULTU:
806 if (instruction_type == SPECIAL_MULTU) {
807 /* 21 f6 21 48 zapnot t0,0xf,t0 */
808 /* 22 f6 41 48 zapnot t1,0xf,t1 */
809 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48;
810 *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48;
811 }
812
813 /* 03 04 22 4c mulq t0,t1,t2 */
814 *a++ = 0x03; *a++ = 0x04; *a++ = 0x22; *a++ = 0x4c;
815
816 /* 01 10 60 40 addl t2,0,t0 */
817 *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
818
819 ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
820 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
821
822 /* 81 17 64 48 sra t2,0x20,t0 */
823 *a++ = 0x81; *a++ = 0x17; *a++ = 0x64; *a++ = 0x48;
824 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
825 ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
826 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
827 break;
828 case SPECIAL_MOVZ:
829 /* if rt=0 then rd=rs ==> if t1!=0 then t0=unmodified else t0=rd */
830 /* 00 00 40 f4 bne t1,unmodified */
831 unmodified = a;
832 *a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xf4;
833 alpha_rd = ALPHA_T0;
834 break;
835 case SPECIAL_MOVN:
836 /* if rt!=0 then rd=rs ==> if t1=0 then t0=unmodified else t0=rd */
837 /* 00 00 40 e4 beq t1,unmodified */
838 unmodified = a;
839 *a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4;
840 alpha_rd = ALPHA_T0;
841 break;
842 }
843
844 if (store && alpha_rd == ALPHA_T0) {
845 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
846 }
847
848 if (unmodified != NULL)
849 *unmodified = ((size_t)a - (size_t)unmodified - 4) / 4;
850
851 *addrp = a;
852 rd0:
853 bintrans_write_pc_inc(addrp);
854 return 1;
855 }
856
857
858 /*
859 * bintrans_write_instruction__branch():
860 */
861 static int bintrans_write_instruction__branch(unsigned char **addrp,
862 int instruction_type, int regimm_type, int rt, int rs, int imm)
863 {
864 uint32_t *a, *b, *c = NULL;
865 int alpha_rs, alpha_rt, likely = 0, ofs;
866
867 alpha_rs = map_MIPS_to_Alpha[rs];
868 alpha_rt = map_MIPS_to_Alpha[rt];
869
870 switch (instruction_type) {
871 case HI6_BEQL:
872 case HI6_BNEL:
873 case HI6_BLEZL:
874 case HI6_BGTZL:
875 likely = 1;
876 }
877
878 /*
879 * t0 = gpr[rt]; t1 = gpr[rs];
880 *
881 * 50 00 30 a4 ldq t0,80(a0)
882 * 58 00 50 a4 ldq t1,88(a0)
883 */
884
885 switch (instruction_type) {
886 case HI6_BEQ:
887 case HI6_BNE:
888 case HI6_BEQL:
889 case HI6_BNEL:
890 if (alpha_rt < 0) {
891 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0);
892 alpha_rt = ALPHA_T0;
893 }
894 }
895
896 if (alpha_rs < 0) {
897 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T1);
898 alpha_rs = ALPHA_T1;
899 }
900
901 a = (uint32_t *) *addrp;
902
903 /*
904 * Compare alpha_rt (t0) and alpha_rs (t1) for equality (BEQ).
905 * If the result was false (equal to zero), then skip a lot
906 * of instructions:
907 *
908 * a1 05 22 40 cmpeq t0,t1,t0
909 * 01 00 20 e4 beq t0,14 <f+0x14>
910 */
911 b = NULL;
912 if ((instruction_type == HI6_BEQ ||
913 instruction_type == HI6_BEQL) && rt != rs) {
914 /* cmpeq rt,rs,t0 */
915 *a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16);
916 b = a;
917 *a++ = 0xe4200001; /* beq */
918 }
919 if (instruction_type == HI6_BNE || instruction_type == HI6_BNEL) {
920 /* cmpeq rt,rs,t0 */
921 *a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16);
922 b = a;
923 *a++ = 0xf4200001; /* bne */
924 }
925 if (instruction_type == HI6_BLEZ || instruction_type == HI6_BLEZL) {
926 /* cmple rs,0,t0 */
927 *a++ = 0x40001da1 | (alpha_rs << 21);
928 b = a;
929 *a++ = 0xe4200001; /* beq */
930 }
931 if (instruction_type == HI6_BGTZ || instruction_type == HI6_BGTZL) {
932 /* cmple rs,0,t0 */
933 *a++ = 0x40001da1 | (alpha_rs << 21);
934 b = a;
935 *a++ = 0xf4200001; /* bne */
936 }
937 if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BLTZ) {
938 /* cmplt rs,0,t0 */
939 *a++ = 0x400019a1 | (alpha_rs << 21);
940 b = a;
941 *a++ = 0xe4200001; /* beq */
942 }
943 if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BGEZ) {
944 *a++ = 0x207fffff; /* lda t2,-1 */
945 /* cmple rs,t2,t0 */
946 *a++ = 0x40030da1 | (alpha_rs << 21);
947 b = a;
948 *a++ = 0xf4200001; /* bne */
949 }
950
951 /*
952 * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
953 * and cpu->delay_jmpaddr = pc + 4 + (imm << 2).
954 *
955 * 04 00 26 20 lda t0,4(t5) add 4
956 * c8 01 5f 20 lda t1,456
957 * 4a 04 41 40 s4addq t1,t0,s1 s1 = (t1<<2) + t0
958 */
959
960 *a++ = 0x20260004; /* lda t0,4(t5) */
961 *a++ = 0x205f0000 | (imm & 0xffff); /* lda */
962 *a++ = 0x4041044a; /* s4addq */
963
964 /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
965 *a++ = 0x213f0000 | TO_BE_DELAYED;
966
967 /*
968 * Special case: "likely"-branches:
969 */
970 if (likely) {
971 c = a;
972 *a++ = 0xc3e00001; /* br delayed_ok */
973
974 if (b != NULL)
975 *((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4;
976
977 /* cpu->cd.mips.nullify_next = 1; */
978 /* 01 00 3f 20 lda t0,1 */
979 *a++ = 0x203f0001;
980 ofs = (size_t)&dummy_cpu.cd.mips.nullify_next - (size_t)&dummy_cpu;
981 *a++ = 0xb0300000 | (ofs & 0xffff);
982
983 /* fail, so that the next instruction is handled manually: */
984 *addrp = (unsigned char *) a;
985 bintrans_write_pc_inc(addrp);
986 bintrans_write_chunkreturn_fail(addrp);
987 a = (uint32_t *) *addrp;
988
989 if (c != NULL)
990 *((unsigned char *)c) = ((size_t)a - (size_t)c - 4) / 4;
991 } else {
992 /* Normal (non-likely) exit: */
993 if (b != NULL)
994 *((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4;
995 }
996
997 *addrp = (unsigned char *) a;
998 bintrans_write_pc_inc(addrp);
999 return 1;
1000 }
1001
1002
1003 /*
1004 * bintrans_write_instruction__jr():
1005 */
1006 static int bintrans_write_instruction__jr(unsigned char **addrp, int rs, int rd, int special)
1007 {
1008 uint32_t *a;
1009 int alpha_rd;
1010
1011 alpha_rd = map_MIPS_to_Alpha[rd];
1012 if (alpha_rd < 0)
1013 alpha_rd = ALPHA_T0;
1014
1015 /*
1016 * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
1017 * and cpu->delay_jmpaddr = gpr[rs].
1018 */
1019
1020 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_S1);
1021
1022 a = (uint32_t *) *addrp;
1023 /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
1024 *a++ = 0x213f0000 | TO_BE_DELAYED;
1025 *addrp = (unsigned char *) a;
1026
1027 if (special == SPECIAL_JALR && rd != 0) {
1028 /* gpr[rd] = retaddr (pc + 8) */
1029 a = (uint32_t *) *addrp;
1030 /* lda alpha_rd,8(t5) */
1031 *a++ = 0x20060008 | (alpha_rd << 21);
1032 *addrp = (unsigned char *) a;
1033 if (alpha_rd == ALPHA_T0)
1034 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rd);
1035 }
1036
1037 bintrans_write_pc_inc(addrp);
1038 return 1;
1039 }
1040
1041
1042 /*
1043 * bintrans_write_instruction__jal():
1044 */
1045 static int bintrans_write_instruction__jal(unsigned char **addrp,
1046 int imm, int link)
1047 {
1048 uint32_t *a;
1049
1050 a = (uint32_t *) *addrp;
1051
1052 /* gpr[31] = retaddr (NOTE: mips register 31 is in alpha reg s3) */
1053 if (link) {
1054 *a++ = 0x21860008; /* lda s3,8(t5) */
1055 }
1056
1057 /* Set the jmpaddr to top 4 bits of pc + lowest 28 bits of imm*4: */
1058
1059 /*
1060 * imm = 4*imm;
1061 * t0 = ((pc + 4) & ~0x0fffffff) | imm;
1062 *
1063 * 04 00 26 20 lda t0,4(t5) <-- because the jump is from the delay slot
1064 * 23 01 5f 24 ldah t1,291
1065 * 67 45 42 20 lda t1,17767(t1)
1066 * 00 f0 7f 24 ldah t2,-4096
1067 * 04 00 23 44 and t0,t2,t3
1068 * 0a 04 44 44 or t1,t3,s1
1069 */
1070 imm *= 4;
1071 *a++ = 0x20260004;
1072 *a++ = 0x245f0000 | ((imm >> 16) + (imm & 0x8000? 1 : 0));
1073 *a++ = 0x20420000 | (imm & 0xffff);
1074 *a++ = 0x247ff000;
1075 *a++ = 0x44230004;
1076 *a++ = 0x4444040a;
1077
1078 /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
1079 *a++ = 0x213f0000 | TO_BE_DELAYED;
1080
1081 /* If the machine continues executing here, it will return
1082 to the main loop, which is fine. */
1083
1084 *addrp = (unsigned char *) a;
1085 bintrans_write_pc_inc(addrp);
1086 return 1;
1087 }
1088
1089
1090 /*
1091 * bintrans_write_instruction__delayedbranch():
1092 */
1093 static int bintrans_write_instruction__delayedbranch(
1094 struct memory *mem, unsigned char **addrp,
1095 uint32_t *potential_chunk_p, uint32_t *chunks,
1096 int only_care_about_chunk_p, int p, int forward)
1097 {
1098 unsigned char *a, *skip=NULL, *generic64bit;
1099 int ofs;
1100 uint64_t alpha_addr, subaddr;
1101
1102 a = *addrp;
1103
1104 if (!only_care_about_chunk_p) {
1105 /* Skip all of this if there is no branch: */
1106 skip = a;
1107 *a++ = 0; *a++ = 0; *a++ = 0x20; *a++ = 0xe5; /* beq s0,skip */
1108
1109 /*
1110 * Perform the jump by setting cpu->delay_slot = 0
1111 * and pc = cpu->delay_jmpaddr.
1112 */
1113 /* 00 00 3f 21 lda s0,0 */
1114 *a++ = 0; *a++ = 0; *a++ = 0x3f; *a++ = 0x21;
1115
1116 bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_DELAY_JMPADDR, ALPHA_T0);
1117 bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T3);
1118 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, MIPSREG_PC);
1119 }
1120
1121 if (potential_chunk_p == NULL) {
1122 if (mem->bintrans_32bit_only) {
1123 /* 34 12 70 a7 ldq t12,4660(a0) */
1124 ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu;
1125 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1126
1127 /* 00 00 fb 6b jmp (t12) */
1128 *a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b;
1129 } else {
1130 /*
1131 * If the highest 32 bits of the address are either
1132 * 0x00000000 or 0xffffffff, then the tables used for
1133 * 32-bit load/stores can be used.
1134 *
1135 * 81 16 24 4a srl a1,0x20,t0
1136 * 03 00 20 e4 beq t0,14 <ok1>
1137 * 01 30 20 40 addl t0,0x1,t0
1138 * 01 00 20 e4 beq t0,14 <ok1>
1139 * 01 00 e0 c3 br 18 <nook>
1140 */
1141 *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a;
1142 *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4;
1143 *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40;
1144 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4;
1145 generic64bit = a;
1146 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1147
1148 /* 34 12 70 a7 ldq t12,4660(a0) */
1149 ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu;
1150 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1151
1152 /* 00 00 fb 6b jmp (t12) */
1153 *a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b;
1154
1155
1156 if (generic64bit != NULL)
1157 *generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4;
1158
1159 /* Not much we can do here if this wasn't to the same
1160 physical page... */
1161
1162 *a++ = 0xfc; *a++ = 0xff; *a++ = 0x84; *a++ = 0x20; /* lda t3,-4(t3) */
1163
1164 /*
1165 * Compare the old pc (t3) and the new pc (t0). If they are on the
1166 * same virtual page (which means that they are on the same physical
1167 * page), then we can check the right chunk pointer, and if it
1168 * is non-NULL, then we can jump there. Otherwise just return.
1169 *
1170 * 00 f0 5f 20 lda t1,-4096
1171 * 01 00 22 44 and t0,t1,t0
1172 * 04 00 82 44 and t3,t1,t3
1173 * a3 05 24 40 cmpeq t0,t3,t2
1174 * 01 00 60 f4 bne t2,7c <ok2>
1175 * 01 80 fa 6b ret
1176 */
1177 *a++ = 0x00; *a++ = 0xf0; *a++ = 0x5f; *a++ = 0x20; /* lda */
1178 *a++ = 0x01; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and */
1179 *a++ = 0x04; *a++ = 0x00; *a++ = 0x82; *a++ = 0x44; /* and */
1180 *a++ = 0xa3; *a++ = 0x05; *a++ = 0x24; *a++ = 0x40; /* cmpeq */
1181 *a++ = 0x01; *a++ = 0x00; *a++ = 0x60; *a++ = 0xf4; /* bne */
1182 *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1183
1184 /* Don't execute too many instructions. (see comment below) */
1185 *a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255;
1186 *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */
1187 *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */
1188 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */
1189 *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1190
1191 /* 15 bits at a time, which means max 60 bits, but
1192 that should be enough. the top 4 bits are probably
1193 not used by userland alpha code. (TODO: verify this) */
1194 alpha_addr = (size_t)chunks;
1195 subaddr = (alpha_addr >> 45) & 0x7fff;
1196
1197 /*
1198 * 00 00 3f 20 lda t0,0
1199 * 21 f7 21 48 sll t0,0xf,t0
1200 * 34 12 21 20 lda t0,4660(t0)
1201 * 21 f7 21 48 sll t0,0xf,t0
1202 * 34 12 21 20 lda t0,4660(t0)
1203 * 21 f7 21 48 sll t0,0xf,t0
1204 * 34 12 21 20 lda t0,4660(t0)
1205 */
1206
1207 /* Start with the topmost 15 bits: */
1208 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20;
1209 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1210
1211 subaddr = (alpha_addr >> 30) & 0x7fff;
1212 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1213 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1214
1215 subaddr = (alpha_addr >> 15) & 0x7fff;
1216 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1217 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1218
1219 subaddr = alpha_addr & 0x7fff;
1220 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1221
1222 /*
1223 * t2 = pc
1224 * t1 = t2 & 0xfff
1225 * t0 += t1
1226 *
1227 * ff 0f 5f 20 lda t1,4095
1228 * 02 00 62 44 and t2,t1,t1
1229 * 01 04 22 40 addq t0,t1,t0
1230 */
1231 bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T2);
1232 *a++ = 0xff; *a++ = 0x0f; *a++ = 0x5f; *a++ = 0x20; /* lda */
1233 *a++ = 0x02; *a++ = 0x00; *a++ = 0x62; *a++ = 0x44; /* and */
1234 *a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq */
1235
1236 /*
1237 * Load the chunk pointer (actually, a 32-bit offset) into t0.
1238 * If it is zero, then skip the following.
1239 * Add cpu->chunk_base_address to t0.
1240 * Jump to t0.
1241 */
1242
1243 *a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */
1244 *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */
1245
1246 /* ldq t2,chunk_base_address(a0) */
1247 ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1248 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4;
1249 /* addq t0,t2,t0 */
1250 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40;
1251
1252 /* 00 00 e1 6b jmp (t0) */
1253 *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */
1254
1255 /* Failure, then return to the main loop. */
1256 *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1257 }
1258 } else {
1259 /*
1260 * Just to make sure that we don't become too unreliant
1261 * on the main program loop, we need to return every once
1262 * in a while (interrupts etc).
1263 *
1264 * Load the "nr of instructions executed" (which is an int)
1265 * and see if it is below a certain threshold. If so, then
1266 * we go on with the fast path (bintrans), otherwise we
1267 * abort by returning.
1268 *
1269 * f4 01 5f 20 lda t1,500 (some low number...)
1270 * a1 0d c2 40 cmple t6,t1,t0
1271 * 01 00 20 f4 bne t0,14 <f+0x14>
1272 */
1273 if (!only_care_about_chunk_p && !forward) {
1274 *a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255;
1275 *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */
1276 *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */
1277 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */
1278 *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1279 }
1280
1281 /*
1282 * potential_chunk_p points to an "uint32_t".
1283 * If this value is non-NULL, then it is a piece of Alpha
1284 * machine language code corresponding to the address
1285 * we're jumping to. Otherwise, those instructions haven't
1286 * been translated yet, so we have to return to the main
1287 * loop. (Actually, we have to add cpu->chunk_base_address,
1288 * because the uint32_t is limited to 32-bit offsets.)
1289 *
1290 * Case 1: The value is non-NULL already at translation
1291 * time. Then we can make a direct (fast) native
1292 * Alpha jump to the code chunk.
1293 *
1294 * Case 2: The value was NULL at translation time, then we
1295 * have to check during runtime.
1296 */
1297
1298 /* Case 1: */
1299 /* printf("%08x ", *potential_chunk_p); */
1300 alpha_addr = *potential_chunk_p + (size_t)mem->translation_code_chunk_space;
1301 ofs = (alpha_addr - ((size_t)a+4)) / 4;
1302 /* printf("%016llx %016llx %i\n", (long long)alpha_addr, (long long)a, ofs); */
1303
1304 if ((*potential_chunk_p) != 0 && ofs > -0xfffff && ofs < 0xfffff) {
1305 *a++ = ofs & 255; *a++ = (ofs >> 8) & 255; *a++ = 0xe0 + ((ofs >> 16) & 0x1f); *a++ = 0xc3; /* br <chunk> */
1306 } else {
1307 /* Case 2: */
1308
1309 bintrans_register_potential_quick_jump(mem, a, p);
1310
1311 /* 15 bits at a time, which means max 60 bits, but
1312 that should be enough. the top 4 bits are probably
1313 not used by userland alpha code. (TODO: verify this) */
1314 alpha_addr = (size_t)potential_chunk_p;
1315 subaddr = (alpha_addr >> 45) & 0x7fff;
1316
1317 /*
1318 * 00 00 3f 20 lda t0,0
1319 * 21 f7 21 48 sll t0,0xf,t0
1320 * 34 12 21 20 lda t0,4660(t0)
1321 * 21 f7 21 48 sll t0,0xf,t0
1322 * 34 12 21 20 lda t0,4660(t0)
1323 * 21 f7 21 48 sll t0,0xf,t0
1324 * 34 12 21 20 lda t0,4660(t0)
1325 */
1326
1327 /* Start with the topmost 15 bits: */
1328 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20;
1329 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1330
1331 subaddr = (alpha_addr >> 30) & 0x7fff;
1332 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1333 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1334
1335 subaddr = (alpha_addr >> 15) & 0x7fff;
1336 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1337 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1338
1339 subaddr = alpha_addr & 0x7fff;
1340 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1341
1342 /*
1343 * Load the chunk pointer into t0.
1344 * If it is NULL (zero), then skip the following jump.
1345 * Jump to t0.
1346 */
1347 *a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */
1348 *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */
1349
1350 /* ldq t2,chunk_base_address(a0) */
1351 ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1352 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4;
1353 /* addq t0,t2,t0 */
1354 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40;
1355
1356 /* 00 00 e1 6b jmp (t0) */
1357 *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */
1358
1359 /* "Failure", then let's return to the main loop. */
1360 *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1361 }
1362 }
1363
1364 if (skip != NULL) {
1365 *skip = ((size_t)a - (size_t)skip - 4) / 4;
1366 skip ++;
1367 *skip = (((size_t)a - (size_t)skip - 4) / 4) >> 8;
1368 }
1369
1370 *addrp = a;
1371 return 1;
1372 }
1373
1374
1375 /*
1376 * bintrans_write_instruction__loadstore():
1377 */
1378 static int bintrans_write_instruction__loadstore(
1379 struct memory *mem, unsigned char **addrp,
1380 int rt, int imm, int rs, int instruction_type, int bigendian)
1381 {
1382 unsigned char *a, *fail, *generic64bit = NULL, *generic64bitA = NULL;
1383 unsigned char *doloadstore = NULL,
1384 *ok_unaligned_load3, *ok_unaligned_load2, *ok_unaligned_load1;
1385 uint32_t *b;
1386 int ofs, alignment, load = 0, alpha_rs, alpha_rt, unaligned = 0;
1387
1388 /* TODO: Not yet: */
1389 if (instruction_type == HI6_LQ_MDMX || instruction_type == HI6_SQ) {
1390 return 0;
1391 }
1392
1393 switch (instruction_type) {
1394 case HI6_LQ_MDMX:
1395 case HI6_LD:
1396 case HI6_LDL:
1397 case HI6_LDR:
1398 case HI6_LWU:
1399 case HI6_LW:
1400 case HI6_LWL:
1401 case HI6_LWR:
1402 case HI6_LHU:
1403 case HI6_LH:
1404 case HI6_LBU:
1405 case HI6_LB:
1406 load = 1;
1407 if (rt == 0)
1408 return 0;
1409 }
1410
1411 switch (instruction_type) {
1412 case HI6_LDL:
1413 case HI6_LDR:
1414 case HI6_LWL:
1415 case HI6_LWR:
1416 case HI6_SDL:
1417 case HI6_SDR:
1418 case HI6_SWL:
1419 case HI6_SWR:
1420 unaligned = 1;
1421 }
1422
1423 a = *addrp;
1424
1425 /*
1426 * a1 = gpr[rs] + imm;
1427 *
1428 * 88 08 30 a4 ldq t0,2184(a0)
1429 * 34 12 21 22 lda a1,4660(t0)
1430 */
1431
1432 alpha_rs = map_MIPS_to_Alpha[rs];
1433 if (alpha_rs < 0) {
1434 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1435 alpha_rs = ALPHA_T0;
1436 }
1437 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1438
1439 alignment = 0;
1440 switch (instruction_type) {
1441 case HI6_LQ_MDMX:
1442 case HI6_SQ:
1443 alignment = 15;
1444 break;
1445 case HI6_LD:
1446 case HI6_LDL:
1447 case HI6_LDR:
1448 case HI6_SD:
1449 case HI6_SDL:
1450 case HI6_SDR:
1451 alignment = 7;
1452 break;
1453 case HI6_LW:
1454 case HI6_LWL:
1455 case HI6_LWR:
1456 case HI6_LWU:
1457 case HI6_SW:
1458 case HI6_SWL:
1459 case HI6_SWR:
1460 alignment = 3;
1461 break;
1462 case HI6_LH:
1463 case HI6_LHU:
1464 case HI6_SH:
1465 alignment = 1;
1466 break;
1467 }
1468
1469 if (unaligned) {
1470 /*
1471 * Unaligned load/store: Perform the host load/store at
1472 * an aligned address, and then figure out which bytes to
1473 * actually load into the destination register.
1474 *
1475 * 02 30 20 46 and a1,alignment,t1
1476 * 31 05 22 42 subq a1,t1,a1
1477 */
1478 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1479 *a++ = 0x31; *a++ = 0x05; *a++ = 0x22; *a++ = 0x42;
1480 } else if (alignment > 0) {
1481 /*
1482 * Check alignment:
1483 *
1484 * 02 30 20 46 and a1,0x1,t1
1485 * 02 70 20 46 and a1,0x3,t1 (one of these "and"s)
1486 * 02 f0 20 46 and a1,0x7,t1
1487 * 02 f0 21 46 and a1,0xf,t1
1488 * 01 00 40 e4 beq t1,<okalign>
1489 * 01 80 fa 6b ret
1490 */
1491 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1492 fail = a;
1493 *a++ = 0x01; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4;
1494 *addrp = a;
1495 bintrans_write_chunkreturn_fail(addrp);
1496 a = *addrp;
1497 *fail = ((size_t)a - (size_t)fail - 4) / 4;
1498 }
1499
1500 alpha_rt = map_MIPS_to_Alpha[rt];
1501
1502 if (mem->bintrans_32bit_only) {
1503 /* Special case for 32-bit addressing: */
1504
1505 if (load)
1506 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_load_32bit) - (size_t)&dummy_cpu;
1507 else
1508 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_store_32bit) - (size_t)&dummy_cpu;
1509 /* ldq t12,bintrans_loadstore_32bit(a0) */
1510 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1511
1512 /* jsr t4,(t12),<after> */
1513 *a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68;
1514
1515 /* Now: a3 = host page, t3 = address of host load/store */
1516 } else {
1517 /*
1518 * If the highest 33 bits of the address are either all ones
1519 * or all zeroes, then the tables used for 32-bit load/stores
1520 * can be used.
1521 */
1522 *a++ = 0x81; *a++ = 0xf6; *a++ = 0x23; *a++ = 0x4a; /* srl a1,0x1f,t0 */
1523 *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x44; /* and t0,0x1,t0 */
1524 *a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<noll> */
1525 *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */
1526 *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40; /* addl t0,0x1,t0 */
1527 *a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */
1528 generic64bit = a;
1529 *a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */
1530 /* <noll>: */
1531 *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */
1532 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */
1533 generic64bitA = a;
1534 *a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */
1535
1536 if (load)
1537 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_load_32bit) - (size_t)&dummy_cpu;
1538 else
1539 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_store_32bit) - (size_t)&dummy_cpu;
1540 /* ldq t12,bintrans_loadstore_32bit(a0) */
1541 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1542
1543 /* jsr t4,(t12),<after> */
1544 *a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68;
1545
1546 /*
1547 * Now: a3 = host page
1548 * t3 = (potential) address of host load/store
1549 */
1550
1551 doloadstore = a;
1552 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1553
1554
1555 /*
1556 * Generic (64-bit) load/store:
1557 */
1558
1559 if (generic64bit != NULL)
1560 *generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4;
1561 if (generic64bitA != NULL)
1562 *generic64bitA = ((size_t)a - (size_t)generic64bitA - 4) / 4;
1563
1564 *addrp = a;
1565 b = (uint32_t *) *addrp;
1566
1567 /* Save a0 and the old return address on the stack: */
1568 *b++ = 0x23deff80; /* lda sp,-128(sp) */
1569
1570 *b++ = 0xb75e0000; /* stq ra,0(sp) */
1571 *b++ = 0xb61e0008; /* stq a0,8(sp) */
1572 *b++ = 0xb4de0010; /* stq t5,16(sp) */
1573 *b++ = 0xb0fe0018; /* stl t6,24(sp) */
1574 *b++ = 0xb71e0020; /* stq t10,32(sp) */
1575 *b++ = 0xb73e0028; /* stq t11,40(sp) */
1576 *b++ = 0xb51e0030; /* stq t7,48(sp) */
1577 *b++ = 0xb6de0038; /* stq t8,56(sp) */
1578 *b++ = 0xb6fe0040; /* stq t9,64(sp) */
1579
1580 ofs = ((size_t)&dummy_cpu.cd.mips.fast_vaddr_to_hostaddr) - (size_t)&dummy_cpu;
1581
1582 *b++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */
1583
1584 /* a1 is already vaddr. set a2 = writeflag */
1585 *b++ = 0x225f0000 | (load? 0 : 1);
1586
1587 /* Call fast_vaddr_to_hostaddr: */
1588 *b++ = 0x6b5b4000; /* jsr ra,(t12),<after> */
1589
1590 /* Restore the old return address and a0 from the stack: */
1591 *b++ = 0xa75e0000; /* ldq ra,0(sp) */
1592 *b++ = 0xa61e0008; /* ldq a0,8(sp) */
1593 *b++ = 0xa4de0010; /* ldq t5,16(sp) */
1594 *b++ = 0xa0fe0018; /* ldl t6,24(sp) */
1595 *b++ = 0xa71e0020; /* ldq t10,32(sp) */
1596 *b++ = 0xa73e0028; /* ldq t11,40(sp) */
1597 *b++ = 0xa51e0030; /* ldq t7,48(sp) */
1598 *b++ = 0xa6de0038; /* ldq t8,56(sp) */
1599 *b++ = 0xa6fe0040; /* ldq t9,64(sp) */
1600
1601 *b++ = 0x23de0080; /* lda sp,128(sp) */
1602
1603 *addrp = (unsigned char *) b;
1604 a = *addrp;
1605
1606 /*
1607 * NULL? Then return failure.
1608 * 01 00 00 f4 bne v0,f8 <okzz>
1609 */
1610 fail = a;
1611 *a++ = 0x01; *a++ = 0x00; *a++ = 0x00; *a++ = 0xf4;
1612 bintrans_write_chunkreturn_fail(&a);
1613 *fail = ((size_t)a - (size_t)fail - 4) / 4;
1614
1615 /* The rest of this code was written with t3 as the address. */
1616
1617 /* 04 14 00 40 addq v0,0,t3 */
1618 *a++ = 0x04; *a++ = 0x14; *a++ = 0x00; *a++ = 0x40;
1619
1620 if (doloadstore != NULL)
1621 *doloadstore = ((size_t)a - (size_t)doloadstore - 4) / 4;
1622 }
1623
1624
1625 switch (instruction_type) {
1626 case HI6_LQ_MDMX:
1627 /* TODO */
1628 break;
1629 case HI6_LD:
1630 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa4; /* ldq t0,0(t3) */
1631 if (bigendian) {
1632 /* remember original 8 bytes of t0: */
1633 *a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */
1634
1635 /* swap lowest 4 bytes: */
1636 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1637 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1638 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1639 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1640 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1641 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1642 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1643 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1644 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1645
1646 /* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */
1647 *a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */
1648 *a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */
1649 *a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */
1650
1651 /* swap highest 4 bytes: */
1652 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1653 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1654 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1655 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1656 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1657 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1658 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1659 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1660 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1661
1662 /* or the results together: */
1663 *a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */
1664 }
1665 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1666 break;
1667 case HI6_LW:
1668 case HI6_LWU:
1669 if (alpha_rt < 0 || bigendian || instruction_type == HI6_LWU)
1670 alpha_rt = ALPHA_T0;
1671 /* ldl rt,0(t3) */
1672 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1673 *a++ = 0xa0 | ((alpha_rt >> 3) & 3);
1674 if (bigendian) {
1675 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1676 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1677 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1678 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1679 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1680 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1681 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1682 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1683 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1684 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,zero,t0 (sign extend) 32->64 */
1685 }
1686 if (instruction_type == HI6_LWU) {
1687 /* Use only lowest 32 bits: */
1688 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48; /* zapnot t0,0xf,t0 */
1689 }
1690 if (alpha_rt == ALPHA_T0)
1691 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1692 break;
1693 case HI6_LHU:
1694 case HI6_LH:
1695 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x30; /* ldwu from memory */
1696 if (bigendian) {
1697 *a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */
1698 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1699 *a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */
1700 }
1701 if (instruction_type == HI6_LH) {
1702 *a++ = 0x21; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x73; /* sextw t0,t0 */
1703 }
1704 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1705 break;
1706 case HI6_LBU:
1707 case HI6_LB:
1708 if (alpha_rt < 0)
1709 alpha_rt = ALPHA_T0;
1710 /* ldbu rt,0(t3) */
1711 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1712 *a++ = 0x28 | ((alpha_rt >> 3) & 3);
1713 if (instruction_type == HI6_LB) {
1714 /* sextb rt,rt */
1715 *a++ = alpha_rt; *a++ = 0x00; *a++ = 0xe0 + alpha_rt; *a++ = 0x73;
1716 }
1717 if (alpha_rt == ALPHA_T0)
1718 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1719 break;
1720
1721 case HI6_LWL:
1722 /* a1 = 0..3 (or 0..7 for 64-bit loads): */
1723 alpha_rs = map_MIPS_to_Alpha[rs];
1724 if (alpha_rs < 0) {
1725 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1726 alpha_rs = ALPHA_T0;
1727 }
1728 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1729 /* 02 30 20 46 and a1,alignment,t1 */
1730 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1731
1732 /* ldl t0,0(t3) */
1733 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1734
1735 if (bigendian) {
1736 /* TODO */
1737 bintrans_write_chunkreturn_fail(&a);
1738 }
1739 /*
1740 * lwl: memory = 0x12 0x34 0x56 0x78
1741 * offset (a1): register rt becomes:
1742 * 0 0x12......
1743 * 1 0x3412....
1744 * 2 0x563412..
1745 * 3 0x78563412
1746 */
1747
1748 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1749
1750 /*
1751 10: 03 00 9f 20 lda t3,3
1752 14: a5 05 82 40 cmpeq t3,t1,t4
1753 18: 01 00 a0 e4 beq t4,20 <skip>
1754 */
1755 *a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1756 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1757 *a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1758
1759 /* 03 14 20 40 addq t0,0,t2 */
1760 *a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40;
1761
1762 ok_unaligned_load3 = a;
1763 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1764
1765
1766
1767 *a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1768 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1769 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1770 /*
1771 * 2 0x563412..
1772 2c: 21 17 21 48 sll t0,0x8,t0
1773 30: 01 10 20 40 addl t0,0,t0
1774 34: 03 f0 7f 44 and t2,0xff,t2
1775 38: 03 04 23 44 or t0,t2,t2
1776 */
1777 *a++ = 0x21; *a++ = 0x17; *a++ = 0x21; *a++ = 0x48;
1778 *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1779 *a++ = 0x03; *a++ = 0xf0; *a++ = 0x7f; *a++ = 0x44;
1780 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1781
1782 ok_unaligned_load2 = a;
1783 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1784
1785
1786
1787 *a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1788 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1789 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1790 /*
1791 * 1 0x3412....
1792 2c: 21 17 22 48 sll t0,0x10,t0
1793 30: 01 10 20 40 addl t0,0,t0
1794 34: 23 76 60 48 zapnot t2,0x3,t2
1795 38: 03 04 23 44 or t0,t2,t2
1796 */
1797 *a++ = 0x21; *a++ = 0x17; *a++ = 0x22; *a++ = 0x48;
1798 *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1799 *a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48;
1800 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1801
1802 ok_unaligned_load1 = a;
1803 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1804
1805
1806
1807
1808 /*
1809 * 0 0x12......
1810 2c: 21 17 23 48 sll t0,0x18,t0
1811 30: 01 10 20 40 addl t0,0,t0
1812 34: 23 f6 60 48 zapnot t2,0x7,t2
1813 38: 03 04 23 44 or t0,t2,t2
1814 */
1815 *a++ = 0x21; *a++ = 0x17; *a++ = 0x23; *a++ = 0x48;
1816 *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1817 *a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48;
1818 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1819
1820
1821 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
1822 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
1823 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
1824
1825 /* 03 10 60 40 addl t2,0,t2 */
1826 *a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
1827
1828 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt);
1829 break;
1830
1831 case HI6_LWR:
1832 /* a1 = 0..3 (or 0..7 for 64-bit loads): */
1833 alpha_rs = map_MIPS_to_Alpha[rs];
1834 if (alpha_rs < 0) {
1835 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1836 alpha_rs = ALPHA_T0;
1837 }
1838 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1839 /* 02 30 20 46 and a1,alignment,t1 */
1840 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1841
1842 /* ldl t0,0(t3) */
1843 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1844
1845 if (bigendian) {
1846 /* TODO */
1847 bintrans_write_chunkreturn_fail(&a);
1848 }
1849 /*
1850 * lwr: memory = 0x12 0x34 0x56 0x78
1851 * offset (a1): register rt becomes:
1852 * 0 0x78563412
1853 * 1 0x..785634
1854 * 2 0x....7856
1855 * 3 0x......78
1856 */
1857
1858 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1859
1860 /*
1861 10: 03 00 9f 20 lda t3,3
1862 14: a5 05 82 40 cmpeq t3,t1,t4
1863 18: 01 00 a0 e4 beq t4,20 <skip>
1864 */
1865 *a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1866 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1867 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1868
1869 /*
1870 2c: 81 16 23 48 srl t0,0x18,t0
1871 b0: 21 36 20 48 zapnot t0,0x1,t0
1872 34: 23 d6 7f 48 zapnot t2,0xfe,t2
1873 38: 03 04 23 44 or t0,t2,t2
1874 */
1875 *a++ = 0x81; *a++ = 0x16; *a++ = 0x23; *a++ = 0x48;
1876 *a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48;
1877 *a++ = 0x23; *a++ = 0xd6; *a++ = 0x7f; *a++ = 0x48;
1878 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1879
1880 ok_unaligned_load3 = a;
1881 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1882
1883
1884
1885 *a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1886 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1887 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1888 /*
1889 2c: 81 16 22 48 srl t0,0x10,t0
1890 b4: 21 76 20 48 zapnot t0,0x3,t0
1891 34: 23 96 7f 48 zapnot t2,0xfc,t2
1892 38: 03 04 23 44 or t0,t2,t2
1893 */
1894 *a++ = 0x81; *a++ = 0x16; *a++ = 0x22; *a++ = 0x48;
1895 *a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48;
1896 *a++ = 0x23; *a++ = 0x96; *a++ = 0x7f; *a++ = 0x48;
1897 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1898
1899 ok_unaligned_load2 = a;
1900 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1901
1902
1903
1904 *a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1905 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1906 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1907 /*
1908 2c: 81 16 21 48 srl t0,0x8,t0
1909 b8: 21 f6 20 48 zapnot t0,0x7,t0
1910 3c: 23 16 7f 48 zapnot t2,0xf8,t2
1911 40: 03 04 23 44 or t0,t2,t2
1912 */
1913 *a++ = 0x81; *a++ = 0x16; *a++ = 0x21; *a++ = 0x48;
1914 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48;
1915 *a++ = 0x23; *a++ = 0x16; *a++ = 0x7f; *a++ = 0x48;
1916 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1917
1918 ok_unaligned_load1 = a;
1919 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1920
1921
1922
1923
1924 /*
1925 * 0 0x12......
1926 */
1927 /* 03 14 20 40 addq t0,0,t2 */
1928 *a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40;
1929
1930
1931
1932 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
1933 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
1934 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
1935
1936 /* 03 10 60 40 addl t2,0,t2 */
1937 *a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
1938
1939 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt);
1940 break;
1941
1942 case HI6_SQ:
1943 /* TODO */
1944 break;
1945 case HI6_SD:
1946 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1947 if (bigendian) {
1948 /* remember original 8 bytes of t0: */
1949 *a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */
1950
1951 /* swap lowest 4 bytes: */
1952 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1953 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1954 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1955 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1956 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1957 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1958 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1959 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1960 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1961
1962 /* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */
1963 *a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */
1964 *a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */
1965 *a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */
1966
1967 /* swap highest 4 bytes: */
1968 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1969 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1970 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1971 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1972 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1973 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1974 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1975 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1976 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1977
1978 /* or the results together: */
1979 *a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */
1980 }
1981 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb4; /* stq to memory */
1982 break;
1983 case HI6_SW:
1984 if (alpha_rt < 0 || bigendian) {
1985 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1986 alpha_rt = ALPHA_T0;
1987 }
1988 if (bigendian) {
1989 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1990 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1991 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1992 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1993 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1994 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1995 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1996 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1997 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1998 }
1999 /* stl to memory: stl rt,0(t3) */
2000 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
2001 *a++ = 0xb0 | ((alpha_rt >> 3) & 3);
2002 break;
2003 case HI6_SH:
2004 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
2005 if (bigendian) {
2006 *a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */
2007 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
2008 *a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */
2009 }
2010 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x34; /* stw to memory */
2011 break;
2012 case HI6_SB:
2013 if (alpha_rt < 0) {
2014 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
2015 alpha_rt = ALPHA_T0;
2016 }
2017 /* stb to memory: stb rt,0(t3) */
2018 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
2019 *a++ = 0x38 | ((alpha_rt >> 3) & 3);
2020 break;
2021
2022 case HI6_SWL:
2023 /* a1 = 0..3 (or 0..7 for 64-bit stores): */
2024 alpha_rs = map_MIPS_to_Alpha[rs];
2025 if (alpha_rs < 0) {
2026 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
2027 alpha_rs = ALPHA_T0;
2028 }
2029 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
2030 /* 02 30 20 46 and a1,alignment,t1 */
2031 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
2032
2033 /* ldl t0,0(t3) */
2034 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
2035
2036 if (bigendian) {
2037 /* TODO */
2038 bintrans_write_chunkreturn_fail(&a);
2039 }
2040
2041 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
2042
2043 /*
2044 * swl: memory = 0x12 0x34 0x56 0x78
2045 * register = 0x89abcdef
2046 * offset (a1): memory becomes:
2047 * 0 0x89 0x.. 0x.. 0x..
2048 * 1 0xab 0x89 0x.. 0x..
2049 * 2 0xcd 0xab 0x89 0x..
2050 * 3 0xef 0xcd 0xab 0x89
2051 */
2052
2053 /*
2054 a5 75 40 40 cmpeq t1,0x03,t4
2055 01 00 a0 e4 beq t4,20 <skip>
2056 */
2057 *a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40;
2058 *a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2059
2060 /* 01 10 60 40 addl t2,0,t0 */
2061 *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
2062
2063 ok_unaligned_load3 = a;
2064 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2065
2066
2067
2068
2069 *a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40;
2070 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2071 /*
2072 2:
2073 e8: 83 16 61 48 srl t2,0x8,t2
2074 ec: 23 f6 60 48 zapnot t2,0x7,t2
2075 f0: 21 16 3f 48 zapnot t0,0xf8,t0
2076 f4: 01 04 23 44 or t0,t2,t0
2077 */
2078 *a++ = 0x83; *a++ = 0x16; *a++ = 0x61; *a++ = 0x48;
2079 *a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48;
2080 *a++ = 0x21; *a++ = 0x16; *a++ = 0x3f; *a++ = 0x48;
2081 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2082
2083 ok_unaligned_load2 = a;
2084 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2085
2086
2087
2088 *a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40;
2089 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2090 /*
2091 1:
2092 f8: 83 16 62 48 srl t2,0x10,t2
2093 fc: 23 76 60 48 zapnot t2,0x3,t2
2094 100: 21 96 3f 48 zapnot t0,0xfc,t0
2095 104: 01 04 23 44 or t0,t2,t0
2096 */
2097 *a++ = 0x83; *a++ = 0x16; *a++ = 0x62; *a++ = 0x48;
2098 *a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48;
2099 *a++ = 0x21; *a++ = 0x96; *a++ = 0x3f; *a++ = 0x48;
2100 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2101
2102 ok_unaligned_load1 = a;
2103 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2104
2105
2106
2107
2108
2109 /*
2110 0:
2111 108: 83 16 63 48 srl t2,0x18,t2
2112 10c: 23 36 60 48 zapnot t2,0x1,t2
2113 110: 21 d6 3f 48 zapnot t0,0xfe,t0
2114 114: 01 04 23 44 or t0,t2,t0
2115 */
2116 *a++ = 0x83; *a++ = 0x16; *a++ = 0x63; *a++ = 0x48;
2117 *a++ = 0x23; *a++ = 0x36; *a++ = 0x60; *a++ = 0x48;
2118 *a++ = 0x21; *a++ = 0xd6; *a++ = 0x3f; *a++ = 0x48;
2119 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2120
2121
2122 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
2123 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
2124 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
2125
2126 /* sdl t0,0(t3) */
2127 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0;
2128 break;
2129
2130 case HI6_SWR:
2131 /* a1 = 0..3 (or 0..7 for 64-bit stores): */
2132 alpha_rs = map_MIPS_to_Alpha[rs];
2133 if (alpha_rs < 0) {
2134 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
2135 alpha_rs = ALPHA_T0;
2136 }
2137 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
2138 /* 02 30 20 46 and a1,alignment,t1 */
2139 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
2140
2141 /* ldl t0,0(t3) */
2142 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
2143
2144 if (bigendian) {
2145 /* TODO */
2146 bintrans_write_chunkreturn_fail(&a);
2147 }
2148
2149 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
2150
2151 /*
2152 * swr: memory = 0x12 0x34 0x56 0x78
2153 * register = 0x89abcdef
2154 * offset (a1): memory becomes:
2155 * 0 0xef 0xcd 0xab 0x89
2156 * 1 0x.. 0xef 0xcd 0xab
2157 * 2 0x.. 0x.. 0xef 0xcd
2158 * 3 0x.. 0x.. 0x.. 0xef
2159 */
2160
2161
2162 /*
2163 a5 75 40 40 cmpeq t1,0x03,t4
2164 01 00 a0 e4 beq t4,20 <skip>
2165 */
2166 *a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40;
2167 *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2168
2169 /*
2170 118: 23 17 63 48 sll t2,0x18,t2
2171 11c: 21 f6 20 48 zapnot t0,0x7,t0
2172 120: 01 04 23 44 or t0,t2,t0
2173 */
2174 *a++ = 0x23; *a++ = 0x17; *a++ = 0x63; *a++ = 0x48;
2175 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48;
2176 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2177
2178 ok_unaligned_load3 = a;
2179 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2180
2181
2182
2183
2184
2185 *a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40;
2186 *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2187 /*
2188 2:
2189 124: 23 17 62 48 sll t2,0x10,t2
2190 128: 21 76 20 48 zapnot t0,0x3,t0
2191 12c: 01 04 23 44 or t0,t2,t0
2192 */
2193 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48;
2194 *a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48;
2195 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2196
2197 ok_unaligned_load2 = a;
2198 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2199
2200
2201
2202 *a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40;
2203 *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2204 /*
2205 1:
2206 130: 23 17 61 48 sll t2,0x8,t2
2207 134: 21 36 20 48 zapnot t0,0x1,t0
2208 138: 01 04 23 44 or t0,t2,t0
2209 */
2210 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48;
2211 *a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48;
2212 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2213
2214 ok_unaligned_load1 = a;
2215 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2216
2217
2218
2219 /*
2220 0:
2221 13c: 01 10 60 40 addl t2,0,t0
2222 */
2223 *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
2224
2225
2226 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
2227 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
2228 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
2229
2230 /* sdl t0,0(t3) */
2231 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0;
2232 break;
2233
2234 default:
2235 ;
2236 }
2237
2238 *addrp = a;
2239 bintrans_write_pc_inc(addrp);
2240 return 1;
2241 }
2242
2243
2244 /*
2245 * bintrans_write_instruction__lui():
2246 */
2247 static int bintrans_write_instruction__lui(unsigned char **addrp,
2248 int rt, int imm)
2249 {
2250 uint32_t *a;
2251
2252 /*
2253 * dc fe 3f 24 ldah t0,-292
2254 * 1f 04 ff 5f fnop
2255 * 88 08 30 b4 stq t0,2184(a0)
2256 */
2257 if (rt != 0) {
2258 int alpha_rt = map_MIPS_to_Alpha[rt];
2259 if (alpha_rt < 0)
2260 alpha_rt = ALPHA_T0;
2261
2262 a = (uint32_t *) *addrp;
2263 *a++ = 0x241f0000 | (alpha_rt << 21) | ((uint32_t)imm & 0xffff);
2264 *addrp = (unsigned char *) a;
2265
2266 if (alpha_rt == ALPHA_T0) {
2267 *a++ = 0x5fff041f; /* fnop */
2268 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
2269 }
2270 }
2271
2272 bintrans_write_pc_inc(addrp);
2273
2274 return 1;
2275 }
2276
2277
2278 /*
2279 * bintrans_write_instruction__mfmthilo():
2280 */
2281 static int bintrans_write_instruction__mfmthilo(unsigned char **addrp,
2282 int rd, int from_flag, int hi_flag)
2283 {
2284 unsigned char *a;
2285 int ofs;
2286
2287 a = *addrp;
2288
2289 /*
2290 * 18 09 30 a4 ldq t0,hi(a0) (or lo)
2291 * 18 09 30 b4 stq t0,rd(a0)
2292 *
2293 * (or if from_flag is cleared then move the other way, it's
2294 * actually not rd then, but rs...)
2295 */
2296
2297 if (from_flag) {
2298 if (rd != 0) {
2299 /* mfhi or mflo */
2300 if (hi_flag)
2301 ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
2302 else
2303 ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
2304 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xa4;
2305
2306 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
2307 }
2308 } else {
2309 /* mthi or mtlo */
2310 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rd, ALPHA_T0);
2311
2312 if (hi_flag)
2313 ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
2314 else
2315 ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
2316 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
2317 }
2318
2319 *addrp = a;
2320 bintrans_write_pc_inc(addrp);
2321 return 1;
2322 }
2323
2324
2325 /*
2326 * bintrans_write_instruction__mfc_mtc():
2327 */
2328 static int bintrans_write_instruction__mfc_mtc(struct memory *mem,
2329 unsigned char **addrp, int coproc_nr, int flag64bit, int rt,
2330 int rd, int mtcflag)
2331 {
2332 uint32_t *a, *jump;
2333 int ofs;
2334
2335 /*
2336 * NOTE: Only a few registers are readable without side effects.
2337 */
2338 if (rt == 0 && !mtcflag)
2339 return 0;
2340
2341 if (coproc_nr >= 1)
2342 return 0;
2343
2344 if (rd == COP0_RANDOM || rd == COP0_COUNT)
2345 return 0;
2346
2347
2348 /*************************************************************
2349 *
2350 * TODO: Check for kernel mode, or Coproc X usability bit!
2351 *
2352 *************************************************************/
2353
2354 a = (uint32_t *) *addrp;
2355
2356 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2357 *a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */
2358
2359 ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
2360 *a++ = 0xa4410000 | (ofs & 0xffff); /* ldq t1,reg_rd(t0) */
2361
2362 if (mtcflag) {
2363 /* mtc: */
2364 *addrp = (unsigned char *) a;
2365 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0);
2366 a = (uint32_t *) *addrp;
2367
2368 if (!flag64bit) {
2369 *a++ = 0x40201001; /* addl t0,0,t0 */
2370 *a++ = 0x40401002; /* addl t1,0,t1 */
2371 }
2372
2373 /*
2374 * In the general case: Only allow mtc if it does NOT
2375 * change the register!!
2376 */
2377
2378 switch (rd) {
2379 case COP0_INDEX:
2380 break;
2381
2382 case COP0_EPC:
2383 break;
2384
2385 /* TODO: Some bits are not writable */
2386 case COP0_ENTRYLO0:
2387 case COP0_ENTRYLO1:
2388 break;
2389
2390 case COP0_ENTRYHI:
2391 /*
2392 * Entryhi is ok to write to, as long as the
2393 * ASID isn't changed. (That would require
2394 * cache invalidations etc. Instead of checking
2395 * for MMU3K vs others, we just assume that all the
2396 * lowest 12 bits must be the same.
2397 */
2398 /* ff 0f bf 20 lda t4,0x0fff */
2399 /* 03 00 25 44 and t0,t4,t2 */
2400 /* 04 00 45 44 and t1,t4,t3 */
2401 /* a3 05 64 40 cmpeq t2,t3,t2 */
2402 /* 01 00 60 f4 bne t2,<ok> */
2403 *a++ = 0x20bf0fff;
2404 *a++ = 0x44250003;
2405 *a++ = 0x44450004;
2406 *a++ = 0x406405a3;
2407 jump = a;
2408 *a++ = 0; /* later */
2409 *addrp = (unsigned char *) a;
2410 bintrans_write_chunkreturn_fail(addrp);
2411 a = (uint32_t *) *addrp;
2412 *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2413 break;
2414
2415 case COP0_STATUS:
2416 /* Only allow updates to the status register if
2417 the interrupt enable bits were changed, but no
2418 other bits! */
2419 if (mem->bintrans_32bit_only) {
2420 /* R3000 etc. */
2421 /* t4 = 0x0fe70000; */
2422 *a++ = 0x20bf0000;
2423 *a++ = 0x24a50fe7;
2424 } else {
2425 /* fe 00 bf 20 lda t4,0x00fe */
2426 /* ff ff a5 24 ldah t4,-1(t4) */
2427 *a++ = 0x20bf0000;
2428 *a++ = 0x24a5ffff;
2429 }
2430
2431 /* 03 00 25 44 and t0,t4,t2 */
2432 /* 04 00 45 44 and t1,t4,t3 */
2433 /* a3 05 64 40 cmpeq t2,t3,t2 */
2434 /* 01 00 60 f4 bne t2,<ok> */
2435 *a++ = 0x44250003;
2436 *a++ = 0x44450004;
2437 *a++ = 0x406405a3;
2438 jump = a;
2439 *a++ = 0; /* later */
2440 *addrp = (unsigned char *) a;
2441 bintrans_write_chunkreturn_fail(addrp);
2442 a = (uint32_t *) *addrp;
2443 *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2444
2445 /* If enabling interrupt bits would cause an
2446 exception, then don't do it: */
2447 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2448 *a++ = 0xa4900000 | (ofs & 0xffff); /* ldq t3,coproc[0](a0) */
2449 ofs = ((size_t)&dummy_coproc.reg[COP0_CAUSE]) - (size_t)&dummy_coproc;
2450 *a++ = 0xa4a40000 | (ofs & 0xffff); /* ldq t4,reg_rd(t3) */
2451
2452 /* 02 00 a1 44 and t4,t0,t1 */
2453 /* 83 16 41 48 srl t1,0x8,t2 */
2454 /* 04 f0 7f 44 and t2,0xff,t3 */
2455 *a++ = 0x44a10002;
2456 *a++ = 0x48411683;
2457 *a++ = 0x447ff004;
2458 /* 01 00 80 e4 beq t3,<ok> */
2459 jump = a;
2460 *a++ = 0; /* later */
2461 *addrp = (unsigned char *) a;
2462 bintrans_write_chunkreturn_fail(addrp);
2463 a = (uint32_t *) *addrp;
2464 *jump = 0xe4800000 | (((size_t)a - (size_t)jump - 4) / 4);
2465 break;
2466
2467 default:
2468 /* a3 05 22 40 cmpeq t0,t1,t2 */
2469 /* 01 00 60 f4 bne t2,<ok> */
2470 *a++ = 0x402205a3;
2471 jump = a;
2472 *a++ = 0; /* later */
2473 *addrp = (unsigned char *) a;
2474 bintrans_write_chunkreturn_fail(addrp);
2475 a = (uint32_t *) *addrp;
2476 *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2477 }
2478
2479 *a++ = 0x40201402; /* addq t0,0,t1 */
2480
2481 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2482 *a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */
2483 ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
2484 *a++ = 0xb4410000 | (ofs & 0xffff); /* stq t1,reg_rd(t0) */
2485 } else {
2486 /* mfc: */
2487 if (!flag64bit) {
2488 *a++ = 0x40401002; /* addl t1,0,t1 */
2489 }
2490
2491 *addrp = (unsigned char *) a;
2492 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T1, rt);
2493 a = (uint32_t *) *addrp;
2494 }
2495
2496 *addrp = (unsigned char *) a;
2497
2498 bintrans_write_pc_inc(addrp);
2499 return 1;
2500 }
2501
2502
2503 /*
2504 * bintrans_write_instruction__tlb_rfe_etc():
2505 */
2506 static int bintrans_write_instruction__tlb_rfe_etc(unsigned char **addrp,
2507 int itype)
2508 {
2509 uint32_t *a;
2510 int ofs = 0;
2511
2512 switch (itype) {
2513 case CALL_TLBWI:
2514 case CALL_TLBWR:
2515 case CALL_TLBP:
2516 case CALL_TLBR:
2517 case CALL_RFE:
2518 case CALL_ERET:
2519 case CALL_BREAK:
2520 case CALL_SYSCALL:
2521 break;
2522 default:
2523 return 0;
2524 }
2525
2526 a = (uint32_t *) *addrp;
2527
2528 /* a0 = pointer to the cpu struct */
2529
2530 switch (itype) {
2531 case CALL_TLBWI:
2532 case CALL_TLBWR:
2533 /* a1 = 0 for indexed, 1 for random */
2534 *a++ = 0x223f0000 | (itype == CALL_TLBWR);
2535 break;
2536 case CALL_TLBP:
2537 case CALL_TLBR:
2538 /* a1 = 0 for probe, 1 for read */
2539 *a++ = 0x223f0000 | (itype == CALL_TLBR);
2540 break;
2541 case CALL_BREAK:
2542 case CALL_SYSCALL:
2543 *a++ = 0x223f0000 | (itype == CALL_BREAK? EXCEPTION_BP : EXCEPTION_SYS);
2544 break;
2545 }
2546
2547 /* Put PC into the cpu struct (both pc and pc_last). */
2548 *a++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */
2549 *a++ = 0xb4d00000 | ofs_pc_last;/* stq t5,"pc_last"(a0) */
2550
2551 /* Save a0 and the old return address on the stack: */
2552 *a++ = 0x23deff80; /* lda sp,-128(sp) */
2553
2554 *a++ = 0xb75e0000; /* stq ra,0(sp) */
2555 *a++ = 0xb61e0008; /* stq a0,8(sp) */
2556 *a++ = 0xb0fe0018; /* stl t6,24(sp) */
2557 *a++ = 0xb71e0020; /* stq t10,32(sp) */
2558 *a++ = 0xb73e0028; /* stq t11,40(sp) */
2559 *a++ = 0xb51e0030; /* stq t7,48(sp) */
2560 *a++ = 0xb6de0038; /* stq t8,56(sp) */
2561 *a++ = 0xb6fe0040; /* stq t9,64(sp) */
2562
2563 switch (itype) {
2564 case CALL_TLBP:
2565 case CALL_TLBR:
2566 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbpr) - (size_t)&dummy_cpu;
2567 break;
2568 case CALL_TLBWR:
2569 case CALL_TLBWI:
2570 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbwri) - (size_t)&dummy_cpu;
2571 break;
2572 case CALL_RFE:
2573 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_rfe) - (size_t)&dummy_cpu;
2574 break;
2575 case CALL_ERET:
2576 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_eret) - (size_t)&dummy_cpu;
2577 break;
2578 case CALL_BREAK:
2579 case CALL_SYSCALL:
2580 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_simple_exception) - (size_t)&dummy_cpu;
2581 break;
2582 }
2583
2584 *a++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */
2585
2586 /* Call bintrans_fast_tlbwr: */
2587 *a++ = 0x6b5b4000; /* jsr ra,(t12),<after> */
2588
2589 /* Restore the old return address and a0 from the stack: */
2590 *a++ = 0xa75e0000; /* ldq ra,0(sp) */
2591 *a++ = 0xa61e0008; /* ldq a0,8(sp) */
2592 *a++ = 0xa0fe0018; /* ldl t6,24(sp) */
2593 *a++ = 0xa71e0020; /* ldq t10,32(sp) */
2594 *a++ = 0xa73e0028; /* ldq t11,40(sp) */
2595 *a++ = 0xa51e0030; /* ldq t7,48(sp) */
2596 *a++ = 0xa6de0038; /* ldq t8,56(sp) */
2597 *a++ = 0xa6fe0040; /* ldq t9,64(sp) */
2598
2599 *a++ = 0x23de0080; /* lda sp,128(sp) */
2600
2601 /* Load PC from the cpu struct. */
2602 *a++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */
2603
2604 *addrp = (unsigned char *) a;
2605
2606 switch (itype) {
2607 case CALL_ERET:
2608 case CALL_BREAK:
2609 case CALL_SYSCALL:
2610 break;
2611 default:
2612 bintrans_write_pc_inc(addrp);
2613 }
2614
2615 return 1;
2616 }
2617
2618
2619 /*
2620 * bintrans_backend_init():
2621 *
2622 * This is neccessary for broken 2.95.4 compilers on FreeBSD/Alpha 4.9,
2623 * and probably a few others. (For Compaq's CC, and for gcc 3.x, this
2624 * wouldn't be neccessary, and the old code would have worked.)
2625 */
2626 static void bintrans_backend_init(void)
2627 {
2628 int size;
2629 uint32_t *p, *q;
2630
2631
2632 /* "runchunk": */
2633 size = 256; /* NOTE: This MUST be enough, or we fail */
2634 p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
2635 MAP_ANON | MAP_PRIVATE, -1, 0);
2636
2637 /* If mmap() failed, try malloc(): */
2638 if (p == NULL) {
2639 p = malloc(size);
2640 if (p == NULL) {
2641 fprintf(stderr, "bintrans_backend_init(): out of memory\n");
2642 exit(1);
2643 }
2644 }
2645
2646 bintrans_runchunk = (void *)p;
2647
2648 *p++ = 0x23deffa0; /* lda sp,-0x60(sp) */
2649 *p++ = 0xb75e0000; /* stq ra,0(sp) */
2650 *p++ = 0xb53e0008; /* stq s0,8(sp) */
2651 *p++ = 0xb55e0010; /* stq s1,16(sp) */
2652 *p++ = 0xb57e0018; /* stq s2,24(sp) */
2653 *p++ = 0xb59e0020; /* stq s3,32(sp) */
2654 *p++ = 0xb5be0028; /* stq s4,40(sp) */
2655 *p++ = 0xb5de0030; /* stq s5,48(sp) */
2656 *p++ = 0xb5fe0038; /* stq s6,56(sp) */
2657 *p++ = 0xb7be0058; /* stq gp,0x58(sp) */
2658
2659 *p++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */
2660 *p++ = 0xa0f00000 | ofs_n; /* ldl t6,"bintrans_instructions_executed"(a0) */
2661 *p++ = 0xa5100000 | ofs_a0; /* ldq t7,"a0"(a0) */
2662 *p++ = 0xa6d00000 | ofs_a1; /* ldq t8,"a1"(a0) */
2663 *p++ = 0xa6f00000 | ofs_s0; /* ldq t9,"s0"(a0) */
2664 *p++ = 0xa1300000 | ofs_ds; /* ldl s0,"delay_slot"(a0) */
2665 *p++ = 0xa5500000 | ofs_ja; /* ldq s1,"delay_jmpaddr"(a0) */
2666 *p++ = 0xa5700000 | ofs_sp; /* ldq s2,"gpr[sp]"(a0) */
2667 *p++ = 0xa5900000 | ofs_ra; /* ldq s3,"gpr[ra]"(a0) */
2668 *p++ = 0xa5b00000 | ofs_t0; /* ldq s4,"gpr[t0]"(a0) */
2669 *p++ = 0xa5d00000 | ofs_t1; /* ldq s5,"gpr[t1]"(a0) */
2670 *p++ = 0xa5f00000 | ofs_t2; /* ldq s6,"gpr[t2]"(a0) */
2671 *p++ = 0xa7100000 | ofs_tbl0; /* ldq t10,table0(a0) */
2672 *p++ = 0xa7300000 | ofs_v0; /* ldq t11,"gpr[v0]"(a0) */
2673
2674 *p++ = 0x6b514000; /* jsr ra,(a1),<back> */
2675
2676 *p++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */
2677 *p++ = 0xb0f00000 | ofs_n; /* stl t6,"bintrans_instructions_executed"(a0) */
2678 *p++ = 0xb5100000 | ofs_a0; /* stq t7,"a0"(a0) */
2679 *p++ = 0xb6d00000 | ofs_a1; /* stq t8,"a1"(a0) */
2680 *p++ = 0xb6f00000 | ofs_s0; /* stq t9,"s0"(a0) */
2681 *p++ = 0xb1300000 | ofs_ds; /* stl s0,"delay_slot"(a0) */
2682 *p++ = 0xb5500000 | ofs_ja; /* stq s1,"delay_jmpaddr"(a0) */
2683 *p++ = 0xb5700000 | ofs_sp; /* stq s2,"gpr[sp]"(a0) */
2684 *p++ = 0xb5900000 | ofs_ra; /* stq s3,"gpr[ra]"(a0) */
2685 *p++ = 0xb5b00000 | ofs_t0; /* stq s4,"gpr[t0]"(a0) */
2686 *p++ = 0xb5d00000 | ofs_t1; /* stq s5,"gpr[t1]"(a0) */
2687 *p++ = 0xb5f00000 | ofs_t2; /* stq s6,"gpr[t2]"(a0) */
2688 *p++ = 0xb7300000 | ofs_v0; /* stq t11,"gpr[v0]"(a0) */
2689
2690 *p++ = 0xa75e0000; /* ldq ra,0(sp) */
2691 *p++ = 0xa53e0008; /* ldq s0,8(sp) */
2692 *p++ = 0xa55e0010; /* ldq s1,16(sp) */
2693 *p++ = 0xa57e0018; /* ldq s2,24(sp) */
2694 *p++ = 0xa59e0020; /* ldq s3,32(sp) */
2695 *p++ = 0xa5be0028; /* ldq s4,40(sp) */
2696 *p++ = 0xa5de0030; /* ldq s5,48(sp) */
2697 *p++ = 0xa5fe0038; /* ldq s6,56(sp) */
2698 *p++ = 0xa7be0058; /* ldq gp,0x58(sp) */
2699 *p++ = 0x23de0060; /* lda sp,0x60(sp) */
2700 *p++ = 0x6bfa8001; /* ret */
2701
2702
2703 /* "jump to 32bit pc": */
2704 size = 128; /* WARNING! Don't make this too small. */
2705 p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
2706 MAP_ANON | MAP_PRIVATE, -1, 0);
2707
2708 /* If mmap() failed, try malloc(): */
2709 if (p == NULL) {
2710 p = malloc(size);
2711 if (p == NULL) {
2712 fprintf(stderr, "bintrans_backend_init(): out of memory\n");
2713 exit(1);
2714 }
2715 }
2716
2717 bintrans_jump_to_32bit_pc = (void *)p;
2718
2719 /* Don't execute too many instructions: */
2720 *p++ = 0x205f0000 | (N_SAFE_BINTRANS_LIMIT-1); /* lda t1,safe-1 */
2721
2722 *p++ = 0x40e20da1; /* cmple t6,t1,t0 */
2723 q = p; /* *q is updated later */
2724 *p++ = 0xe4200001; /* beq ret (far below) */
2725
2726 *p++ = 0x40c01411; /* addq t5,0,a1 */
2727
2728 /*
2729 * Special case for 32-bit addressing:
2730 *
2731 * t1 = 1023;
2732 * t2 = ((a1 >> 22) & t1) * sizeof(void *);
2733 * t3 = ((a1 >> 12) & t1) * sizeof(void *);
2734 * t1 = a1 & 4095;
2735 */
2736 *p++ = 0x205f1ff8; /* lda t1,1023 * 8 */
2737 *p++ = 0x4a227683; /* srl a1,19,t2 */
2738 *p++ = 0x4a213684; /* srl a1, 9,t3 */
2739 *p++ = 0x44620003; /* and t2,t1,t2 */
2740
2741 /*
2742 * t10 is vaddr_to_hostaddr_table0
2743 *
2744 * a3 = tbl0[t2] (load entry from tbl0)
2745 */
2746 *p++ = 0x43030412; /* addq t10,t2,a2 */
2747 *p++ = 0x44820004; /* and t3,t1,t3 */
2748 *p++ = 0xa6720000; /* ldq a3,0(a2) */
2749 *p++ = 0x205f0ffc; /* lda t1,0xffc */
2750
2751 /*
2752 * a3 = tbl1[t3] (load entry from tbl1 (which is a3))
2753 */
2754 *p++ = 0x42640413; /* addq a3,t3,a3 */
2755 *p++ = 0x46220002; /* and a1,t1,t1 */
2756
2757 *p++ = 0xa6730000 | ofs_c0; /* ldq a3,chunks[0](a3) */
2758
2759 /*
2760 * NULL? Then just return.
2761 */
2762 *p++ = 0xf6600001; /* bne a3,<ok> */
2763 *p++ = 0x6bfa8001; /* ret */
2764
2765 *p++ = 0x40530402; /* addq t1,a3,t1 */
2766 *p++ = 0xa0220000; /* ldl t0,0(t1) */
2767
2768 /* No translation? Then return. */
2769 *p++ = 0xe4200003; /* beq t0,<skip> */
2770
2771 *p++ = 0xa4700000 | ofs_cb; /* ldq t2,chunk_base_address(a0) */
2772
2773 *p++ = 0x40230401; /* addq t0,t2,t0 */
2774 *p++ = 0x6be10000; /* jmp (t0) */
2775
2776 /* Now, update *q to point here: */
2777 *q = 0xe4200000 | (((size_t)p - (size_t)q)/4 - 1); /* beq ret */
2778
2779 /* Return to the main translation loop. */
2780 *p++ = 0x6bfa8001; /* ret */
2781 }
2782

  ViewVC Help
Powered by ViewVC 1.1.26