1 |
/* |
2 |
* Copyright (C) 2004-2005 Anders Gavare. All rights reserved. |
3 |
* |
4 |
* Redistribution and use in source and binary forms, with or without |
5 |
* modification, are permitted provided that the following conditions are met: |
6 |
* |
7 |
* 1. Redistributions of source code must retain the above copyright |
8 |
* notice, this list of conditions and the following disclaimer. |
9 |
* 2. Redistributions in binary form must reproduce the above copyright |
10 |
* notice, this list of conditions and the following disclaimer in the |
11 |
* documentation and/or other materials provided with the distribution. |
12 |
* 3. The name of the author may not be used to endorse or promote products |
13 |
* derived from this software without specific prior written permission. |
14 |
* |
15 |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
16 |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 |
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
19 |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 |
* SUCH DAMAGE. |
26 |
* |
27 |
* |
28 |
* $Id: bintrans_alpha.c,v 1.119 2005/06/22 10:12:25 debug Exp $ |
29 |
* |
30 |
* Alpha specific code for dynamic binary translation. |
31 |
* |
32 |
* See bintrans.c for more information. Included from bintrans.c. |
33 |
* |
34 |
* |
35 |
* Some Alpha registers that are reasonable to use: |
36 |
* |
37 |
* t5..t7 6..8 3 |
38 |
* s0..s6 9..15 7 |
39 |
* a1..a5 17..21 5 |
40 |
* t8..t11 22..25 4 |
41 |
* |
42 |
* These can be "mapped" to MIPS registers in the translated code, except a0 |
43 |
* which points to the cpu struct, and t0..t4 (or so) which are used by the |
44 |
* translated code as temporaries. |
45 |
* |
46 |
* 3 + 7 + 5 + 4 = 19 available registers. Of course, all (except s0..s6) must |
47 |
* be saved when calling external functions, such as when calling tlbp and |
48 |
* other external functions. |
49 |
* |
50 |
* Which are the 19 most commonly used MIPS registers? (This will include the |
51 |
* pc, and the "current number of executed translated instructions.) |
52 |
* |
53 |
* The current allocation is as follows: |
54 |
* |
55 |
* Alpha: MIPS: |
56 |
* ------ ----- |
57 |
* |
58 |
* t5 pc (64-bit) |
59 |
* t6 bintrans_instructions_executed (32-bit int) |
60 |
* t7 a0 (mips register 4) (64-bit) |
61 |
* t8 a1 (mips register 5) (64-bit) |
62 |
* t9 s0 (mips register 16) (64-bit) |
63 |
* t10 table0 cached (for load/store) |
64 |
* t11 v0 (mips register 2) (64-bit) |
65 |
* s0 delay_slot (32-bit int) |
66 |
* s1 delay_jmpaddr (64-bit) |
67 |
* s2 sp (mips register 29) (64-bit) |
68 |
* s3 ra (mips register 31) (64-bit) |
69 |
* s4 t0 (mips register 8) (64-bit) |
70 |
* s5 t1 (mips register 9) (64-bit) |
71 |
* s6 t2 (mips register 10) (64-bit) |
72 |
*/ |
73 |
|
74 |
#define MIPSREG_PC -3 |
75 |
#define MIPSREG_DELAY_SLOT -2 |
76 |
#define MIPSREG_DELAY_JMPADDR -1 |
77 |
|
78 |
#define ALPHA_T0 1 |
79 |
#define ALPHA_T1 2 |
80 |
#define ALPHA_T2 3 |
81 |
#define ALPHA_T3 4 |
82 |
#define ALPHA_T4 5 |
83 |
#define ALPHA_T5 6 |
84 |
#define ALPHA_T6 7 |
85 |
#define ALPHA_T7 8 |
86 |
#define ALPHA_S0 9 |
87 |
#define ALPHA_S1 10 |
88 |
#define ALPHA_S2 11 |
89 |
#define ALPHA_S3 12 |
90 |
#define ALPHA_S4 13 |
91 |
#define ALPHA_S5 14 |
92 |
#define ALPHA_S6 15 |
93 |
#define ALPHA_A0 16 |
94 |
#define ALPHA_A1 17 |
95 |
#define ALPHA_A2 18 |
96 |
#define ALPHA_A3 19 |
97 |
#define ALPHA_A4 20 |
98 |
#define ALPHA_A5 21 |
99 |
#define ALPHA_T8 22 |
100 |
#define ALPHA_T9 23 |
101 |
#define ALPHA_T10 24 |
102 |
#define ALPHA_T11 25 |
103 |
#define ALPHA_ZERO 31 |
104 |
|
105 |
static int map_MIPS_to_Alpha[32] = { |
106 |
ALPHA_ZERO, -1, ALPHA_T11, -1, /* 0 .. 3 */ |
107 |
ALPHA_T7, ALPHA_T8, -1, -1, /* 4 .. 7 */ |
108 |
ALPHA_S4, ALPHA_S5, ALPHA_S6, -1, /* 8 .. 11 */ |
109 |
-1, -1, -1, -1, /* 12 .. 15 */ |
110 |
ALPHA_T9, -1, -1, -1, /* 16 .. 19 */ |
111 |
-1, -1, -1, -1, /* 20 .. 23 */ |
112 |
-1, -1, -1, -1, /* 24 .. 27 */ |
113 |
-1, ALPHA_S2, -1, ALPHA_S3, /* 28 .. 31 */ |
114 |
}; |
115 |
|
116 |
|
117 |
struct cpu dummy_cpu; |
118 |
struct mips_coproc dummy_coproc; |
119 |
struct vth32_table dummy_vth32_table; |
120 |
|
121 |
unsigned char bintrans_alpha_imb[32] = { |
122 |
0x86, 0x00, 0x00, 0x00, /* imb */ |
123 |
0x01, 0x80, 0xfa, 0x6b, /* ret */ |
124 |
0x1f, 0x04, 0xff, 0x47, /* nop */ |
125 |
0x00, 0x00, 0xfe, 0x2e, /* unop */ |
126 |
0x1f, 0x04, 0xff, 0x47, /* nop */ |
127 |
0x00, 0x00, 0xfe, 0x2e, /* unop */ |
128 |
0x1f, 0x04, 0xff, 0x47, /* nop */ |
129 |
0x00, 0x00, 0xfe, 0x2e /* unop */ |
130 |
}; |
131 |
|
132 |
|
133 |
/* |
134 |
* bintrans_host_cacheinvalidate() |
135 |
* |
136 |
* Invalidate the host's instruction cache. On Alpha, we do this by |
137 |
* executing an imb instruction. |
138 |
* |
139 |
* NOTE: A simple asm("imb"); would be enough here, but not all |
140 |
* compilers have such simple constructs, so an entire function has to |
141 |
* be written as bintrans_alpha_imb[] above. |
142 |
*/ |
143 |
static void bintrans_host_cacheinvalidate(unsigned char *p, size_t len) |
144 |
{ |
145 |
/* Long form of ``asm("imb");'' */ |
146 |
|
147 |
void (*f)(void); |
148 |
f = (void *)&bintrans_alpha_imb[0]; |
149 |
f(); |
150 |
} |
151 |
|
152 |
|
153 |
/* |
154 |
* lda sp,-128(sp) some margin |
155 |
* stq ra,0(sp) |
156 |
* stq s0,8(sp) |
157 |
* stq s1,16(sp) |
158 |
* stq s2,24(sp) |
159 |
* stq s3,32(sp) |
160 |
* stq s4,40(sp) |
161 |
* stq s5,48(sp) |
162 |
* stq s6,56(sp) |
163 |
* |
164 |
* jsr ra,(a1),<back> |
165 |
* back: |
166 |
* |
167 |
* ldq ra,0(sp) |
168 |
* ldq s0,8(sp) |
169 |
* ldq s1,16(sp) |
170 |
* ldq s2,24(sp) |
171 |
* ldq s3,32(sp) |
172 |
* ldq s4,40(sp) |
173 |
* ldq s5,48(sp) |
174 |
* ldq s6,56(sp) |
175 |
* lda sp,128(sp) |
176 |
* ret |
177 |
*/ |
178 |
/* note: offsetof (in stdarg.h) could possibly be used, but I'm not sure |
179 |
if it will take care of the compiler problems... */ |
180 |
#define ofs_pc (((size_t)&dummy_cpu.pc) - ((size_t)&dummy_cpu)) |
181 |
#define ofs_pc_last (((size_t)&dummy_cpu.cd.mips.pc_last) - ((size_t)&dummy_cpu)) |
182 |
#define ofs_n (((size_t)&dummy_cpu.cd.mips.bintrans_instructions_executed) - ((size_t)&dummy_cpu)) |
183 |
#define ofs_ds (((size_t)&dummy_cpu.cd.mips.delay_slot) - ((size_t)&dummy_cpu)) |
184 |
#define ofs_ja (((size_t)&dummy_cpu.cd.mips.delay_jmpaddr) - ((size_t)&dummy_cpu)) |
185 |
#define ofs_sp (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_SP]) - ((size_t)&dummy_cpu)) |
186 |
#define ofs_ra (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_RA]) - ((size_t)&dummy_cpu)) |
187 |
#define ofs_a0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A0]) - ((size_t)&dummy_cpu)) |
188 |
#define ofs_a1 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A1]) - ((size_t)&dummy_cpu)) |
189 |
#define ofs_t0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T0]) - ((size_t)&dummy_cpu)) |
190 |
#define ofs_t1 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T1]) - ((size_t)&dummy_cpu)) |
191 |
#define ofs_t2 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T2]) - ((size_t)&dummy_cpu)) |
192 |
#define ofs_v0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_V0]) - ((size_t)&dummy_cpu)) |
193 |
#define ofs_s0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_S0]) - ((size_t)&dummy_cpu)) |
194 |
#define ofs_tbl0 (((size_t)&dummy_cpu.cd.mips.vaddr_to_hostaddr_table0) - ((size_t)&dummy_cpu)) |
195 |
#define ofs_c0 ((size_t)&dummy_vth32_table.bintrans_chunks[0] - (size_t)&dummy_vth32_table) |
196 |
#define ofs_cb (((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu) |
197 |
|
198 |
|
199 |
static uint32_t bintrans_alpha_load_32bit[18] = { |
200 |
/* |
201 |
* t1 = 1023; |
202 |
* t2 = ((a1 >> 22) & t1) * sizeof(void *); |
203 |
* t3 = ((a1 >> 12) & t1) * sizeof(void *); |
204 |
* t1 = a1 & 4095; |
205 |
* |
206 |
* f8 1f 5f 20 lda t1,1023 * 8 |
207 |
* 83 76 22 4a srl a1,19,t2 |
208 |
* 84 36 21 4a srl a1, 9,t3 |
209 |
* 03 00 62 44 and t2,t1,t2 |
210 |
*/ |
211 |
0x205f1ff8, |
212 |
0x4a227683, |
213 |
0x4a213684, |
214 |
0x44620003, |
215 |
|
216 |
/* |
217 |
* t10 is vaddr_to_hostaddr_table0 |
218 |
* |
219 |
* a3 = tbl0[t2] (load entry from tbl0) |
220 |
* 12 04 03 43 addq t10,t2,a2 |
221 |
*/ |
222 |
0x43030412, |
223 |
|
224 |
/* 04 00 82 44 and t3,t1,t3 */ |
225 |
0x44820004, |
226 |
|
227 |
/* 00 00 72 a6 ldq a3,0(a2) */ |
228 |
0xa6720000, |
229 |
|
230 |
/* 24 37 80 48 sll t3,0x1,t3 */ |
231 |
0x48803724, |
232 |
|
233 |
/* ff 0f 5f 20 lda t1,4095 */ |
234 |
0x205f0fff, |
235 |
|
236 |
/* |
237 |
* a3 = tbl1[t3] (load entry from tbl1 (which is a3)) |
238 |
* 13 04 64 42 addq a3,t3,a3 |
239 |
*/ |
240 |
0x42640413, |
241 |
|
242 |
/* 02 00 22 46 and a1,t1,t1 */ |
243 |
0x46220002, |
244 |
|
245 |
/* 00 00 73 a6 ldq a3,0(a3) */ |
246 |
0xa6730000, |
247 |
|
248 |
/* NULL? Then return failure at once. */ |
249 |
/* beq a3, return */ |
250 |
0xe6600002, |
251 |
|
252 |
/* The rest of the load/store code was written with t3 as the address. */ |
253 |
|
254 |
/* Add the offset within the page: */ |
255 |
/* 04 04 62 42 addq a3,t1,t3 */ |
256 |
0x42620404, |
257 |
|
258 |
0x6be50000, /* jmp (t4) */ |
259 |
|
260 |
/* return: */ |
261 |
0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */ |
262 |
0x44270407, /* or t0,t6,t6 */ |
263 |
0x6bfa8001 /* ret */ |
264 |
}; |
265 |
|
266 |
static uint32_t bintrans_alpha_store_32bit[18] = { |
267 |
/* |
268 |
* t1 = 1023; |
269 |
* t2 = ((a1 >> 22) & t1) * sizeof(void *); |
270 |
* t3 = ((a1 >> 12) & t1) * sizeof(void *); |
271 |
* t1 = a1 & 4095; |
272 |
* |
273 |
* f8 1f 5f 20 lda t1,1023 * 8 |
274 |
* 83 76 22 4a srl a1,19,t2 |
275 |
* 84 36 21 4a srl a1, 9,t3 |
276 |
* 03 00 62 44 and t2,t1,t2 |
277 |
*/ |
278 |
0x205f1ff8, |
279 |
0x4a227683, |
280 |
0x4a213684, |
281 |
0x44620003, |
282 |
|
283 |
/* |
284 |
* t10 is vaddr_to_hostaddr_table0 |
285 |
* |
286 |
* a3 = tbl0[t2] (load entry from tbl0) |
287 |
* 12 04 03 43 addq t10,t2,a2 |
288 |
*/ |
289 |
0x43030412, |
290 |
|
291 |
/* 04 00 82 44 and t3,t1,t3 */ |
292 |
0x44820004, |
293 |
|
294 |
/* 00 00 72 a6 ldq a3,0(a2) */ |
295 |
0xa6720000, |
296 |
|
297 |
/* 24 37 80 48 sll t3,0x1,t3 */ |
298 |
0x48803724, |
299 |
|
300 |
/* ff 0f 5f 20 lda t1,4095 */ |
301 |
0x205f0fff, |
302 |
|
303 |
/* |
304 |
* a3 = tbl1[t3] (load entry from tbl1 (which is a3)) |
305 |
* 13 04 64 42 addq a3,t3,a3 |
306 |
*/ |
307 |
0x42640413, |
308 |
|
309 |
/* 02 00 22 46 and a1,t1,t1 */ |
310 |
0x46220002, |
311 |
|
312 |
/* 00 00 73 a6 ldq a3,8(a3) */ |
313 |
0xa6730008, |
314 |
|
315 |
/* NULL? Then return failure at once. */ |
316 |
/* beq a3, return */ |
317 |
0xe6600002, |
318 |
|
319 |
/* The rest of the load/store code was written with t3 as the address. */ |
320 |
|
321 |
/* Add the offset within the page: */ |
322 |
/* 04 04 62 42 addq a3,t1,t3 */ |
323 |
0x42620404, |
324 |
|
325 |
0x6be50000, /* jmp (t4) */ |
326 |
|
327 |
/* return: */ |
328 |
0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */ |
329 |
0x44270407, /* or t0,t6,t6 */ |
330 |
0x6bfa8001 /* ret */ |
331 |
}; |
332 |
|
333 |
static void (*bintrans_runchunk)(struct cpu *, unsigned char *); |
334 |
|
335 |
static void (*bintrans_jump_to_32bit_pc)(struct cpu *); |
336 |
|
337 |
static void (*bintrans_load_32bit) |
338 |
(struct cpu *) = (void *)bintrans_alpha_load_32bit; |
339 |
|
340 |
static void (*bintrans_store_32bit) |
341 |
(struct cpu *) = (void *)bintrans_alpha_store_32bit; |
342 |
|
343 |
|
344 |
/* |
345 |
* bintrans_write_quickjump(): |
346 |
*/ |
347 |
static void bintrans_write_quickjump(struct memory *mem, |
348 |
unsigned char *quickjump_code, uint32_t chunkoffset) |
349 |
{ |
350 |
int ofs; |
351 |
uint64_t alpha_addr = chunkoffset + |
352 |
(size_t)mem->translation_code_chunk_space; |
353 |
uint32_t *a = (uint32_t *)quickjump_code; |
354 |
|
355 |
ofs = (alpha_addr - ((size_t)a+4)) / 4; |
356 |
|
357 |
/* printf("chunkoffset=%i, %016llx %016llx %i\n", |
358 |
chunkoffset, (long long)alpha_addr, (long long)a, ofs); */ |
359 |
|
360 |
if (ofs > -0xfffff && ofs < 0xfffff) { |
361 |
*a++ = 0xc3e00000 | (ofs & 0x1fffff); /* br <chunk> */ |
362 |
} |
363 |
} |
364 |
|
365 |
|
366 |
/* |
367 |
* bintrans_write_chunkreturn(): |
368 |
*/ |
369 |
static void bintrans_write_chunkreturn(unsigned char **addrp) |
370 |
{ |
371 |
uint32_t *a = (uint32_t *) *addrp; |
372 |
*a++ = 0x6bfa8001; /* ret */ |
373 |
*addrp = (unsigned char *) a; |
374 |
} |
375 |
|
376 |
|
377 |
/* |
378 |
* bintrans_write_chunkreturn_fail(): |
379 |
*/ |
380 |
static void bintrans_write_chunkreturn_fail(unsigned char **addrp) |
381 |
{ |
382 |
uint32_t *a = (uint32_t *) *addrp; |
383 |
/* 00 01 3f 24 ldah t0,256 */ |
384 |
/* 07 04 27 44 or t0,t6,t6 */ |
385 |
*a++ = 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16); |
386 |
*a++ = 0x44270407; |
387 |
*a++ = 0x6bfa8001; /* ret */ |
388 |
*addrp = (unsigned char *) a; |
389 |
} |
390 |
|
391 |
|
392 |
/* |
393 |
* bintrans_move_MIPS_reg_into_Alpha_reg(): |
394 |
*/ |
395 |
static void bintrans_move_MIPS_reg_into_Alpha_reg(unsigned char **addrp, int mipsreg, int alphareg) |
396 |
{ |
397 |
uint32_t *a = (uint32_t *) *addrp; |
398 |
int ofs, alpha_mips_reg; |
399 |
|
400 |
switch (mipsreg) { |
401 |
case MIPSREG_PC: |
402 |
/* addq t5,0,alphareg */ |
403 |
*a++ = 0x40c01400 | alphareg; |
404 |
break; |
405 |
case MIPSREG_DELAY_SLOT: |
406 |
/* addq s0,0,alphareg */ |
407 |
*a++ = 0x41201400 | alphareg; |
408 |
break; |
409 |
case MIPSREG_DELAY_JMPADDR: |
410 |
/* addq s1,0,alphareg */ |
411 |
*a++ = 0x41401400 | alphareg; |
412 |
break; |
413 |
default: |
414 |
alpha_mips_reg = map_MIPS_to_Alpha[mipsreg]; |
415 |
if (alpha_mips_reg < 0) { |
416 |
ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu; |
417 |
/* ldq alphareg,gpr[mipsreg](a0) */ |
418 |
*a++ = 0xa4100000 | (alphareg << 21) | ofs; |
419 |
} else { |
420 |
/* addq alpha_mips_reg,0,alphareg */ |
421 |
*a++ = 0x40001400 | (alpha_mips_reg << 21) | alphareg; |
422 |
} |
423 |
} |
424 |
*addrp = (unsigned char *) a; |
425 |
} |
426 |
|
427 |
|
428 |
/* |
429 |
* bintrans_move_Alpha_reg_into_MIPS_reg(): |
430 |
*/ |
431 |
static void bintrans_move_Alpha_reg_into_MIPS_reg(unsigned char **addrp, int alphareg, int mipsreg) |
432 |
{ |
433 |
uint32_t *a = (uint32_t *) *addrp; |
434 |
int ofs, alpha_mips_reg; |
435 |
|
436 |
switch (mipsreg) { |
437 |
case MIPSREG_PC: |
438 |
/* addq alphareg,0,t5 */ |
439 |
*a++ = 0x40001406 | (alphareg << 21); |
440 |
break; |
441 |
case MIPSREG_DELAY_SLOT: |
442 |
/* addq alphareg,0,s0 */ |
443 |
*a++ = 0x40001409 | (alphareg << 21); |
444 |
break; |
445 |
case MIPSREG_DELAY_JMPADDR: |
446 |
/* addq alphareg,0,s1 */ |
447 |
*a++ = 0x4000140a | (alphareg << 21); |
448 |
break; |
449 |
case 0: /* the zero register */ |
450 |
break; |
451 |
default: |
452 |
alpha_mips_reg = map_MIPS_to_Alpha[mipsreg]; |
453 |
if (alpha_mips_reg < 0) { |
454 |
/* stq alphareg,gpr[mipsreg](a0) */ |
455 |
ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu; |
456 |
*a++ = 0xb4100000 | (alphareg << 21) | ofs; |
457 |
} else { |
458 |
/* addq alphareg,0,alpha_mips_reg */ |
459 |
*a++ = 0x40001400 | (alphareg << 21) | alpha_mips_reg; |
460 |
} |
461 |
} |
462 |
*addrp = (unsigned char *) a; |
463 |
} |
464 |
|
465 |
|
466 |
/* |
467 |
* bintrans_write_pc_inc(): |
468 |
*/ |
469 |
static void bintrans_write_pc_inc(unsigned char **addrp) |
470 |
{ |
471 |
uint32_t *a = (uint32_t *) *addrp; |
472 |
|
473 |
/* lda t6,1(t6) */ |
474 |
*a++ = 0x20e70001; |
475 |
|
476 |
/* lda t5,4(t5) */ |
477 |
*a++ = 0x20c60004; |
478 |
|
479 |
*addrp = (unsigned char *) a; |
480 |
} |
481 |
|
482 |
|
483 |
/* |
484 |
* bintrans_write_instruction__addiu_etc(): |
485 |
*/ |
486 |
static int bintrans_write_instruction__addiu_etc(unsigned char **addrp, |
487 |
int rt, int rs, int imm, int instruction_type) |
488 |
{ |
489 |
uint32_t *a; |
490 |
unsigned int uimm; |
491 |
int alpha_rs, alpha_rt; |
492 |
|
493 |
/* TODO: overflow detection for ADDI and DADDI */ |
494 |
switch (instruction_type) { |
495 |
case HI6_ADDI: |
496 |
case HI6_DADDI: |
497 |
return 0; |
498 |
} |
499 |
|
500 |
a = (uint32_t *) *addrp; |
501 |
|
502 |
if (rt == 0) |
503 |
goto rt0; |
504 |
|
505 |
uimm = imm & 0xffff; |
506 |
|
507 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
508 |
alpha_rt = map_MIPS_to_Alpha[rt]; |
509 |
|
510 |
if (uimm == 0 && (instruction_type == HI6_ADDI || |
511 |
instruction_type == HI6_ADDIU || instruction_type == HI6_DADDI || |
512 |
instruction_type == HI6_DADDIU || instruction_type == HI6_ORI)) { |
513 |
if (alpha_rs >= 0 && alpha_rt >= 0) { |
514 |
/* addq rs,0,rt */ |
515 |
*a++ = 0x40001400 | (alpha_rs << 21) | alpha_rt; |
516 |
} else { |
517 |
*addrp = (unsigned char *) a; |
518 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0); |
519 |
bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt); |
520 |
a = (uint32_t *) *addrp; |
521 |
} |
522 |
goto rt0; |
523 |
} |
524 |
|
525 |
if (alpha_rs < 0) { |
526 |
/* ldq t0,"rs"(a0) */ |
527 |
*addrp = (unsigned char *) a; |
528 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0); |
529 |
a = (uint32_t *) *addrp; |
530 |
alpha_rs = ALPHA_T0; |
531 |
} |
532 |
|
533 |
if (alpha_rt < 0) |
534 |
alpha_rt = ALPHA_T0; |
535 |
|
536 |
/* Place the result of the calculation in alpha_rt: */ |
537 |
|
538 |
switch (instruction_type) { |
539 |
case HI6_ADDIU: |
540 |
case HI6_DADDIU: |
541 |
case HI6_ADDI: |
542 |
case HI6_DADDI: |
543 |
if (uimm < 256) { |
544 |
if (instruction_type == HI6_ADDI || |
545 |
instruction_type == HI6_ADDIU) { |
546 |
/* addl rs,uimm,rt */ |
547 |
*a++ = 0x40001000 | (alpha_rs << 21) |
548 |
| (uimm << 13) | alpha_rt; |
549 |
} else { |
550 |
/* addq rs,uimm,rt */ |
551 |
*a++ = 0x40001400 | (alpha_rs << 21) |
552 |
| (uimm << 13) | alpha_rt; |
553 |
} |
554 |
} else { |
555 |
/* lda rt,imm(rs) */ |
556 |
*a++ = 0x20000000 | (alpha_rt << 21) | (alpha_rs << 16) | uimm; |
557 |
if (instruction_type == HI6_ADDI || |
558 |
instruction_type == HI6_ADDIU) { |
559 |
/* sign extend, 32->64 bits: addl t0,zero,t0 */ |
560 |
*a++ = 0x40001000 | (alpha_rt << 21) | alpha_rt; |
561 |
} |
562 |
} |
563 |
break; |
564 |
case HI6_ANDI: |
565 |
case HI6_ORI: |
566 |
case HI6_XORI: |
567 |
if (uimm >= 256) { |
568 |
/* lda t1,4660 */ |
569 |
*a++ = 0x205f0000 | uimm; |
570 |
if (uimm & 0x8000) { |
571 |
/* 01 00 42 24 ldah t1,1(t1) <-- if negative only */ |
572 |
*a++ = 0x24420001; |
573 |
} |
574 |
} |
575 |
|
576 |
switch (instruction_type) { |
577 |
case HI6_ANDI: |
578 |
if (uimm < 256) { |
579 |
/* and rs,uimm,rt */ |
580 |
*a++ = 0x44001000 | (alpha_rs << 21) |
581 |
| (uimm << 13) | alpha_rt; |
582 |
} else { |
583 |
/* and rs,t1,rt */ |
584 |
*a++ = 0x44020000 | (alpha_rs << 21) | alpha_rt; |
585 |
} |
586 |
break; |
587 |
case HI6_ORI: |
588 |
if (uimm < 256) { |
589 |
/* or rs,uimm,rt */ |
590 |
*a++ = 0x44001400 | (alpha_rs << 21) |
591 |
| (uimm << 13) | alpha_rt; |
592 |
} else { |
593 |
/* or rs,t1,rt */ |
594 |
*a++ = 0x44020400 | (alpha_rs << 21) | alpha_rt; |
595 |
} |
596 |
break; |
597 |
case HI6_XORI: |
598 |
if (uimm < 256) { |
599 |
/* xor rs,uimm,rt */ |
600 |
*a++ = 0x44001800 | (alpha_rs << 21) |
601 |
| (uimm << 13) | alpha_rt; |
602 |
} else { |
603 |
/* xor rs,t1,rt */ |
604 |
*a++ = 0x44020800 | (alpha_rs << 21) | alpha_rt; |
605 |
} |
606 |
break; |
607 |
} |
608 |
break; |
609 |
case HI6_SLTI: |
610 |
case HI6_SLTIU: |
611 |
/* lda t1,4660 */ |
612 |
*a++ = 0x205f0000 | uimm; |
613 |
|
614 |
switch (instruction_type) { |
615 |
case HI6_SLTI: |
616 |
/* cmplt rs,t1,rt */ |
617 |
*a++ = 0x400209a0 | (alpha_rs << 21) | alpha_rt; |
618 |
break; |
619 |
case HI6_SLTIU: |
620 |
/* cmpult rs,t1,rt */ |
621 |
*a++ = 0x400203a0 | (alpha_rs << 21) | alpha_rt; |
622 |
break; |
623 |
} |
624 |
break; |
625 |
} |
626 |
|
627 |
if (alpha_rt == ALPHA_T0) { |
628 |
*a++ = 0x5fff041f; /* fnop */ |
629 |
*addrp = (unsigned char *) a; |
630 |
bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt); |
631 |
a = (uint32_t *) *addrp; |
632 |
} |
633 |
|
634 |
rt0: |
635 |
*addrp = (unsigned char *) a; |
636 |
bintrans_write_pc_inc(addrp); |
637 |
return 1; |
638 |
} |
639 |
|
640 |
|
641 |
/* |
642 |
* bintrans_write_instruction__addu_etc(): |
643 |
*/ |
644 |
static int bintrans_write_instruction__addu_etc(unsigned char **addrp, |
645 |
int rd, int rs, int rt, int sa, int instruction_type) |
646 |
{ |
647 |
unsigned char *a, *unmodified = NULL; |
648 |
int load64 = 0, store = 1, ofs, alpha_rd = ALPHA_T0; |
649 |
|
650 |
alpha_rd = map_MIPS_to_Alpha[rd]; |
651 |
if (alpha_rd < 0) |
652 |
alpha_rd = ALPHA_T0; |
653 |
|
654 |
switch (instruction_type) { |
655 |
case SPECIAL_DIV: |
656 |
case SPECIAL_DIVU: |
657 |
return 0; |
658 |
} |
659 |
|
660 |
switch (instruction_type) { |
661 |
case SPECIAL_DADDU: |
662 |
case SPECIAL_DSUBU: |
663 |
case SPECIAL_OR: |
664 |
case SPECIAL_AND: |
665 |
case SPECIAL_NOR: |
666 |
case SPECIAL_XOR: |
667 |
case SPECIAL_DSLL: |
668 |
case SPECIAL_DSRL: |
669 |
case SPECIAL_DSRA: |
670 |
case SPECIAL_DSLL32: |
671 |
case SPECIAL_DSRL32: |
672 |
case SPECIAL_DSRA32: |
673 |
case SPECIAL_SLT: |
674 |
case SPECIAL_SLTU: |
675 |
case SPECIAL_MOVZ: |
676 |
case SPECIAL_MOVN: |
677 |
load64 = 1; |
678 |
} |
679 |
|
680 |
switch (instruction_type) { |
681 |
case SPECIAL_MULT: |
682 |
case SPECIAL_MULTU: |
683 |
if (rd != 0) |
684 |
return 0; |
685 |
store = 0; |
686 |
break; |
687 |
default: |
688 |
if (rd == 0) |
689 |
goto rd0; |
690 |
} |
691 |
|
692 |
a = *addrp; |
693 |
|
694 |
if ((instruction_type == SPECIAL_ADDU || instruction_type == SPECIAL_DADDU |
695 |
|| instruction_type == SPECIAL_OR) && rt == 0) { |
696 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
697 |
if (!load64) { |
698 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
699 |
} |
700 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd); |
701 |
*addrp = a; |
702 |
goto rd0; |
703 |
} |
704 |
|
705 |
/* t0 = rs, t1 = rt */ |
706 |
if (load64) { |
707 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
708 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1); |
709 |
} else { |
710 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
711 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
712 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1); |
713 |
*a++ = 0x02; *a++ = 0x10; *a++ = 0x40; *a++ = 0x40; /* addl t1,0,t1 */ |
714 |
} |
715 |
|
716 |
switch (instruction_type) { |
717 |
case SPECIAL_ADDU: |
718 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x40; /* addl t0,t1,rd */ |
719 |
break; |
720 |
case SPECIAL_DADDU: |
721 |
*a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq t0,t1,rd */ |
722 |
break; |
723 |
case SPECIAL_SUBU: |
724 |
*a++ = 0x20 + alpha_rd; *a++ = 0x01; *a++ = 0x22; *a++ = 0x40; /* subl t0,t1,t0 */ |
725 |
break; |
726 |
case SPECIAL_DSUBU: |
727 |
*a++ = 0x20 + alpha_rd; *a++ = 0x05; *a++ = 0x22; *a++ = 0x40; /* subq t0,t1,t0 */ |
728 |
break; |
729 |
case SPECIAL_AND: |
730 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and t0,t1,t0 */ |
731 |
break; |
732 |
case SPECIAL_OR: |
733 |
*a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */ |
734 |
break; |
735 |
case SPECIAL_NOR: |
736 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */ |
737 |
*a++ = alpha_rd; *a++ = 0x05; *a++ = 0xe1; *a++ = 0x47; /* not t0,t0 */ |
738 |
break; |
739 |
case SPECIAL_XOR: |
740 |
*a++ = alpha_rd; *a++ = 0x08; *a++ = 0x22; *a++ = 0x44; /* xor t0,t1,t0 */ |
741 |
break; |
742 |
case SPECIAL_SLL: |
743 |
*a++ = 0x21; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */ |
744 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
745 |
break; |
746 |
case SPECIAL_SLLV: |
747 |
/* rd = rt << (rs&31) (logical) t0 = t1 << (t0&31) */ |
748 |
*a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */ |
749 |
*a++ = 0x21; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sll t1,t0,t0 */ |
750 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
751 |
break; |
752 |
case SPECIAL_DSLL: |
753 |
*a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */ |
754 |
break; |
755 |
case SPECIAL_DSLL32: |
756 |
sa += 32; |
757 |
*a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */ |
758 |
break; |
759 |
case SPECIAL_SRA: |
760 |
*a++ = 0x81; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */ |
761 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
762 |
break; |
763 |
case SPECIAL_SRAV: |
764 |
/* rd = rt >> (rs&31) (arithmetic) t0 = t1 >> (t0&31) */ |
765 |
*a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */ |
766 |
*a++ = 0x81; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sra t1,t0,t0 */ |
767 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
768 |
break; |
769 |
case SPECIAL_DSRA: |
770 |
*a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */ |
771 |
break; |
772 |
case SPECIAL_DSRA32: |
773 |
sa += 32; |
774 |
*a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */ |
775 |
break; |
776 |
case SPECIAL_SRL: |
777 |
*a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */ |
778 |
/* Note: bits of sa are distributed among two different bytes. */ |
779 |
*a++ = 0x81; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; |
780 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl */ |
781 |
break; |
782 |
case SPECIAL_SRLV: |
783 |
/* rd = rt >> (rs&31) (logical) t0 = t1 >> (t0&31) */ |
784 |
*a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */ |
785 |
*a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */ |
786 |
*a++ = 0x81; *a++ = 0x06; *a++ = 0x41; *a++ = 0x48; /* srl t1,t0,t0 */ |
787 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
788 |
break; |
789 |
case SPECIAL_DSRL: |
790 |
/* Note: bits of sa are distributed among two different bytes. */ |
791 |
*a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; |
792 |
break; |
793 |
case SPECIAL_DSRL32: |
794 |
/* Note: bits of sa are distributed among two different bytes. */ |
795 |
sa += 32; |
796 |
*a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; |
797 |
break; |
798 |
case SPECIAL_SLT: |
799 |
*a++ = 0xa0 + alpha_rd; *a++ = 0x09; *a++ = 0x22; *a++ = 0x40; /* cmplt t0,t1,t0 */ |
800 |
break; |
801 |
case SPECIAL_SLTU: |
802 |
*a++ = 0xa0 + alpha_rd; *a++ = 0x03; *a++ = 0x22; *a++ = 0x40; /* cmpult t0,t1,t0 */ |
803 |
break; |
804 |
case SPECIAL_MULT: |
805 |
case SPECIAL_MULTU: |
806 |
if (instruction_type == SPECIAL_MULTU) { |
807 |
/* 21 f6 21 48 zapnot t0,0xf,t0 */ |
808 |
/* 22 f6 41 48 zapnot t1,0xf,t1 */ |
809 |
*a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48; |
810 |
*a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; |
811 |
} |
812 |
|
813 |
/* 03 04 22 4c mulq t0,t1,t2 */ |
814 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x22; *a++ = 0x4c; |
815 |
|
816 |
/* 01 10 60 40 addl t2,0,t0 */ |
817 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40; |
818 |
|
819 |
ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu; |
820 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4; |
821 |
|
822 |
/* 81 17 64 48 sra t2,0x20,t0 */ |
823 |
*a++ = 0x81; *a++ = 0x17; *a++ = 0x64; *a++ = 0x48; |
824 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
825 |
ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu; |
826 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4; |
827 |
break; |
828 |
case SPECIAL_MOVZ: |
829 |
/* if rt=0 then rd=rs ==> if t1!=0 then t0=unmodified else t0=rd */ |
830 |
/* 00 00 40 f4 bne t1,unmodified */ |
831 |
unmodified = a; |
832 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xf4; |
833 |
alpha_rd = ALPHA_T0; |
834 |
break; |
835 |
case SPECIAL_MOVN: |
836 |
/* if rt!=0 then rd=rs ==> if t1=0 then t0=unmodified else t0=rd */ |
837 |
/* 00 00 40 e4 beq t1,unmodified */ |
838 |
unmodified = a; |
839 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4; |
840 |
alpha_rd = ALPHA_T0; |
841 |
break; |
842 |
} |
843 |
|
844 |
if (store && alpha_rd == ALPHA_T0) { |
845 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd); |
846 |
} |
847 |
|
848 |
if (unmodified != NULL) |
849 |
*unmodified = ((size_t)a - (size_t)unmodified - 4) / 4; |
850 |
|
851 |
*addrp = a; |
852 |
rd0: |
853 |
bintrans_write_pc_inc(addrp); |
854 |
return 1; |
855 |
} |
856 |
|
857 |
|
858 |
/* |
859 |
* bintrans_write_instruction__branch(): |
860 |
*/ |
861 |
static int bintrans_write_instruction__branch(unsigned char **addrp, |
862 |
int instruction_type, int regimm_type, int rt, int rs, int imm) |
863 |
{ |
864 |
uint32_t *a, *b, *c = NULL; |
865 |
int alpha_rs, alpha_rt, likely = 0, ofs; |
866 |
|
867 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
868 |
alpha_rt = map_MIPS_to_Alpha[rt]; |
869 |
|
870 |
switch (instruction_type) { |
871 |
case HI6_BEQL: |
872 |
case HI6_BNEL: |
873 |
case HI6_BLEZL: |
874 |
case HI6_BGTZL: |
875 |
likely = 1; |
876 |
} |
877 |
|
878 |
/* |
879 |
* t0 = gpr[rt]; t1 = gpr[rs]; |
880 |
* |
881 |
* 50 00 30 a4 ldq t0,80(a0) |
882 |
* 58 00 50 a4 ldq t1,88(a0) |
883 |
*/ |
884 |
|
885 |
switch (instruction_type) { |
886 |
case HI6_BEQ: |
887 |
case HI6_BNE: |
888 |
case HI6_BEQL: |
889 |
case HI6_BNEL: |
890 |
if (alpha_rt < 0) { |
891 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0); |
892 |
alpha_rt = ALPHA_T0; |
893 |
} |
894 |
} |
895 |
|
896 |
if (alpha_rs < 0) { |
897 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T1); |
898 |
alpha_rs = ALPHA_T1; |
899 |
} |
900 |
|
901 |
a = (uint32_t *) *addrp; |
902 |
|
903 |
/* |
904 |
* Compare alpha_rt (t0) and alpha_rs (t1) for equality (BEQ). |
905 |
* If the result was false (equal to zero), then skip a lot |
906 |
* of instructions: |
907 |
* |
908 |
* a1 05 22 40 cmpeq t0,t1,t0 |
909 |
* 01 00 20 e4 beq t0,14 <f+0x14> |
910 |
*/ |
911 |
b = NULL; |
912 |
if ((instruction_type == HI6_BEQ || |
913 |
instruction_type == HI6_BEQL) && rt != rs) { |
914 |
/* cmpeq rt,rs,t0 */ |
915 |
*a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16); |
916 |
b = a; |
917 |
*a++ = 0xe4200001; /* beq */ |
918 |
} |
919 |
if (instruction_type == HI6_BNE || instruction_type == HI6_BNEL) { |
920 |
/* cmpeq rt,rs,t0 */ |
921 |
*a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16); |
922 |
b = a; |
923 |
*a++ = 0xf4200001; /* bne */ |
924 |
} |
925 |
if (instruction_type == HI6_BLEZ || instruction_type == HI6_BLEZL) { |
926 |
/* cmple rs,0,t0 */ |
927 |
*a++ = 0x40001da1 | (alpha_rs << 21); |
928 |
b = a; |
929 |
*a++ = 0xe4200001; /* beq */ |
930 |
} |
931 |
if (instruction_type == HI6_BGTZ || instruction_type == HI6_BGTZL) { |
932 |
/* cmple rs,0,t0 */ |
933 |
*a++ = 0x40001da1 | (alpha_rs << 21); |
934 |
b = a; |
935 |
*a++ = 0xf4200001; /* bne */ |
936 |
} |
937 |
if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BLTZ) { |
938 |
/* cmplt rs,0,t0 */ |
939 |
*a++ = 0x400019a1 | (alpha_rs << 21); |
940 |
b = a; |
941 |
*a++ = 0xe4200001; /* beq */ |
942 |
} |
943 |
if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BGEZ) { |
944 |
*a++ = 0x207fffff; /* lda t2,-1 */ |
945 |
/* cmple rs,t2,t0 */ |
946 |
*a++ = 0x40030da1 | (alpha_rs << 21); |
947 |
b = a; |
948 |
*a++ = 0xf4200001; /* bne */ |
949 |
} |
950 |
|
951 |
/* |
952 |
* Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED |
953 |
* and cpu->delay_jmpaddr = pc + 4 + (imm << 2). |
954 |
* |
955 |
* 04 00 26 20 lda t0,4(t5) add 4 |
956 |
* c8 01 5f 20 lda t1,456 |
957 |
* 4a 04 41 40 s4addq t1,t0,s1 s1 = (t1<<2) + t0 |
958 |
*/ |
959 |
|
960 |
*a++ = 0x20260004; /* lda t0,4(t5) */ |
961 |
*a++ = 0x205f0000 | (imm & 0xffff); /* lda */ |
962 |
*a++ = 0x4041044a; /* s4addq */ |
963 |
|
964 |
/* 02 00 3f 21 lda s0,TO_BE_DELAYED */ |
965 |
*a++ = 0x213f0000 | TO_BE_DELAYED; |
966 |
|
967 |
/* |
968 |
* Special case: "likely"-branches: |
969 |
*/ |
970 |
if (likely) { |
971 |
c = a; |
972 |
*a++ = 0xc3e00001; /* br delayed_ok */ |
973 |
|
974 |
if (b != NULL) |
975 |
*((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4; |
976 |
|
977 |
/* cpu->cd.mips.nullify_next = 1; */ |
978 |
/* 01 00 3f 20 lda t0,1 */ |
979 |
*a++ = 0x203f0001; |
980 |
ofs = (size_t)&dummy_cpu.cd.mips.nullify_next - (size_t)&dummy_cpu; |
981 |
*a++ = 0xb0300000 | (ofs & 0xffff); |
982 |
|
983 |
/* fail, so that the next instruction is handled manually: */ |
984 |
*addrp = (unsigned char *) a; |
985 |
bintrans_write_pc_inc(addrp); |
986 |
bintrans_write_chunkreturn_fail(addrp); |
987 |
a = (uint32_t *) *addrp; |
988 |
|
989 |
if (c != NULL) |
990 |
*((unsigned char *)c) = ((size_t)a - (size_t)c - 4) / 4; |
991 |
} else { |
992 |
/* Normal (non-likely) exit: */ |
993 |
if (b != NULL) |
994 |
*((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4; |
995 |
} |
996 |
|
997 |
*addrp = (unsigned char *) a; |
998 |
bintrans_write_pc_inc(addrp); |
999 |
return 1; |
1000 |
} |
1001 |
|
1002 |
|
1003 |
/* |
1004 |
* bintrans_write_instruction__jr(): |
1005 |
*/ |
1006 |
static int bintrans_write_instruction__jr(unsigned char **addrp, int rs, int rd, int special) |
1007 |
{ |
1008 |
uint32_t *a; |
1009 |
int alpha_rd; |
1010 |
|
1011 |
alpha_rd = map_MIPS_to_Alpha[rd]; |
1012 |
if (alpha_rd < 0) |
1013 |
alpha_rd = ALPHA_T0; |
1014 |
|
1015 |
/* |
1016 |
* Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED |
1017 |
* and cpu->delay_jmpaddr = gpr[rs]. |
1018 |
*/ |
1019 |
|
1020 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_S1); |
1021 |
|
1022 |
a = (uint32_t *) *addrp; |
1023 |
/* 02 00 3f 21 lda s0,TO_BE_DELAYED */ |
1024 |
*a++ = 0x213f0000 | TO_BE_DELAYED; |
1025 |
*addrp = (unsigned char *) a; |
1026 |
|
1027 |
if (special == SPECIAL_JALR && rd != 0) { |
1028 |
/* gpr[rd] = retaddr (pc + 8) */ |
1029 |
a = (uint32_t *) *addrp; |
1030 |
/* lda alpha_rd,8(t5) */ |
1031 |
*a++ = 0x20060008 | (alpha_rd << 21); |
1032 |
*addrp = (unsigned char *) a; |
1033 |
if (alpha_rd == ALPHA_T0) |
1034 |
bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rd); |
1035 |
} |
1036 |
|
1037 |
bintrans_write_pc_inc(addrp); |
1038 |
return 1; |
1039 |
} |
1040 |
|
1041 |
|
1042 |
/* |
1043 |
* bintrans_write_instruction__jal(): |
1044 |
*/ |
1045 |
static int bintrans_write_instruction__jal(unsigned char **addrp, |
1046 |
int imm, int link) |
1047 |
{ |
1048 |
uint32_t *a; |
1049 |
|
1050 |
a = (uint32_t *) *addrp; |
1051 |
|
1052 |
/* gpr[31] = retaddr (NOTE: mips register 31 is in alpha reg s3) */ |
1053 |
if (link) { |
1054 |
*a++ = 0x21860008; /* lda s3,8(t5) */ |
1055 |
} |
1056 |
|
1057 |
/* Set the jmpaddr to top 4 bits of pc + lowest 28 bits of imm*4: */ |
1058 |
|
1059 |
/* |
1060 |
* imm = 4*imm; |
1061 |
* t0 = ((pc + 4) & ~0x0fffffff) | imm; |
1062 |
* |
1063 |
* 04 00 26 20 lda t0,4(t5) <-- because the jump is from the delay slot |
1064 |
* 23 01 5f 24 ldah t1,291 |
1065 |
* 67 45 42 20 lda t1,17767(t1) |
1066 |
* 00 f0 7f 24 ldah t2,-4096 |
1067 |
* 04 00 23 44 and t0,t2,t3 |
1068 |
* 0a 04 44 44 or t1,t3,s1 |
1069 |
*/ |
1070 |
imm *= 4; |
1071 |
*a++ = 0x20260004; |
1072 |
*a++ = 0x245f0000 | ((imm >> 16) + (imm & 0x8000? 1 : 0)); |
1073 |
*a++ = 0x20420000 | (imm & 0xffff); |
1074 |
*a++ = 0x247ff000; |
1075 |
*a++ = 0x44230004; |
1076 |
*a++ = 0x4444040a; |
1077 |
|
1078 |
/* 02 00 3f 21 lda s0,TO_BE_DELAYED */ |
1079 |
*a++ = 0x213f0000 | TO_BE_DELAYED; |
1080 |
|
1081 |
/* If the machine continues executing here, it will return |
1082 |
to the main loop, which is fine. */ |
1083 |
|
1084 |
*addrp = (unsigned char *) a; |
1085 |
bintrans_write_pc_inc(addrp); |
1086 |
return 1; |
1087 |
} |
1088 |
|
1089 |
|
1090 |
/* |
1091 |
* bintrans_write_instruction__delayedbranch(): |
1092 |
*/ |
1093 |
static int bintrans_write_instruction__delayedbranch( |
1094 |
struct memory *mem, unsigned char **addrp, |
1095 |
uint32_t *potential_chunk_p, uint32_t *chunks, |
1096 |
int only_care_about_chunk_p, int p, int forward) |
1097 |
{ |
1098 |
unsigned char *a, *skip=NULL, *generic64bit; |
1099 |
int ofs; |
1100 |
uint64_t alpha_addr, subaddr; |
1101 |
|
1102 |
a = *addrp; |
1103 |
|
1104 |
if (!only_care_about_chunk_p) { |
1105 |
/* Skip all of this if there is no branch: */ |
1106 |
skip = a; |
1107 |
*a++ = 0; *a++ = 0; *a++ = 0x20; *a++ = 0xe5; /* beq s0,skip */ |
1108 |
|
1109 |
/* |
1110 |
* Perform the jump by setting cpu->delay_slot = 0 |
1111 |
* and pc = cpu->delay_jmpaddr. |
1112 |
*/ |
1113 |
/* 00 00 3f 21 lda s0,0 */ |
1114 |
*a++ = 0; *a++ = 0; *a++ = 0x3f; *a++ = 0x21; |
1115 |
|
1116 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_DELAY_JMPADDR, ALPHA_T0); |
1117 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T3); |
1118 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, MIPSREG_PC); |
1119 |
} |
1120 |
|
1121 |
if (potential_chunk_p == NULL) { |
1122 |
if (mem->bintrans_32bit_only) { |
1123 |
/* 34 12 70 a7 ldq t12,4660(a0) */ |
1124 |
ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu; |
1125 |
*a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7; |
1126 |
|
1127 |
/* 00 00 fb 6b jmp (t12) */ |
1128 |
*a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b; |
1129 |
} else { |
1130 |
/* |
1131 |
* If the highest 32 bits of the address are either |
1132 |
* 0x00000000 or 0xffffffff, then the tables used for |
1133 |
* 32-bit load/stores can be used. |
1134 |
* |
1135 |
* 81 16 24 4a srl a1,0x20,t0 |
1136 |
* 03 00 20 e4 beq t0,14 <ok1> |
1137 |
* 01 30 20 40 addl t0,0x1,t0 |
1138 |
* 01 00 20 e4 beq t0,14 <ok1> |
1139 |
* 01 00 e0 c3 br 18 <nook> |
1140 |
*/ |
1141 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; |
1142 |
*a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; |
1143 |
*a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40; |
1144 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; |
1145 |
generic64bit = a; |
1146 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1147 |
|
1148 |
/* 34 12 70 a7 ldq t12,4660(a0) */ |
1149 |
ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu; |
1150 |
*a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7; |
1151 |
|
1152 |
/* 00 00 fb 6b jmp (t12) */ |
1153 |
*a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b; |
1154 |
|
1155 |
|
1156 |
if (generic64bit != NULL) |
1157 |
*generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4; |
1158 |
|
1159 |
/* Not much we can do here if this wasn't to the same |
1160 |
physical page... */ |
1161 |
|
1162 |
*a++ = 0xfc; *a++ = 0xff; *a++ = 0x84; *a++ = 0x20; /* lda t3,-4(t3) */ |
1163 |
|
1164 |
/* |
1165 |
* Compare the old pc (t3) and the new pc (t0). If they are on the |
1166 |
* same virtual page (which means that they are on the same physical |
1167 |
* page), then we can check the right chunk pointer, and if it |
1168 |
* is non-NULL, then we can jump there. Otherwise just return. |
1169 |
* |
1170 |
* 00 f0 5f 20 lda t1,-4096 |
1171 |
* 01 00 22 44 and t0,t1,t0 |
1172 |
* 04 00 82 44 and t3,t1,t3 |
1173 |
* a3 05 24 40 cmpeq t0,t3,t2 |
1174 |
* 01 00 60 f4 bne t2,7c <ok2> |
1175 |
* 01 80 fa 6b ret |
1176 |
*/ |
1177 |
*a++ = 0x00; *a++ = 0xf0; *a++ = 0x5f; *a++ = 0x20; /* lda */ |
1178 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and */ |
1179 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0x82; *a++ = 0x44; /* and */ |
1180 |
*a++ = 0xa3; *a++ = 0x05; *a++ = 0x24; *a++ = 0x40; /* cmpeq */ |
1181 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x60; *a++ = 0xf4; /* bne */ |
1182 |
*a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ |
1183 |
|
1184 |
/* Don't execute too many instructions. (see comment below) */ |
1185 |
*a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255; |
1186 |
*a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */ |
1187 |
*a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */ |
1188 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */ |
1189 |
*a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ |
1190 |
|
1191 |
/* 15 bits at a time, which means max 60 bits, but |
1192 |
that should be enough. the top 4 bits are probably |
1193 |
not used by userland alpha code. (TODO: verify this) */ |
1194 |
alpha_addr = (size_t)chunks; |
1195 |
subaddr = (alpha_addr >> 45) & 0x7fff; |
1196 |
|
1197 |
/* |
1198 |
* 00 00 3f 20 lda t0,0 |
1199 |
* 21 f7 21 48 sll t0,0xf,t0 |
1200 |
* 34 12 21 20 lda t0,4660(t0) |
1201 |
* 21 f7 21 48 sll t0,0xf,t0 |
1202 |
* 34 12 21 20 lda t0,4660(t0) |
1203 |
* 21 f7 21 48 sll t0,0xf,t0 |
1204 |
* 34 12 21 20 lda t0,4660(t0) |
1205 |
*/ |
1206 |
|
1207 |
/* Start with the topmost 15 bits: */ |
1208 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20; |
1209 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1210 |
|
1211 |
subaddr = (alpha_addr >> 30) & 0x7fff; |
1212 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1213 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1214 |
|
1215 |
subaddr = (alpha_addr >> 15) & 0x7fff; |
1216 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1217 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1218 |
|
1219 |
subaddr = alpha_addr & 0x7fff; |
1220 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1221 |
|
1222 |
/* |
1223 |
* t2 = pc |
1224 |
* t1 = t2 & 0xfff |
1225 |
* t0 += t1 |
1226 |
* |
1227 |
* ff 0f 5f 20 lda t1,4095 |
1228 |
* 02 00 62 44 and t2,t1,t1 |
1229 |
* 01 04 22 40 addq t0,t1,t0 |
1230 |
*/ |
1231 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T2); |
1232 |
*a++ = 0xff; *a++ = 0x0f; *a++ = 0x5f; *a++ = 0x20; /* lda */ |
1233 |
*a++ = 0x02; *a++ = 0x00; *a++ = 0x62; *a++ = 0x44; /* and */ |
1234 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq */ |
1235 |
|
1236 |
/* |
1237 |
* Load the chunk pointer (actually, a 32-bit offset) into t0. |
1238 |
* If it is zero, then skip the following. |
1239 |
* Add cpu->chunk_base_address to t0. |
1240 |
* Jump to t0. |
1241 |
*/ |
1242 |
|
1243 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */ |
1244 |
*a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */ |
1245 |
|
1246 |
/* ldq t2,chunk_base_address(a0) */ |
1247 |
ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu; |
1248 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4; |
1249 |
/* addq t0,t2,t0 */ |
1250 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40; |
1251 |
|
1252 |
/* 00 00 e1 6b jmp (t0) */ |
1253 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */ |
1254 |
|
1255 |
/* Failure, then return to the main loop. */ |
1256 |
*a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ |
1257 |
} |
1258 |
} else { |
1259 |
/* |
1260 |
* Just to make sure that we don't become too unreliant |
1261 |
* on the main program loop, we need to return every once |
1262 |
* in a while (interrupts etc). |
1263 |
* |
1264 |
* Load the "nr of instructions executed" (which is an int) |
1265 |
* and see if it is below a certain threshold. If so, then |
1266 |
* we go on with the fast path (bintrans), otherwise we |
1267 |
* abort by returning. |
1268 |
* |
1269 |
* f4 01 5f 20 lda t1,500 (some low number...) |
1270 |
* a1 0d c2 40 cmple t6,t1,t0 |
1271 |
* 01 00 20 f4 bne t0,14 <f+0x14> |
1272 |
*/ |
1273 |
if (!only_care_about_chunk_p && !forward) { |
1274 |
*a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255; |
1275 |
*a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */ |
1276 |
*a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */ |
1277 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */ |
1278 |
*a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ |
1279 |
} |
1280 |
|
1281 |
/* |
1282 |
* potential_chunk_p points to an "uint32_t". |
1283 |
* If this value is non-NULL, then it is a piece of Alpha |
1284 |
* machine language code corresponding to the address |
1285 |
* we're jumping to. Otherwise, those instructions haven't |
1286 |
* been translated yet, so we have to return to the main |
1287 |
* loop. (Actually, we have to add cpu->chunk_base_address, |
1288 |
* because the uint32_t is limited to 32-bit offsets.) |
1289 |
* |
1290 |
* Case 1: The value is non-NULL already at translation |
1291 |
* time. Then we can make a direct (fast) native |
1292 |
* Alpha jump to the code chunk. |
1293 |
* |
1294 |
* Case 2: The value was NULL at translation time, then we |
1295 |
* have to check during runtime. |
1296 |
*/ |
1297 |
|
1298 |
/* Case 1: */ |
1299 |
/* printf("%08x ", *potential_chunk_p); */ |
1300 |
alpha_addr = *potential_chunk_p + (size_t)mem->translation_code_chunk_space; |
1301 |
ofs = (alpha_addr - ((size_t)a+4)) / 4; |
1302 |
/* printf("%016llx %016llx %i\n", (long long)alpha_addr, (long long)a, ofs); */ |
1303 |
|
1304 |
if ((*potential_chunk_p) != 0 && ofs > -0xfffff && ofs < 0xfffff) { |
1305 |
*a++ = ofs & 255; *a++ = (ofs >> 8) & 255; *a++ = 0xe0 + ((ofs >> 16) & 0x1f); *a++ = 0xc3; /* br <chunk> */ |
1306 |
} else { |
1307 |
/* Case 2: */ |
1308 |
|
1309 |
bintrans_register_potential_quick_jump(mem, a, p); |
1310 |
|
1311 |
/* 15 bits at a time, which means max 60 bits, but |
1312 |
that should be enough. the top 4 bits are probably |
1313 |
not used by userland alpha code. (TODO: verify this) */ |
1314 |
alpha_addr = (size_t)potential_chunk_p; |
1315 |
subaddr = (alpha_addr >> 45) & 0x7fff; |
1316 |
|
1317 |
/* |
1318 |
* 00 00 3f 20 lda t0,0 |
1319 |
* 21 f7 21 48 sll t0,0xf,t0 |
1320 |
* 34 12 21 20 lda t0,4660(t0) |
1321 |
* 21 f7 21 48 sll t0,0xf,t0 |
1322 |
* 34 12 21 20 lda t0,4660(t0) |
1323 |
* 21 f7 21 48 sll t0,0xf,t0 |
1324 |
* 34 12 21 20 lda t0,4660(t0) |
1325 |
*/ |
1326 |
|
1327 |
/* Start with the topmost 15 bits: */ |
1328 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20; |
1329 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1330 |
|
1331 |
subaddr = (alpha_addr >> 30) & 0x7fff; |
1332 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1333 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1334 |
|
1335 |
subaddr = (alpha_addr >> 15) & 0x7fff; |
1336 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1337 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1338 |
|
1339 |
subaddr = alpha_addr & 0x7fff; |
1340 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1341 |
|
1342 |
/* |
1343 |
* Load the chunk pointer into t0. |
1344 |
* If it is NULL (zero), then skip the following jump. |
1345 |
* Jump to t0. |
1346 |
*/ |
1347 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */ |
1348 |
*a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */ |
1349 |
|
1350 |
/* ldq t2,chunk_base_address(a0) */ |
1351 |
ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu; |
1352 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4; |
1353 |
/* addq t0,t2,t0 */ |
1354 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40; |
1355 |
|
1356 |
/* 00 00 e1 6b jmp (t0) */ |
1357 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */ |
1358 |
|
1359 |
/* "Failure", then let's return to the main loop. */ |
1360 |
*a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ |
1361 |
} |
1362 |
} |
1363 |
|
1364 |
if (skip != NULL) { |
1365 |
*skip = ((size_t)a - (size_t)skip - 4) / 4; |
1366 |
skip ++; |
1367 |
*skip = (((size_t)a - (size_t)skip - 4) / 4) >> 8; |
1368 |
} |
1369 |
|
1370 |
*addrp = a; |
1371 |
return 1; |
1372 |
} |
1373 |
|
1374 |
|
1375 |
/* |
1376 |
* bintrans_write_instruction__loadstore(): |
1377 |
*/ |
1378 |
static int bintrans_write_instruction__loadstore( |
1379 |
struct memory *mem, unsigned char **addrp, |
1380 |
int rt, int imm, int rs, int instruction_type, int bigendian) |
1381 |
{ |
1382 |
unsigned char *a, *fail, *generic64bit = NULL, *generic64bitA = NULL; |
1383 |
unsigned char *doloadstore = NULL, |
1384 |
*ok_unaligned_load3, *ok_unaligned_load2, *ok_unaligned_load1; |
1385 |
uint32_t *b; |
1386 |
int ofs, alignment, load = 0, alpha_rs, alpha_rt, unaligned = 0; |
1387 |
|
1388 |
/* TODO: Not yet: */ |
1389 |
if (instruction_type == HI6_LQ_MDMX || instruction_type == HI6_SQ) { |
1390 |
return 0; |
1391 |
} |
1392 |
|
1393 |
switch (instruction_type) { |
1394 |
case HI6_LQ_MDMX: |
1395 |
case HI6_LD: |
1396 |
case HI6_LDL: |
1397 |
case HI6_LDR: |
1398 |
case HI6_LWU: |
1399 |
case HI6_LW: |
1400 |
case HI6_LWL: |
1401 |
case HI6_LWR: |
1402 |
case HI6_LHU: |
1403 |
case HI6_LH: |
1404 |
case HI6_LBU: |
1405 |
case HI6_LB: |
1406 |
load = 1; |
1407 |
if (rt == 0) |
1408 |
return 0; |
1409 |
} |
1410 |
|
1411 |
switch (instruction_type) { |
1412 |
case HI6_LDL: |
1413 |
case HI6_LDR: |
1414 |
case HI6_LWL: |
1415 |
case HI6_LWR: |
1416 |
case HI6_SDL: |
1417 |
case HI6_SDR: |
1418 |
case HI6_SWL: |
1419 |
case HI6_SWR: |
1420 |
unaligned = 1; |
1421 |
} |
1422 |
|
1423 |
a = *addrp; |
1424 |
|
1425 |
/* |
1426 |
* a1 = gpr[rs] + imm; |
1427 |
* |
1428 |
* 88 08 30 a4 ldq t0,2184(a0) |
1429 |
* 34 12 21 22 lda a1,4660(t0) |
1430 |
*/ |
1431 |
|
1432 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
1433 |
if (alpha_rs < 0) { |
1434 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
1435 |
alpha_rs = ALPHA_T0; |
1436 |
} |
1437 |
*a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22; |
1438 |
|
1439 |
alignment = 0; |
1440 |
switch (instruction_type) { |
1441 |
case HI6_LQ_MDMX: |
1442 |
case HI6_SQ: |
1443 |
alignment = 15; |
1444 |
break; |
1445 |
case HI6_LD: |
1446 |
case HI6_LDL: |
1447 |
case HI6_LDR: |
1448 |
case HI6_SD: |
1449 |
case HI6_SDL: |
1450 |
case HI6_SDR: |
1451 |
alignment = 7; |
1452 |
break; |
1453 |
case HI6_LW: |
1454 |
case HI6_LWL: |
1455 |
case HI6_LWR: |
1456 |
case HI6_LWU: |
1457 |
case HI6_SW: |
1458 |
case HI6_SWL: |
1459 |
case HI6_SWR: |
1460 |
alignment = 3; |
1461 |
break; |
1462 |
case HI6_LH: |
1463 |
case HI6_LHU: |
1464 |
case HI6_SH: |
1465 |
alignment = 1; |
1466 |
break; |
1467 |
} |
1468 |
|
1469 |
if (unaligned) { |
1470 |
/* |
1471 |
* Unaligned load/store: Perform the host load/store at |
1472 |
* an aligned address, and then figure out which bytes to |
1473 |
* actually load into the destination register. |
1474 |
* |
1475 |
* 02 30 20 46 and a1,alignment,t1 |
1476 |
* 31 05 22 42 subq a1,t1,a1 |
1477 |
*/ |
1478 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
1479 |
*a++ = 0x31; *a++ = 0x05; *a++ = 0x22; *a++ = 0x42; |
1480 |
} else if (alignment > 0) { |
1481 |
/* |
1482 |
* Check alignment: |
1483 |
* |
1484 |
* 02 30 20 46 and a1,0x1,t1 |
1485 |
* 02 70 20 46 and a1,0x3,t1 (one of these "and"s) |
1486 |
* 02 f0 20 46 and a1,0x7,t1 |
1487 |
* 02 f0 21 46 and a1,0xf,t1 |
1488 |
* 01 00 40 e4 beq t1,<okalign> |
1489 |
* 01 80 fa 6b ret |
1490 |
*/ |
1491 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
1492 |
fail = a; |
1493 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4; |
1494 |
*addrp = a; |
1495 |
bintrans_write_chunkreturn_fail(addrp); |
1496 |
a = *addrp; |
1497 |
*fail = ((size_t)a - (size_t)fail - 4) / 4; |
1498 |
} |
1499 |
|
1500 |
alpha_rt = map_MIPS_to_Alpha[rt]; |
1501 |
|
1502 |
if (mem->bintrans_32bit_only) { |
1503 |
/* Special case for 32-bit addressing: */ |
1504 |
|
1505 |
if (load) |
1506 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_load_32bit) - (size_t)&dummy_cpu; |
1507 |
else |
1508 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_store_32bit) - (size_t)&dummy_cpu; |
1509 |
/* ldq t12,bintrans_loadstore_32bit(a0) */ |
1510 |
*a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7; |
1511 |
|
1512 |
/* jsr t4,(t12),<after> */ |
1513 |
*a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68; |
1514 |
|
1515 |
/* Now: a3 = host page, t3 = address of host load/store */ |
1516 |
} else { |
1517 |
/* |
1518 |
* If the highest 33 bits of the address are either all ones |
1519 |
* or all zeroes, then the tables used for 32-bit load/stores |
1520 |
* can be used. |
1521 |
*/ |
1522 |
*a++ = 0x81; *a++ = 0xf6; *a++ = 0x23; *a++ = 0x4a; /* srl a1,0x1f,t0 */ |
1523 |
*a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x44; /* and t0,0x1,t0 */ |
1524 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<noll> */ |
1525 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */ |
1526 |
*a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40; /* addl t0,0x1,t0 */ |
1527 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */ |
1528 |
generic64bit = a; |
1529 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */ |
1530 |
/* <noll>: */ |
1531 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */ |
1532 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */ |
1533 |
generic64bitA = a; |
1534 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */ |
1535 |
|
1536 |
if (load) |
1537 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_load_32bit) - (size_t)&dummy_cpu; |
1538 |
else |
1539 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_store_32bit) - (size_t)&dummy_cpu; |
1540 |
/* ldq t12,bintrans_loadstore_32bit(a0) */ |
1541 |
*a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7; |
1542 |
|
1543 |
/* jsr t4,(t12),<after> */ |
1544 |
*a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68; |
1545 |
|
1546 |
/* |
1547 |
* Now: a3 = host page |
1548 |
* t3 = (potential) address of host load/store |
1549 |
*/ |
1550 |
|
1551 |
doloadstore = a; |
1552 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1553 |
|
1554 |
|
1555 |
/* |
1556 |
* Generic (64-bit) load/store: |
1557 |
*/ |
1558 |
|
1559 |
if (generic64bit != NULL) |
1560 |
*generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4; |
1561 |
if (generic64bitA != NULL) |
1562 |
*generic64bitA = ((size_t)a - (size_t)generic64bitA - 4) / 4; |
1563 |
|
1564 |
*addrp = a; |
1565 |
b = (uint32_t *) *addrp; |
1566 |
|
1567 |
/* Save a0 and the old return address on the stack: */ |
1568 |
*b++ = 0x23deff80; /* lda sp,-128(sp) */ |
1569 |
|
1570 |
*b++ = 0xb75e0000; /* stq ra,0(sp) */ |
1571 |
*b++ = 0xb61e0008; /* stq a0,8(sp) */ |
1572 |
*b++ = 0xb4de0010; /* stq t5,16(sp) */ |
1573 |
*b++ = 0xb0fe0018; /* stl t6,24(sp) */ |
1574 |
*b++ = 0xb71e0020; /* stq t10,32(sp) */ |
1575 |
*b++ = 0xb73e0028; /* stq t11,40(sp) */ |
1576 |
*b++ = 0xb51e0030; /* stq t7,48(sp) */ |
1577 |
*b++ = 0xb6de0038; /* stq t8,56(sp) */ |
1578 |
*b++ = 0xb6fe0040; /* stq t9,64(sp) */ |
1579 |
|
1580 |
ofs = ((size_t)&dummy_cpu.cd.mips.fast_vaddr_to_hostaddr) - (size_t)&dummy_cpu; |
1581 |
|
1582 |
*b++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */ |
1583 |
|
1584 |
/* a1 is already vaddr. set a2 = writeflag */ |
1585 |
*b++ = 0x225f0000 | (load? 0 : 1); |
1586 |
|
1587 |
/* Call fast_vaddr_to_hostaddr: */ |
1588 |
*b++ = 0x6b5b4000; /* jsr ra,(t12),<after> */ |
1589 |
|
1590 |
/* Restore the old return address and a0 from the stack: */ |
1591 |
*b++ = 0xa75e0000; /* ldq ra,0(sp) */ |
1592 |
*b++ = 0xa61e0008; /* ldq a0,8(sp) */ |
1593 |
*b++ = 0xa4de0010; /* ldq t5,16(sp) */ |
1594 |
*b++ = 0xa0fe0018; /* ldl t6,24(sp) */ |
1595 |
*b++ = 0xa71e0020; /* ldq t10,32(sp) */ |
1596 |
*b++ = 0xa73e0028; /* ldq t11,40(sp) */ |
1597 |
*b++ = 0xa51e0030; /* ldq t7,48(sp) */ |
1598 |
*b++ = 0xa6de0038; /* ldq t8,56(sp) */ |
1599 |
*b++ = 0xa6fe0040; /* ldq t9,64(sp) */ |
1600 |
|
1601 |
*b++ = 0x23de0080; /* lda sp,128(sp) */ |
1602 |
|
1603 |
*addrp = (unsigned char *) b; |
1604 |
a = *addrp; |
1605 |
|
1606 |
/* |
1607 |
* NULL? Then return failure. |
1608 |
* 01 00 00 f4 bne v0,f8 <okzz> |
1609 |
*/ |
1610 |
fail = a; |
1611 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x00; *a++ = 0xf4; |
1612 |
bintrans_write_chunkreturn_fail(&a); |
1613 |
*fail = ((size_t)a - (size_t)fail - 4) / 4; |
1614 |
|
1615 |
/* The rest of this code was written with t3 as the address. */ |
1616 |
|
1617 |
/* 04 14 00 40 addq v0,0,t3 */ |
1618 |
*a++ = 0x04; *a++ = 0x14; *a++ = 0x00; *a++ = 0x40; |
1619 |
|
1620 |
if (doloadstore != NULL) |
1621 |
*doloadstore = ((size_t)a - (size_t)doloadstore - 4) / 4; |
1622 |
} |
1623 |
|
1624 |
|
1625 |
switch (instruction_type) { |
1626 |
case HI6_LQ_MDMX: |
1627 |
/* TODO */ |
1628 |
break; |
1629 |
case HI6_LD: |
1630 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa4; /* ldq t0,0(t3) */ |
1631 |
if (bigendian) { |
1632 |
/* remember original 8 bytes of t0: */ |
1633 |
*a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */ |
1634 |
|
1635 |
/* swap lowest 4 bytes: */ |
1636 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1637 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1638 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1639 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1640 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1641 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1642 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1643 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1644 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1645 |
|
1646 |
/* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */ |
1647 |
*a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */ |
1648 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */ |
1649 |
*a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */ |
1650 |
|
1651 |
/* swap highest 4 bytes: */ |
1652 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1653 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1654 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1655 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1656 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1657 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1658 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1659 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1660 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1661 |
|
1662 |
/* or the results together: */ |
1663 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */ |
1664 |
} |
1665 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt); |
1666 |
break; |
1667 |
case HI6_LW: |
1668 |
case HI6_LWU: |
1669 |
if (alpha_rt < 0 || bigendian || instruction_type == HI6_LWU) |
1670 |
alpha_rt = ALPHA_T0; |
1671 |
/* ldl rt,0(t3) */ |
1672 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5); |
1673 |
*a++ = 0xa0 | ((alpha_rt >> 3) & 3); |
1674 |
if (bigendian) { |
1675 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1676 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1677 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1678 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1679 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1680 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1681 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1682 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1683 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1684 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,zero,t0 (sign extend) 32->64 */ |
1685 |
} |
1686 |
if (instruction_type == HI6_LWU) { |
1687 |
/* Use only lowest 32 bits: */ |
1688 |
*a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48; /* zapnot t0,0xf,t0 */ |
1689 |
} |
1690 |
if (alpha_rt == ALPHA_T0) |
1691 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt); |
1692 |
break; |
1693 |
case HI6_LHU: |
1694 |
case HI6_LH: |
1695 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x30; /* ldwu from memory */ |
1696 |
if (bigendian) { |
1697 |
*a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */ |
1698 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1699 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */ |
1700 |
} |
1701 |
if (instruction_type == HI6_LH) { |
1702 |
*a++ = 0x21; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x73; /* sextw t0,t0 */ |
1703 |
} |
1704 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt); |
1705 |
break; |
1706 |
case HI6_LBU: |
1707 |
case HI6_LB: |
1708 |
if (alpha_rt < 0) |
1709 |
alpha_rt = ALPHA_T0; |
1710 |
/* ldbu rt,0(t3) */ |
1711 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5); |
1712 |
*a++ = 0x28 | ((alpha_rt >> 3) & 3); |
1713 |
if (instruction_type == HI6_LB) { |
1714 |
/* sextb rt,rt */ |
1715 |
*a++ = alpha_rt; *a++ = 0x00; *a++ = 0xe0 + alpha_rt; *a++ = 0x73; |
1716 |
} |
1717 |
if (alpha_rt == ALPHA_T0) |
1718 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt); |
1719 |
break; |
1720 |
|
1721 |
case HI6_LWL: |
1722 |
/* a1 = 0..3 (or 0..7 for 64-bit loads): */ |
1723 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
1724 |
if (alpha_rs < 0) { |
1725 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
1726 |
alpha_rs = ALPHA_T0; |
1727 |
} |
1728 |
*a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22; |
1729 |
/* 02 30 20 46 and a1,alignment,t1 */ |
1730 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
1731 |
|
1732 |
/* ldl t0,0(t3) */ |
1733 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0; |
1734 |
|
1735 |
if (bigendian) { |
1736 |
/* TODO */ |
1737 |
bintrans_write_chunkreturn_fail(&a); |
1738 |
} |
1739 |
/* |
1740 |
* lwl: memory = 0x12 0x34 0x56 0x78 |
1741 |
* offset (a1): register rt becomes: |
1742 |
* 0 0x12...... |
1743 |
* 1 0x3412.... |
1744 |
* 2 0x563412.. |
1745 |
* 3 0x78563412 |
1746 |
*/ |
1747 |
|
1748 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2); |
1749 |
|
1750 |
/* |
1751 |
10: 03 00 9f 20 lda t3,3 |
1752 |
14: a5 05 82 40 cmpeq t3,t1,t4 |
1753 |
18: 01 00 a0 e4 beq t4,20 <skip> |
1754 |
*/ |
1755 |
*a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1756 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1757 |
*a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1758 |
|
1759 |
/* 03 14 20 40 addq t0,0,t2 */ |
1760 |
*a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40; |
1761 |
|
1762 |
ok_unaligned_load3 = a; |
1763 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1764 |
|
1765 |
|
1766 |
|
1767 |
*a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1768 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1769 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1770 |
/* |
1771 |
* 2 0x563412.. |
1772 |
2c: 21 17 21 48 sll t0,0x8,t0 |
1773 |
30: 01 10 20 40 addl t0,0,t0 |
1774 |
34: 03 f0 7f 44 and t2,0xff,t2 |
1775 |
38: 03 04 23 44 or t0,t2,t2 |
1776 |
*/ |
1777 |
*a++ = 0x21; *a++ = 0x17; *a++ = 0x21; *a++ = 0x48; |
1778 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40; |
1779 |
*a++ = 0x03; *a++ = 0xf0; *a++ = 0x7f; *a++ = 0x44; |
1780 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1781 |
|
1782 |
ok_unaligned_load2 = a; |
1783 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1784 |
|
1785 |
|
1786 |
|
1787 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1788 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1789 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1790 |
/* |
1791 |
* 1 0x3412.... |
1792 |
2c: 21 17 22 48 sll t0,0x10,t0 |
1793 |
30: 01 10 20 40 addl t0,0,t0 |
1794 |
34: 23 76 60 48 zapnot t2,0x3,t2 |
1795 |
38: 03 04 23 44 or t0,t2,t2 |
1796 |
*/ |
1797 |
*a++ = 0x21; *a++ = 0x17; *a++ = 0x22; *a++ = 0x48; |
1798 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40; |
1799 |
*a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48; |
1800 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1801 |
|
1802 |
ok_unaligned_load1 = a; |
1803 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1804 |
|
1805 |
|
1806 |
|
1807 |
|
1808 |
/* |
1809 |
* 0 0x12...... |
1810 |
2c: 21 17 23 48 sll t0,0x18,t0 |
1811 |
30: 01 10 20 40 addl t0,0,t0 |
1812 |
34: 23 f6 60 48 zapnot t2,0x7,t2 |
1813 |
38: 03 04 23 44 or t0,t2,t2 |
1814 |
*/ |
1815 |
*a++ = 0x21; *a++ = 0x17; *a++ = 0x23; *a++ = 0x48; |
1816 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40; |
1817 |
*a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48; |
1818 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1819 |
|
1820 |
|
1821 |
*ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4; |
1822 |
*ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4; |
1823 |
*ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4; |
1824 |
|
1825 |
/* 03 10 60 40 addl t2,0,t2 */ |
1826 |
*a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40; |
1827 |
|
1828 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt); |
1829 |
break; |
1830 |
|
1831 |
case HI6_LWR: |
1832 |
/* a1 = 0..3 (or 0..7 for 64-bit loads): */ |
1833 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
1834 |
if (alpha_rs < 0) { |
1835 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
1836 |
alpha_rs = ALPHA_T0; |
1837 |
} |
1838 |
*a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22; |
1839 |
/* 02 30 20 46 and a1,alignment,t1 */ |
1840 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
1841 |
|
1842 |
/* ldl t0,0(t3) */ |
1843 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0; |
1844 |
|
1845 |
if (bigendian) { |
1846 |
/* TODO */ |
1847 |
bintrans_write_chunkreturn_fail(&a); |
1848 |
} |
1849 |
/* |
1850 |
* lwr: memory = 0x12 0x34 0x56 0x78 |
1851 |
* offset (a1): register rt becomes: |
1852 |
* 0 0x78563412 |
1853 |
* 1 0x..785634 |
1854 |
* 2 0x....7856 |
1855 |
* 3 0x......78 |
1856 |
*/ |
1857 |
|
1858 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2); |
1859 |
|
1860 |
/* |
1861 |
10: 03 00 9f 20 lda t3,3 |
1862 |
14: a5 05 82 40 cmpeq t3,t1,t4 |
1863 |
18: 01 00 a0 e4 beq t4,20 <skip> |
1864 |
*/ |
1865 |
*a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1866 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1867 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1868 |
|
1869 |
/* |
1870 |
2c: 81 16 23 48 srl t0,0x18,t0 |
1871 |
b0: 21 36 20 48 zapnot t0,0x1,t0 |
1872 |
34: 23 d6 7f 48 zapnot t2,0xfe,t2 |
1873 |
38: 03 04 23 44 or t0,t2,t2 |
1874 |
*/ |
1875 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x23; *a++ = 0x48; |
1876 |
*a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48; |
1877 |
*a++ = 0x23; *a++ = 0xd6; *a++ = 0x7f; *a++ = 0x48; |
1878 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1879 |
|
1880 |
ok_unaligned_load3 = a; |
1881 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1882 |
|
1883 |
|
1884 |
|
1885 |
*a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1886 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1887 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1888 |
/* |
1889 |
2c: 81 16 22 48 srl t0,0x10,t0 |
1890 |
b4: 21 76 20 48 zapnot t0,0x3,t0 |
1891 |
34: 23 96 7f 48 zapnot t2,0xfc,t2 |
1892 |
38: 03 04 23 44 or t0,t2,t2 |
1893 |
*/ |
1894 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x22; *a++ = 0x48; |
1895 |
*a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48; |
1896 |
*a++ = 0x23; *a++ = 0x96; *a++ = 0x7f; *a++ = 0x48; |
1897 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1898 |
|
1899 |
ok_unaligned_load2 = a; |
1900 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1901 |
|
1902 |
|
1903 |
|
1904 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1905 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1906 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1907 |
/* |
1908 |
2c: 81 16 21 48 srl t0,0x8,t0 |
1909 |
b8: 21 f6 20 48 zapnot t0,0x7,t0 |
1910 |
3c: 23 16 7f 48 zapnot t2,0xf8,t2 |
1911 |
40: 03 04 23 44 or t0,t2,t2 |
1912 |
*/ |
1913 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x21; *a++ = 0x48; |
1914 |
*a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48; |
1915 |
*a++ = 0x23; *a++ = 0x16; *a++ = 0x7f; *a++ = 0x48; |
1916 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1917 |
|
1918 |
ok_unaligned_load1 = a; |
1919 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1920 |
|
1921 |
|
1922 |
|
1923 |
|
1924 |
/* |
1925 |
* 0 0x12...... |
1926 |
*/ |
1927 |
/* 03 14 20 40 addq t0,0,t2 */ |
1928 |
*a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40; |
1929 |
|
1930 |
|
1931 |
|
1932 |
*ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4; |
1933 |
*ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4; |
1934 |
*ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4; |
1935 |
|
1936 |
/* 03 10 60 40 addl t2,0,t2 */ |
1937 |
*a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40; |
1938 |
|
1939 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt); |
1940 |
break; |
1941 |
|
1942 |
case HI6_SQ: |
1943 |
/* TODO */ |
1944 |
break; |
1945 |
case HI6_SD: |
1946 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0); |
1947 |
if (bigendian) { |
1948 |
/* remember original 8 bytes of t0: */ |
1949 |
*a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */ |
1950 |
|
1951 |
/* swap lowest 4 bytes: */ |
1952 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1953 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1954 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1955 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1956 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1957 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1958 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1959 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1960 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1961 |
|
1962 |
/* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */ |
1963 |
*a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */ |
1964 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */ |
1965 |
*a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */ |
1966 |
|
1967 |
/* swap highest 4 bytes: */ |
1968 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1969 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1970 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1971 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1972 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1973 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1974 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1975 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1976 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1977 |
|
1978 |
/* or the results together: */ |
1979 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */ |
1980 |
} |
1981 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb4; /* stq to memory */ |
1982 |
break; |
1983 |
case HI6_SW: |
1984 |
if (alpha_rt < 0 || bigendian) { |
1985 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0); |
1986 |
alpha_rt = ALPHA_T0; |
1987 |
} |
1988 |
if (bigendian) { |
1989 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1990 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1991 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1992 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1993 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1994 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1995 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1996 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1997 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1998 |
} |
1999 |
/* stl to memory: stl rt,0(t3) */ |
2000 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5); |
2001 |
*a++ = 0xb0 | ((alpha_rt >> 3) & 3); |
2002 |
break; |
2003 |
case HI6_SH: |
2004 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0); |
2005 |
if (bigendian) { |
2006 |
*a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */ |
2007 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
2008 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */ |
2009 |
} |
2010 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x34; /* stw to memory */ |
2011 |
break; |
2012 |
case HI6_SB: |
2013 |
if (alpha_rt < 0) { |
2014 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0); |
2015 |
alpha_rt = ALPHA_T0; |
2016 |
} |
2017 |
/* stb to memory: stb rt,0(t3) */ |
2018 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5); |
2019 |
*a++ = 0x38 | ((alpha_rt >> 3) & 3); |
2020 |
break; |
2021 |
|
2022 |
case HI6_SWL: |
2023 |
/* a1 = 0..3 (or 0..7 for 64-bit stores): */ |
2024 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
2025 |
if (alpha_rs < 0) { |
2026 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
2027 |
alpha_rs = ALPHA_T0; |
2028 |
} |
2029 |
*a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22; |
2030 |
/* 02 30 20 46 and a1,alignment,t1 */ |
2031 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
2032 |
|
2033 |
/* ldl t0,0(t3) */ |
2034 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0; |
2035 |
|
2036 |
if (bigendian) { |
2037 |
/* TODO */ |
2038 |
bintrans_write_chunkreturn_fail(&a); |
2039 |
} |
2040 |
|
2041 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2); |
2042 |
|
2043 |
/* |
2044 |
* swl: memory = 0x12 0x34 0x56 0x78 |
2045 |
* register = 0x89abcdef |
2046 |
* offset (a1): memory becomes: |
2047 |
* 0 0x89 0x.. 0x.. 0x.. |
2048 |
* 1 0xab 0x89 0x.. 0x.. |
2049 |
* 2 0xcd 0xab 0x89 0x.. |
2050 |
* 3 0xef 0xcd 0xab 0x89 |
2051 |
*/ |
2052 |
|
2053 |
/* |
2054 |
a5 75 40 40 cmpeq t1,0x03,t4 |
2055 |
01 00 a0 e4 beq t4,20 <skip> |
2056 |
*/ |
2057 |
*a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40; |
2058 |
*a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
2059 |
|
2060 |
/* 01 10 60 40 addl t2,0,t0 */ |
2061 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40; |
2062 |
|
2063 |
ok_unaligned_load3 = a; |
2064 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
2065 |
|
2066 |
|
2067 |
|
2068 |
|
2069 |
*a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40; |
2070 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
2071 |
/* |
2072 |
2: |
2073 |
e8: 83 16 61 48 srl t2,0x8,t2 |
2074 |
ec: 23 f6 60 48 zapnot t2,0x7,t2 |
2075 |
f0: 21 16 3f 48 zapnot t0,0xf8,t0 |
2076 |
f4: 01 04 23 44 or t0,t2,t0 |
2077 |
*/ |
2078 |
*a++ = 0x83; *a++ = 0x16; *a++ = 0x61; *a++ = 0x48; |
2079 |
*a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48; |
2080 |
*a++ = 0x21; *a++ = 0x16; *a++ = 0x3f; *a++ = 0x48; |
2081 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
2082 |
|
2083 |
ok_unaligned_load2 = a; |
2084 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
2085 |
|
2086 |
|
2087 |
|
2088 |
*a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40; |
2089 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
2090 |
/* |
2091 |
1: |
2092 |
f8: 83 16 62 48 srl t2,0x10,t2 |
2093 |
fc: 23 76 60 48 zapnot t2,0x3,t2 |
2094 |
100: 21 96 3f 48 zapnot t0,0xfc,t0 |
2095 |
104: 01 04 23 44 or t0,t2,t0 |
2096 |
*/ |
2097 |
*a++ = 0x83; *a++ = 0x16; *a++ = 0x62; *a++ = 0x48; |
2098 |
*a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48; |
2099 |
*a++ = 0x21; *a++ = 0x96; *a++ = 0x3f; *a++ = 0x48; |
2100 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
2101 |
|
2102 |
ok_unaligned_load1 = a; |
2103 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
2104 |
|
2105 |
|
2106 |
|
2107 |
|
2108 |
|
2109 |
/* |
2110 |
0: |
2111 |
108: 83 16 63 48 srl t2,0x18,t2 |
2112 |
10c: 23 36 60 48 zapnot t2,0x1,t2 |
2113 |
110: 21 d6 3f 48 zapnot t0,0xfe,t0 |
2114 |
114: 01 04 23 44 or t0,t2,t0 |
2115 |
*/ |
2116 |
*a++ = 0x83; *a++ = 0x16; *a++ = 0x63; *a++ = 0x48; |
2117 |
*a++ = 0x23; *a++ = 0x36; *a++ = 0x60; *a++ = 0x48; |
2118 |
*a++ = 0x21; *a++ = 0xd6; *a++ = 0x3f; *a++ = 0x48; |
2119 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
2120 |
|
2121 |
|
2122 |
*ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4; |
2123 |
*ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4; |
2124 |
*ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4; |
2125 |
|
2126 |
/* sdl t0,0(t3) */ |
2127 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0; |
2128 |
break; |
2129 |
|
2130 |
case HI6_SWR: |
2131 |
/* a1 = 0..3 (or 0..7 for 64-bit stores): */ |
2132 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
2133 |
if (alpha_rs < 0) { |
2134 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
2135 |
alpha_rs = ALPHA_T0; |
2136 |
} |
2137 |
*a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22; |
2138 |
/* 02 30 20 46 and a1,alignment,t1 */ |
2139 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
2140 |
|
2141 |
/* ldl t0,0(t3) */ |
2142 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0; |
2143 |
|
2144 |
if (bigendian) { |
2145 |
/* TODO */ |
2146 |
bintrans_write_chunkreturn_fail(&a); |
2147 |
} |
2148 |
|
2149 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2); |
2150 |
|
2151 |
/* |
2152 |
* swr: memory = 0x12 0x34 0x56 0x78 |
2153 |
* register = 0x89abcdef |
2154 |
* offset (a1): memory becomes: |
2155 |
* 0 0xef 0xcd 0xab 0x89 |
2156 |
* 1 0x.. 0xef 0xcd 0xab |
2157 |
* 2 0x.. 0x.. 0xef 0xcd |
2158 |
* 3 0x.. 0x.. 0x.. 0xef |
2159 |
*/ |
2160 |
|
2161 |
|
2162 |
/* |
2163 |
a5 75 40 40 cmpeq t1,0x03,t4 |
2164 |
01 00 a0 e4 beq t4,20 <skip> |
2165 |
*/ |
2166 |
*a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40; |
2167 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
2168 |
|
2169 |
/* |
2170 |
118: 23 17 63 48 sll t2,0x18,t2 |
2171 |
11c: 21 f6 20 48 zapnot t0,0x7,t0 |
2172 |
120: 01 04 23 44 or t0,t2,t0 |
2173 |
*/ |
2174 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x63; *a++ = 0x48; |
2175 |
*a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48; |
2176 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
2177 |
|
2178 |
ok_unaligned_load3 = a; |
2179 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
2180 |
|
2181 |
|
2182 |
|
2183 |
|
2184 |
|
2185 |
*a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40; |
2186 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
2187 |
/* |
2188 |
2: |
2189 |
124: 23 17 62 48 sll t2,0x10,t2 |
2190 |
128: 21 76 20 48 zapnot t0,0x3,t0 |
2191 |
12c: 01 04 23 44 or t0,t2,t0 |
2192 |
*/ |
2193 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; |
2194 |
*a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48; |
2195 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
2196 |
|
2197 |
ok_unaligned_load2 = a; |
2198 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
2199 |
|
2200 |
|
2201 |
|
2202 |
*a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40; |
2203 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
2204 |
/* |
2205 |
1: |
2206 |
130: 23 17 61 48 sll t2,0x8,t2 |
2207 |
134: 21 36 20 48 zapnot t0,0x1,t0 |
2208 |
138: 01 04 23 44 or t0,t2,t0 |
2209 |
*/ |
2210 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; |
2211 |
*a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48; |
2212 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
2213 |
|
2214 |
ok_unaligned_load1 = a; |
2215 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
2216 |
|
2217 |
|
2218 |
|
2219 |
/* |
2220 |
0: |
2221 |
13c: 01 10 60 40 addl t2,0,t0 |
2222 |
*/ |
2223 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40; |
2224 |
|
2225 |
|
2226 |
*ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4; |
2227 |
*ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4; |
2228 |
*ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4; |
2229 |
|
2230 |
/* sdl t0,0(t3) */ |
2231 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0; |
2232 |
break; |
2233 |
|
2234 |
default: |
2235 |
; |
2236 |
} |
2237 |
|
2238 |
*addrp = a; |
2239 |
bintrans_write_pc_inc(addrp); |
2240 |
return 1; |
2241 |
} |
2242 |
|
2243 |
|
2244 |
/* |
2245 |
* bintrans_write_instruction__lui(): |
2246 |
*/ |
2247 |
static int bintrans_write_instruction__lui(unsigned char **addrp, |
2248 |
int rt, int imm) |
2249 |
{ |
2250 |
uint32_t *a; |
2251 |
|
2252 |
/* |
2253 |
* dc fe 3f 24 ldah t0,-292 |
2254 |
* 1f 04 ff 5f fnop |
2255 |
* 88 08 30 b4 stq t0,2184(a0) |
2256 |
*/ |
2257 |
if (rt != 0) { |
2258 |
int alpha_rt = map_MIPS_to_Alpha[rt]; |
2259 |
if (alpha_rt < 0) |
2260 |
alpha_rt = ALPHA_T0; |
2261 |
|
2262 |
a = (uint32_t *) *addrp; |
2263 |
*a++ = 0x241f0000 | (alpha_rt << 21) | ((uint32_t)imm & 0xffff); |
2264 |
*addrp = (unsigned char *) a; |
2265 |
|
2266 |
if (alpha_rt == ALPHA_T0) { |
2267 |
*a++ = 0x5fff041f; /* fnop */ |
2268 |
bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt); |
2269 |
} |
2270 |
} |
2271 |
|
2272 |
bintrans_write_pc_inc(addrp); |
2273 |
|
2274 |
return 1; |
2275 |
} |
2276 |
|
2277 |
|
2278 |
/* |
2279 |
* bintrans_write_instruction__mfmthilo(): |
2280 |
*/ |
2281 |
static int bintrans_write_instruction__mfmthilo(unsigned char **addrp, |
2282 |
int rd, int from_flag, int hi_flag) |
2283 |
{ |
2284 |
unsigned char *a; |
2285 |
int ofs; |
2286 |
|
2287 |
a = *addrp; |
2288 |
|
2289 |
/* |
2290 |
* 18 09 30 a4 ldq t0,hi(a0) (or lo) |
2291 |
* 18 09 30 b4 stq t0,rd(a0) |
2292 |
* |
2293 |
* (or if from_flag is cleared then move the other way, it's |
2294 |
* actually not rd then, but rs...) |
2295 |
*/ |
2296 |
|
2297 |
if (from_flag) { |
2298 |
if (rd != 0) { |
2299 |
/* mfhi or mflo */ |
2300 |
if (hi_flag) |
2301 |
ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu; |
2302 |
else |
2303 |
ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu; |
2304 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xa4; |
2305 |
|
2306 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd); |
2307 |
} |
2308 |
} else { |
2309 |
/* mthi or mtlo */ |
2310 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rd, ALPHA_T0); |
2311 |
|
2312 |
if (hi_flag) |
2313 |
ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu; |
2314 |
else |
2315 |
ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu; |
2316 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4; |
2317 |
} |
2318 |
|
2319 |
*addrp = a; |
2320 |
bintrans_write_pc_inc(addrp); |
2321 |
return 1; |
2322 |
} |
2323 |
|
2324 |
|
2325 |
/* |
2326 |
* bintrans_write_instruction__mfc_mtc(): |
2327 |
*/ |
2328 |
static int bintrans_write_instruction__mfc_mtc(struct memory *mem, |
2329 |
unsigned char **addrp, int coproc_nr, int flag64bit, int rt, |
2330 |
int rd, int mtcflag) |
2331 |
{ |
2332 |
uint32_t *a, *jump; |
2333 |
int ofs; |
2334 |
|
2335 |
/* |
2336 |
* NOTE: Only a few registers are readable without side effects. |
2337 |
*/ |
2338 |
if (rt == 0 && !mtcflag) |
2339 |
return 0; |
2340 |
|
2341 |
if (coproc_nr >= 1) |
2342 |
return 0; |
2343 |
|
2344 |
if (rd == COP0_RANDOM || rd == COP0_COUNT) |
2345 |
return 0; |
2346 |
|
2347 |
|
2348 |
/************************************************************* |
2349 |
* |
2350 |
* TODO: Check for kernel mode, or Coproc X usability bit! |
2351 |
* |
2352 |
*************************************************************/ |
2353 |
|
2354 |
a = (uint32_t *) *addrp; |
2355 |
|
2356 |
ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu; |
2357 |
*a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */ |
2358 |
|
2359 |
ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc; |
2360 |
*a++ = 0xa4410000 | (ofs & 0xffff); /* ldq t1,reg_rd(t0) */ |
2361 |
|
2362 |
if (mtcflag) { |
2363 |
/* mtc: */ |
2364 |
*addrp = (unsigned char *) a; |
2365 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0); |
2366 |
a = (uint32_t *) *addrp; |
2367 |
|
2368 |
if (!flag64bit) { |
2369 |
*a++ = 0x40201001; /* addl t0,0,t0 */ |
2370 |
*a++ = 0x40401002; /* addl t1,0,t1 */ |
2371 |
} |
2372 |
|
2373 |
/* |
2374 |
* In the general case: Only allow mtc if it does NOT |
2375 |
* change the register!! |
2376 |
*/ |
2377 |
|
2378 |
switch (rd) { |
2379 |
case COP0_INDEX: |
2380 |
break; |
2381 |
|
2382 |
case COP0_EPC: |
2383 |
break; |
2384 |
|
2385 |
/* TODO: Some bits are not writable */ |
2386 |
case COP0_ENTRYLO0: |
2387 |
case COP0_ENTRYLO1: |
2388 |
break; |
2389 |
|
2390 |
case COP0_ENTRYHI: |
2391 |
/* |
2392 |
* Entryhi is ok to write to, as long as the |
2393 |
* ASID isn't changed. (That would require |
2394 |
* cache invalidations etc. Instead of checking |
2395 |
* for MMU3K vs others, we just assume that all the |
2396 |
* lowest 12 bits must be the same. |
2397 |
*/ |
2398 |
/* ff 0f bf 20 lda t4,0x0fff */ |
2399 |
/* 03 00 25 44 and t0,t4,t2 */ |
2400 |
/* 04 00 45 44 and t1,t4,t3 */ |
2401 |
/* a3 05 64 40 cmpeq t2,t3,t2 */ |
2402 |
/* 01 00 60 f4 bne t2,<ok> */ |
2403 |
*a++ = 0x20bf0fff; |
2404 |
*a++ = 0x44250003; |
2405 |
*a++ = 0x44450004; |
2406 |
*a++ = 0x406405a3; |
2407 |
jump = a; |
2408 |
*a++ = 0; /* later */ |
2409 |
*addrp = (unsigned char *) a; |
2410 |
bintrans_write_chunkreturn_fail(addrp); |
2411 |
a = (uint32_t *) *addrp; |
2412 |
*jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4); |
2413 |
break; |
2414 |
|
2415 |
case COP0_STATUS: |
2416 |
/* Only allow updates to the status register if |
2417 |
the interrupt enable bits were changed, but no |
2418 |
other bits! */ |
2419 |
if (mem->bintrans_32bit_only) { |
2420 |
/* R3000 etc. */ |
2421 |
/* t4 = 0x0fe70000; */ |
2422 |
*a++ = 0x20bf0000; |
2423 |
*a++ = 0x24a50fe7; |
2424 |
} else { |
2425 |
/* fe 00 bf 20 lda t4,0x00fe */ |
2426 |
/* ff ff a5 24 ldah t4,-1(t4) */ |
2427 |
*a++ = 0x20bf0000; |
2428 |
*a++ = 0x24a5ffff; |
2429 |
} |
2430 |
|
2431 |
/* 03 00 25 44 and t0,t4,t2 */ |
2432 |
/* 04 00 45 44 and t1,t4,t3 */ |
2433 |
/* a3 05 64 40 cmpeq t2,t3,t2 */ |
2434 |
/* 01 00 60 f4 bne t2,<ok> */ |
2435 |
*a++ = 0x44250003; |
2436 |
*a++ = 0x44450004; |
2437 |
*a++ = 0x406405a3; |
2438 |
jump = a; |
2439 |
*a++ = 0; /* later */ |
2440 |
*addrp = (unsigned char *) a; |
2441 |
bintrans_write_chunkreturn_fail(addrp); |
2442 |
a = (uint32_t *) *addrp; |
2443 |
*jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4); |
2444 |
|
2445 |
/* If enabling interrupt bits would cause an |
2446 |
exception, then don't do it: */ |
2447 |
ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu; |
2448 |
*a++ = 0xa4900000 | (ofs & 0xffff); /* ldq t3,coproc[0](a0) */ |
2449 |
ofs = ((size_t)&dummy_coproc.reg[COP0_CAUSE]) - (size_t)&dummy_coproc; |
2450 |
*a++ = 0xa4a40000 | (ofs & 0xffff); /* ldq t4,reg_rd(t3) */ |
2451 |
|
2452 |
/* 02 00 a1 44 and t4,t0,t1 */ |
2453 |
/* 83 16 41 48 srl t1,0x8,t2 */ |
2454 |
/* 04 f0 7f 44 and t2,0xff,t3 */ |
2455 |
*a++ = 0x44a10002; |
2456 |
*a++ = 0x48411683; |
2457 |
*a++ = 0x447ff004; |
2458 |
/* 01 00 80 e4 beq t3,<ok> */ |
2459 |
jump = a; |
2460 |
*a++ = 0; /* later */ |
2461 |
*addrp = (unsigned char *) a; |
2462 |
bintrans_write_chunkreturn_fail(addrp); |
2463 |
a = (uint32_t *) *addrp; |
2464 |
*jump = 0xe4800000 | (((size_t)a - (size_t)jump - 4) / 4); |
2465 |
break; |
2466 |
|
2467 |
default: |
2468 |
/* a3 05 22 40 cmpeq t0,t1,t2 */ |
2469 |
/* 01 00 60 f4 bne t2,<ok> */ |
2470 |
*a++ = 0x402205a3; |
2471 |
jump = a; |
2472 |
*a++ = 0; /* later */ |
2473 |
*addrp = (unsigned char *) a; |
2474 |
bintrans_write_chunkreturn_fail(addrp); |
2475 |
a = (uint32_t *) *addrp; |
2476 |
*jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4); |
2477 |
} |
2478 |
|
2479 |
*a++ = 0x40201402; /* addq t0,0,t1 */ |
2480 |
|
2481 |
ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu; |
2482 |
*a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */ |
2483 |
ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc; |
2484 |
*a++ = 0xb4410000 | (ofs & 0xffff); /* stq t1,reg_rd(t0) */ |
2485 |
} else { |
2486 |
/* mfc: */ |
2487 |
if (!flag64bit) { |
2488 |
*a++ = 0x40401002; /* addl t1,0,t1 */ |
2489 |
} |
2490 |
|
2491 |
*addrp = (unsigned char *) a; |
2492 |
bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T1, rt); |
2493 |
a = (uint32_t *) *addrp; |
2494 |
} |
2495 |
|
2496 |
*addrp = (unsigned char *) a; |
2497 |
|
2498 |
bintrans_write_pc_inc(addrp); |
2499 |
return 1; |
2500 |
} |
2501 |
|
2502 |
|
2503 |
/* |
2504 |
* bintrans_write_instruction__tlb_rfe_etc(): |
2505 |
*/ |
2506 |
static int bintrans_write_instruction__tlb_rfe_etc(unsigned char **addrp, |
2507 |
int itype) |
2508 |
{ |
2509 |
uint32_t *a; |
2510 |
int ofs = 0; |
2511 |
|
2512 |
switch (itype) { |
2513 |
case CALL_TLBWI: |
2514 |
case CALL_TLBWR: |
2515 |
case CALL_TLBP: |
2516 |
case CALL_TLBR: |
2517 |
case CALL_RFE: |
2518 |
case CALL_ERET: |
2519 |
case CALL_BREAK: |
2520 |
case CALL_SYSCALL: |
2521 |
break; |
2522 |
default: |
2523 |
return 0; |
2524 |
} |
2525 |
|
2526 |
a = (uint32_t *) *addrp; |
2527 |
|
2528 |
/* a0 = pointer to the cpu struct */ |
2529 |
|
2530 |
switch (itype) { |
2531 |
case CALL_TLBWI: |
2532 |
case CALL_TLBWR: |
2533 |
/* a1 = 0 for indexed, 1 for random */ |
2534 |
*a++ = 0x223f0000 | (itype == CALL_TLBWR); |
2535 |
break; |
2536 |
case CALL_TLBP: |
2537 |
case CALL_TLBR: |
2538 |
/* a1 = 0 for probe, 1 for read */ |
2539 |
*a++ = 0x223f0000 | (itype == CALL_TLBR); |
2540 |
break; |
2541 |
case CALL_BREAK: |
2542 |
case CALL_SYSCALL: |
2543 |
*a++ = 0x223f0000 | (itype == CALL_BREAK? EXCEPTION_BP : EXCEPTION_SYS); |
2544 |
break; |
2545 |
} |
2546 |
|
2547 |
/* Put PC into the cpu struct (both pc and pc_last). */ |
2548 |
*a++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */ |
2549 |
*a++ = 0xb4d00000 | ofs_pc_last;/* stq t5,"pc_last"(a0) */ |
2550 |
|
2551 |
/* Save a0 and the old return address on the stack: */ |
2552 |
*a++ = 0x23deff80; /* lda sp,-128(sp) */ |
2553 |
|
2554 |
*a++ = 0xb75e0000; /* stq ra,0(sp) */ |
2555 |
*a++ = 0xb61e0008; /* stq a0,8(sp) */ |
2556 |
*a++ = 0xb0fe0018; /* stl t6,24(sp) */ |
2557 |
*a++ = 0xb71e0020; /* stq t10,32(sp) */ |
2558 |
*a++ = 0xb73e0028; /* stq t11,40(sp) */ |
2559 |
*a++ = 0xb51e0030; /* stq t7,48(sp) */ |
2560 |
*a++ = 0xb6de0038; /* stq t8,56(sp) */ |
2561 |
*a++ = 0xb6fe0040; /* stq t9,64(sp) */ |
2562 |
|
2563 |
switch (itype) { |
2564 |
case CALL_TLBP: |
2565 |
case CALL_TLBR: |
2566 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbpr) - (size_t)&dummy_cpu; |
2567 |
break; |
2568 |
case CALL_TLBWR: |
2569 |
case CALL_TLBWI: |
2570 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbwri) - (size_t)&dummy_cpu; |
2571 |
break; |
2572 |
case CALL_RFE: |
2573 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_rfe) - (size_t)&dummy_cpu; |
2574 |
break; |
2575 |
case CALL_ERET: |
2576 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_eret) - (size_t)&dummy_cpu; |
2577 |
break; |
2578 |
case CALL_BREAK: |
2579 |
case CALL_SYSCALL: |
2580 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_simple_exception) - (size_t)&dummy_cpu; |
2581 |
break; |
2582 |
} |
2583 |
|
2584 |
*a++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */ |
2585 |
|
2586 |
/* Call bintrans_fast_tlbwr: */ |
2587 |
*a++ = 0x6b5b4000; /* jsr ra,(t12),<after> */ |
2588 |
|
2589 |
/* Restore the old return address and a0 from the stack: */ |
2590 |
*a++ = 0xa75e0000; /* ldq ra,0(sp) */ |
2591 |
*a++ = 0xa61e0008; /* ldq a0,8(sp) */ |
2592 |
*a++ = 0xa0fe0018; /* ldl t6,24(sp) */ |
2593 |
*a++ = 0xa71e0020; /* ldq t10,32(sp) */ |
2594 |
*a++ = 0xa73e0028; /* ldq t11,40(sp) */ |
2595 |
*a++ = 0xa51e0030; /* ldq t7,48(sp) */ |
2596 |
*a++ = 0xa6de0038; /* ldq t8,56(sp) */ |
2597 |
*a++ = 0xa6fe0040; /* ldq t9,64(sp) */ |
2598 |
|
2599 |
*a++ = 0x23de0080; /* lda sp,128(sp) */ |
2600 |
|
2601 |
/* Load PC from the cpu struct. */ |
2602 |
*a++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */ |
2603 |
|
2604 |
*addrp = (unsigned char *) a; |
2605 |
|
2606 |
switch (itype) { |
2607 |
case CALL_ERET: |
2608 |
case CALL_BREAK: |
2609 |
case CALL_SYSCALL: |
2610 |
break; |
2611 |
default: |
2612 |
bintrans_write_pc_inc(addrp); |
2613 |
} |
2614 |
|
2615 |
return 1; |
2616 |
} |
2617 |
|
2618 |
|
2619 |
/* |
2620 |
* bintrans_backend_init(): |
2621 |
* |
2622 |
* This is neccessary for broken 2.95.4 compilers on FreeBSD/Alpha 4.9, |
2623 |
* and probably a few others. (For Compaq's CC, and for gcc 3.x, this |
2624 |
* wouldn't be neccessary, and the old code would have worked.) |
2625 |
*/ |
2626 |
static void bintrans_backend_init(void) |
2627 |
{ |
2628 |
int size; |
2629 |
uint32_t *p, *q; |
2630 |
|
2631 |
|
2632 |
/* "runchunk": */ |
2633 |
size = 256; /* NOTE: This MUST be enough, or we fail */ |
2634 |
p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, |
2635 |
MAP_ANON | MAP_PRIVATE, -1, 0); |
2636 |
|
2637 |
/* If mmap() failed, try malloc(): */ |
2638 |
if (p == NULL) { |
2639 |
p = malloc(size); |
2640 |
if (p == NULL) { |
2641 |
fprintf(stderr, "bintrans_backend_init(): out of memory\n"); |
2642 |
exit(1); |
2643 |
} |
2644 |
} |
2645 |
|
2646 |
bintrans_runchunk = (void *)p; |
2647 |
|
2648 |
*p++ = 0x23deffa0; /* lda sp,-0x60(sp) */ |
2649 |
*p++ = 0xb75e0000; /* stq ra,0(sp) */ |
2650 |
*p++ = 0xb53e0008; /* stq s0,8(sp) */ |
2651 |
*p++ = 0xb55e0010; /* stq s1,16(sp) */ |
2652 |
*p++ = 0xb57e0018; /* stq s2,24(sp) */ |
2653 |
*p++ = 0xb59e0020; /* stq s3,32(sp) */ |
2654 |
*p++ = 0xb5be0028; /* stq s4,40(sp) */ |
2655 |
*p++ = 0xb5de0030; /* stq s5,48(sp) */ |
2656 |
*p++ = 0xb5fe0038; /* stq s6,56(sp) */ |
2657 |
*p++ = 0xb7be0058; /* stq gp,0x58(sp) */ |
2658 |
|
2659 |
*p++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */ |
2660 |
*p++ = 0xa0f00000 | ofs_n; /* ldl t6,"bintrans_instructions_executed"(a0) */ |
2661 |
*p++ = 0xa5100000 | ofs_a0; /* ldq t7,"a0"(a0) */ |
2662 |
*p++ = 0xa6d00000 | ofs_a1; /* ldq t8,"a1"(a0) */ |
2663 |
*p++ = 0xa6f00000 | ofs_s0; /* ldq t9,"s0"(a0) */ |
2664 |
*p++ = 0xa1300000 | ofs_ds; /* ldl s0,"delay_slot"(a0) */ |
2665 |
*p++ = 0xa5500000 | ofs_ja; /* ldq s1,"delay_jmpaddr"(a0) */ |
2666 |
*p++ = 0xa5700000 | ofs_sp; /* ldq s2,"gpr[sp]"(a0) */ |
2667 |
*p++ = 0xa5900000 | ofs_ra; /* ldq s3,"gpr[ra]"(a0) */ |
2668 |
*p++ = 0xa5b00000 | ofs_t0; /* ldq s4,"gpr[t0]"(a0) */ |
2669 |
*p++ = 0xa5d00000 | ofs_t1; /* ldq s5,"gpr[t1]"(a0) */ |
2670 |
*p++ = 0xa5f00000 | ofs_t2; /* ldq s6,"gpr[t2]"(a0) */ |
2671 |
*p++ = 0xa7100000 | ofs_tbl0; /* ldq t10,table0(a0) */ |
2672 |
*p++ = 0xa7300000 | ofs_v0; /* ldq t11,"gpr[v0]"(a0) */ |
2673 |
|
2674 |
*p++ = 0x6b514000; /* jsr ra,(a1),<back> */ |
2675 |
|
2676 |
*p++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */ |
2677 |
*p++ = 0xb0f00000 | ofs_n; /* stl t6,"bintrans_instructions_executed"(a0) */ |
2678 |
*p++ = 0xb5100000 | ofs_a0; /* stq t7,"a0"(a0) */ |
2679 |
*p++ = 0xb6d00000 | ofs_a1; /* stq t8,"a1"(a0) */ |
2680 |
*p++ = 0xb6f00000 | ofs_s0; /* stq t9,"s0"(a0) */ |
2681 |
*p++ = 0xb1300000 | ofs_ds; /* stl s0,"delay_slot"(a0) */ |
2682 |
*p++ = 0xb5500000 | ofs_ja; /* stq s1,"delay_jmpaddr"(a0) */ |
2683 |
*p++ = 0xb5700000 | ofs_sp; /* stq s2,"gpr[sp]"(a0) */ |
2684 |
*p++ = 0xb5900000 | ofs_ra; /* stq s3,"gpr[ra]"(a0) */ |
2685 |
*p++ = 0xb5b00000 | ofs_t0; /* stq s4,"gpr[t0]"(a0) */ |
2686 |
*p++ = 0xb5d00000 | ofs_t1; /* stq s5,"gpr[t1]"(a0) */ |
2687 |
*p++ = 0xb5f00000 | ofs_t2; /* stq s6,"gpr[t2]"(a0) */ |
2688 |
*p++ = 0xb7300000 | ofs_v0; /* stq t11,"gpr[v0]"(a0) */ |
2689 |
|
2690 |
*p++ = 0xa75e0000; /* ldq ra,0(sp) */ |
2691 |
*p++ = 0xa53e0008; /* ldq s0,8(sp) */ |
2692 |
*p++ = 0xa55e0010; /* ldq s1,16(sp) */ |
2693 |
*p++ = 0xa57e0018; /* ldq s2,24(sp) */ |
2694 |
*p++ = 0xa59e0020; /* ldq s3,32(sp) */ |
2695 |
*p++ = 0xa5be0028; /* ldq s4,40(sp) */ |
2696 |
*p++ = 0xa5de0030; /* ldq s5,48(sp) */ |
2697 |
*p++ = 0xa5fe0038; /* ldq s6,56(sp) */ |
2698 |
*p++ = 0xa7be0058; /* ldq gp,0x58(sp) */ |
2699 |
*p++ = 0x23de0060; /* lda sp,0x60(sp) */ |
2700 |
*p++ = 0x6bfa8001; /* ret */ |
2701 |
|
2702 |
|
2703 |
/* "jump to 32bit pc": */ |
2704 |
size = 128; /* WARNING! Don't make this too small. */ |
2705 |
p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, |
2706 |
MAP_ANON | MAP_PRIVATE, -1, 0); |
2707 |
|
2708 |
/* If mmap() failed, try malloc(): */ |
2709 |
if (p == NULL) { |
2710 |
p = malloc(size); |
2711 |
if (p == NULL) { |
2712 |
fprintf(stderr, "bintrans_backend_init(): out of memory\n"); |
2713 |
exit(1); |
2714 |
} |
2715 |
} |
2716 |
|
2717 |
bintrans_jump_to_32bit_pc = (void *)p; |
2718 |
|
2719 |
/* Don't execute too many instructions: */ |
2720 |
*p++ = 0x205f0000 | (N_SAFE_BINTRANS_LIMIT-1); /* lda t1,safe-1 */ |
2721 |
|
2722 |
*p++ = 0x40e20da1; /* cmple t6,t1,t0 */ |
2723 |
q = p; /* *q is updated later */ |
2724 |
*p++ = 0xe4200001; /* beq ret (far below) */ |
2725 |
|
2726 |
*p++ = 0x40c01411; /* addq t5,0,a1 */ |
2727 |
|
2728 |
/* |
2729 |
* Special case for 32-bit addressing: |
2730 |
* |
2731 |
* t1 = 1023; |
2732 |
* t2 = ((a1 >> 22) & t1) * sizeof(void *); |
2733 |
* t3 = ((a1 >> 12) & t1) * sizeof(void *); |
2734 |
* t1 = a1 & 4095; |
2735 |
*/ |
2736 |
*p++ = 0x205f1ff8; /* lda t1,1023 * 8 */ |
2737 |
*p++ = 0x4a227683; /* srl a1,19,t2 */ |
2738 |
*p++ = 0x4a213684; /* srl a1, 9,t3 */ |
2739 |
*p++ = 0x44620003; /* and t2,t1,t2 */ |
2740 |
|
2741 |
/* |
2742 |
* t10 is vaddr_to_hostaddr_table0 |
2743 |
* |
2744 |
* a3 = tbl0[t2] (load entry from tbl0) |
2745 |
*/ |
2746 |
*p++ = 0x43030412; /* addq t10,t2,a2 */ |
2747 |
*p++ = 0x44820004; /* and t3,t1,t3 */ |
2748 |
*p++ = 0xa6720000; /* ldq a3,0(a2) */ |
2749 |
*p++ = 0x205f0ffc; /* lda t1,0xffc */ |
2750 |
|
2751 |
/* |
2752 |
* a3 = tbl1[t3] (load entry from tbl1 (which is a3)) |
2753 |
*/ |
2754 |
*p++ = 0x42640413; /* addq a3,t3,a3 */ |
2755 |
*p++ = 0x46220002; /* and a1,t1,t1 */ |
2756 |
|
2757 |
*p++ = 0xa6730000 | ofs_c0; /* ldq a3,chunks[0](a3) */ |
2758 |
|
2759 |
/* |
2760 |
* NULL? Then just return. |
2761 |
*/ |
2762 |
*p++ = 0xf6600001; /* bne a3,<ok> */ |
2763 |
*p++ = 0x6bfa8001; /* ret */ |
2764 |
|
2765 |
*p++ = 0x40530402; /* addq t1,a3,t1 */ |
2766 |
*p++ = 0xa0220000; /* ldl t0,0(t1) */ |
2767 |
|
2768 |
/* No translation? Then return. */ |
2769 |
*p++ = 0xe4200003; /* beq t0,<skip> */ |
2770 |
|
2771 |
*p++ = 0xa4700000 | ofs_cb; /* ldq t2,chunk_base_address(a0) */ |
2772 |
|
2773 |
*p++ = 0x40230401; /* addq t0,t2,t0 */ |
2774 |
*p++ = 0x6be10000; /* jmp (t0) */ |
2775 |
|
2776 |
/* Now, update *q to point here: */ |
2777 |
*q = 0xe4200000 | (((size_t)p - (size_t)q)/4 - 1); /* beq ret */ |
2778 |
|
2779 |
/* Return to the main translation loop. */ |
2780 |
*p++ = 0x6bfa8001; /* ret */ |
2781 |
} |
2782 |
|