--- trunk/src/bintrans_alpha.c 2007/10/08 16:18:31 11 +++ trunk/src/bintrans_alpha.c 2007/10/08 16:18:38 12 @@ -25,7 +25,7 @@ * SUCH DAMAGE. * * - * $Id: bintrans_alpha.c,v 1.119 2005/06/22 10:12:25 debug Exp $ + * $Id: bintrans_alpha.c,v 1.125 2005/07/31 08:47:56 debug Exp $ * * Alpha specific code for dynamic binary translation. * @@ -60,7 +60,7 @@ * t7 a0 (mips register 4) (64-bit) * t8 a1 (mips register 5) (64-bit) * t9 s0 (mips register 16) (64-bit) - * t10 table0 cached (for load/store) + * t10 load table base cached * t11 v0 (mips register 2) (64-bit) * s0 delay_slot (32-bit int) * s1 delay_jmpaddr (64-bit) @@ -68,7 +68,7 @@ * s3 ra (mips register 31) (64-bit) * s4 t0 (mips register 8) (64-bit) * s5 t1 (mips register 9) (64-bit) - * s6 t2 (mips register 10) (64-bit) + * s6 store table base cached */ #define MIPSREG_PC -3 @@ -105,7 +105,7 @@ static int map_MIPS_to_Alpha[32] = { ALPHA_ZERO, -1, ALPHA_T11, -1, /* 0 .. 3 */ ALPHA_T7, ALPHA_T8, -1, -1, /* 4 .. 7 */ - ALPHA_S4, ALPHA_S5, ALPHA_S6, -1, /* 8 .. 11 */ + ALPHA_S4, ALPHA_S5, -1, -1, /* 8 .. 11 */ -1, -1, -1, -1, /* 12 .. 15 */ ALPHA_T9, -1, -1, -1, /* 16 .. 19 */ -1, -1, -1, -1, /* 20 .. 23 */ @@ -150,31 +150,6 @@ } -/* - * lda sp,-128(sp) some margin - * stq ra,0(sp) - * stq s0,8(sp) - * stq s1,16(sp) - * stq s2,24(sp) - * stq s3,32(sp) - * stq s4,40(sp) - * stq s5,48(sp) - * stq s6,56(sp) - * - * jsr ra,(a1), - * back: - * - * ldq ra,0(sp) - * ldq s0,8(sp) - * ldq s1,16(sp) - * ldq s2,24(sp) - * ldq s3,32(sp) - * ldq s4,40(sp) - * ldq s5,48(sp) - * ldq s6,56(sp) - * lda sp,128(sp) - * ret - */ /* note: offsetof (in stdarg.h) could possibly be used, but I'm not sure if it will take care of the compiler problems... 
*/ #define ofs_pc (((size_t)&dummy_cpu.pc) - ((size_t)&dummy_cpu)) @@ -195,139 +170,52 @@ #define ofs_c0 ((size_t)&dummy_vth32_table.bintrans_chunks[0] - (size_t)&dummy_vth32_table) #define ofs_cb (((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu) +#define ofs_h_l (((size_t)&dummy_cpu.cd.mips.host_load) - ((size_t)&dummy_cpu)) +#define ofs_h_s (((size_t)&dummy_cpu.cd.mips.host_store) - ((size_t)&dummy_cpu)) -static uint32_t bintrans_alpha_load_32bit[18] = { - /* - * t1 = 1023; - * t2 = ((a1 >> 22) & t1) * sizeof(void *); - * t3 = ((a1 >> 12) & t1) * sizeof(void *); - * t1 = a1 & 4095; - * - * f8 1f 5f 20 lda t1,1023 * 8 - * 83 76 22 4a srl a1,19,t2 - * 84 36 21 4a srl a1, 9,t3 - * 03 00 62 44 and t2,t1,t2 - */ - 0x205f1ff8, - 0x4a227683, - 0x4a213684, - 0x44620003, - - /* - * t10 is vaddr_to_hostaddr_table0 - * - * a3 = tbl0[t2] (load entry from tbl0) - * 12 04 03 43 addq t10,t2,a2 - */ - 0x43030412, - - /* 04 00 82 44 and t3,t1,t3 */ - 0x44820004, - - /* 00 00 72 a6 ldq a3,0(a2) */ - 0xa6720000, - - /* 24 37 80 48 sll t3,0x1,t3 */ - 0x48803724, - - /* ff 0f 5f 20 lda t1,4095 */ - 0x205f0fff, - /* - * a3 = tbl1[t3] (load entry from tbl1 (which is a3)) - * 13 04 64 42 addq a3,t3,a3 - */ - 0x42640413, - - /* 02 00 22 46 and a1,t1,t1 */ - 0x46220002, - - /* 00 00 73 a6 ldq a3,0(a3) */ - 0xa6730000, +static uint32_t bintrans_alpha_load_32bit[18] = { + 0x4a21f622, /* zapnot a1,0xf,t1 */ + 0x209f0fff, /* lda t3,4095 */ + 0x48419682, /* srl t1,0xc,t1 t1 = addr >> 12 */ + 0x46240004, /* and a1,t3,t3 t3 = addr & 4095 */ + 0x40580642, /* s8addq t1,t10,t1 &host_load[t1] */ + 0xa6620000, /* ldq a3,0(t1) a3 = host_load[t1] */ /* NULL? Then return failure at once. */ - /* beq a3, return */ - 0xe6600002, + 0xe6600002, /* beq a3, return */ /* The rest of the load/store code was written with t3 as the address. 
*/ - /* Add the offset within the page: */ - /* 04 04 62 42 addq a3,t1,t3 */ - 0x42620404, - - 0x6be50000, /* jmp (t4) */ + 0x42640404, /* addq a3,t3,t3 */ + 0x6be58000, /* ret (t4) */ /* return: */ 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */ 0x44270407, /* or t0,t6,t6 */ - 0x6bfa8001 /* ret */ + 0x6bfa8000 /* ret */ }; static uint32_t bintrans_alpha_store_32bit[18] = { - /* - * t1 = 1023; - * t2 = ((a1 >> 22) & t1) * sizeof(void *); - * t3 = ((a1 >> 12) & t1) * sizeof(void *); - * t1 = a1 & 4095; - * - * f8 1f 5f 20 lda t1,1023 * 8 - * 83 76 22 4a srl a1,19,t2 - * 84 36 21 4a srl a1, 9,t3 - * 03 00 62 44 and t2,t1,t2 - */ - 0x205f1ff8, - 0x4a227683, - 0x4a213684, - 0x44620003, - - /* - * t10 is vaddr_to_hostaddr_table0 - * - * a3 = tbl0[t2] (load entry from tbl0) - * 12 04 03 43 addq t10,t2,a2 - */ - 0x43030412, - - /* 04 00 82 44 and t3,t1,t3 */ - 0x44820004, - - /* 00 00 72 a6 ldq a3,0(a2) */ - 0xa6720000, - - /* 24 37 80 48 sll t3,0x1,t3 */ - 0x48803724, - - /* ff 0f 5f 20 lda t1,4095 */ - 0x205f0fff, - - /* - * a3 = tbl1[t3] (load entry from tbl1 (which is a3)) - * 13 04 64 42 addq a3,t3,a3 - */ - 0x42640413, - - /* 02 00 22 46 and a1,t1,t1 */ - 0x46220002, - - /* 00 00 73 a6 ldq a3,8(a3) */ - 0xa6730008, + 0x4a21f622, /* zapnot a1,0xf,t1 */ + 0x209f0fff, /* lda t3,4095 */ + 0x48419682, /* srl t1,0xc,t1 t1 = addr >> 12 */ + 0x46240004, /* and a1,t3,t3 t3 = addr & 4095 */ + 0x404f0642, /* s8addq t1,s6,t1 &host_store[t1] */ + 0xa6620000, /* ldq a3,0(t1) a3 = host_store[t1] */ /* NULL? Then return failure at once. */ /* beq a3, return */ 0xe6600002, /* The rest of the load/store code was written with t3 as the address. 
*/ - - /* Add the offset within the page: */ - /* 04 04 62 42 addq a3,t1,t3 */ - 0x42620404, - - 0x6be50000, /* jmp (t4) */ + 0x42640404, /* addq a3,t3,t3 */ + 0x6be58000, /* ret (t4) */ /* return: */ 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */ 0x44270407, /* or t0,t6,t6 */ - 0x6bfa8001 /* ret */ + 0x6bfa8000 /* ret */ }; static void (*bintrans_runchunk)(struct cpu *, unsigned char *); @@ -369,7 +257,7 @@ static void bintrans_write_chunkreturn(unsigned char **addrp) { uint32_t *a = (uint32_t *) *addrp; - *a++ = 0x6bfa8001; /* ret */ + *a++ = 0x6bfa8000; /* ret */ *addrp = (unsigned char *) a; } @@ -384,7 +272,7 @@ /* 07 04 27 44 or t0,t6,t6 */ *a++ = 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16); *a++ = 0x44270407; - *a++ = 0x6bfa8001; /* ret */ + *a++ = 0x6bfa8000; /* ret */ *addrp = (unsigned char *) a; } @@ -483,7 +371,8 @@ /* * bintrans_write_instruction__addiu_etc(): */ -static int bintrans_write_instruction__addiu_etc(unsigned char **addrp, +static int bintrans_write_instruction__addiu_etc( + struct memory *mem, unsigned char **addrp, int rt, int rs, int imm, int instruction_type) { uint32_t *a; @@ -641,7 +530,8 @@ /* * bintrans_write_instruction__addu_etc(): */ -static int bintrans_write_instruction__addu_etc(unsigned char **addrp, +static int bintrans_write_instruction__addu_etc( + struct memory *mem, unsigned char **addrp, int rd, int rs, int rt, int sa, int instruction_type) { unsigned char *a, *unmodified = NULL; @@ -1110,12 +1000,12 @@ * Perform the jump by setting cpu->delay_slot = 0 * and pc = cpu->delay_jmpaddr. 
*/ + bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T3); + bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_DELAY_JMPADDR, ALPHA_T5); + /* 00 00 3f 21 lda s0,0 */ *a++ = 0; *a++ = 0; *a++ = 0x3f; *a++ = 0x21; - bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_DELAY_JMPADDR, ALPHA_T0); - bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T3); - bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, MIPSREG_PC); } if (potential_chunk_p == NULL) { @@ -1172,21 +1062,21 @@ * 04 00 82 44 and t3,t1,t3 * a3 05 24 40 cmpeq t0,t3,t2 * 01 00 60 f4 bne t2,7c - * 01 80 fa 6b ret + * 00 80 fa 6b ret */ *a++ = 0x00; *a++ = 0xf0; *a++ = 0x5f; *a++ = 0x20; /* lda */ *a++ = 0x01; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and */ *a++ = 0x04; *a++ = 0x00; *a++ = 0x82; *a++ = 0x44; /* and */ *a++ = 0xa3; *a++ = 0x05; *a++ = 0x24; *a++ = 0x40; /* cmpeq */ *a++ = 0x01; *a++ = 0x00; *a++ = 0x60; *a++ = 0xf4; /* bne */ - *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ + *a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ /* Don't execute too many instructions. (see comment below) */ *a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255; *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */ *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */ *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */ - *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ + *a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ /* 15 bits at a time, which means max 60 bits, but that should be enough. the top 4 bits are probably @@ -1253,7 +1143,7 @@ *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */ /* Failure, then return to the main loop. 
*/ - *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ + *a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ } } else { /* @@ -1275,7 +1165,7 @@ *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */ *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */ *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */ - *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ + *a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ } /* @@ -1357,7 +1247,7 @@ *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */ /* "Failure", then let's return to the main loop. */ - *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ + *a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ } } @@ -1377,12 +1267,14 @@ */ static int bintrans_write_instruction__loadstore( struct memory *mem, unsigned char **addrp, - int rt, int imm, int rs, int instruction_type, int bigendian) + int rt, int imm, int rs, int instruction_type, int bigendian, + int do_alignment_check) { unsigned char *a, *fail, *generic64bit = NULL, *generic64bitA = NULL; unsigned char *doloadstore = NULL, *ok_unaligned_load3, *ok_unaligned_load2, *ok_unaligned_load1; uint32_t *b; + int need_imm_lda = 0; int ofs, alignment, load = 0, alpha_rs, alpha_rt, unaligned = 0; /* TODO: Not yet: */ @@ -1425,16 +1317,21 @@ /* * a1 = gpr[rs] + imm; * - * 88 08 30 a4 ldq t0,2184(a0) - * 34 12 21 22 lda a1,4660(t0) + * rs_ofs 30 a6 ldq a1,rs(a0) + * imm 31 22 lda a1,imm(a1) */ alpha_rs = map_MIPS_to_Alpha[rs]; if (alpha_rs < 0) { - bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); - alpha_rs = ALPHA_T0; + bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_A1); + alpha_rs = ALPHA_A1; + } else + need_imm_lda = 1; + + if (imm != 0 || need_imm_lda) { + *a++ = imm; *a++ = (imm >> 8); + *a++ = 0x20 + alpha_rs; *a++ = 0x22; } - *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22; alignment = 0; switch (instruction_type) { @@ -1477,7 +1374,7 @@ */ *a++
= 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; *a++ = 0x31; *a++ = 0x05; *a++ = 0x22; *a++ = 0x42; - } else if (alignment > 0) { + } else if (alignment > 0 && do_alignment_check) { /* * Check alignment: * @@ -1486,7 +1383,7 @@ * 02 f0 20 46 and a1,0x7,t1 * 02 f0 21 46 and a1,0xf,t1 * 01 00 40 e4 beq t1, - * 01 80 fa 6b ret + * 00 80 fa 6b ret */ *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; fail = a; @@ -2607,6 +2504,10 @@ case CALL_ERET: case CALL_BREAK: case CALL_SYSCALL: + /* Increase the nr of instructions: */ + a = (uint32_t *) *addrp; + *a++ = 0x20e70001; /* lda t6,1(t6) */ + *addrp = (unsigned char *) a; break; default: bintrans_write_pc_inc(addrp); @@ -2667,8 +2568,8 @@ *p++ = 0xa5900000 | ofs_ra; /* ldq s3,"gpr[ra]"(a0) */ *p++ = 0xa5b00000 | ofs_t0; /* ldq s4,"gpr[t0]"(a0) */ *p++ = 0xa5d00000 | ofs_t1; /* ldq s5,"gpr[t1]"(a0) */ - *p++ = 0xa5f00000 | ofs_t2; /* ldq s6,"gpr[t2]"(a0) */ - *p++ = 0xa7100000 | ofs_tbl0; /* ldq t10,table0(a0) */ + *p++ = 0xa5f00000 | ofs_h_s; /* ldq s6,host_store(a0) */ + *p++ = 0xa7100000 | ofs_h_l; /* ldq t10,host_load(a0) */ *p++ = 0xa7300000 | ofs_v0; /* ldq t11,"gpr[v0]"(a0) */ *p++ = 0x6b514000; /* jsr ra,(a1), */ @@ -2684,7 +2585,6 @@ *p++ = 0xb5900000 | ofs_ra; /* stq s3,"gpr[ra]"(a0) */ *p++ = 0xb5b00000 | ofs_t0; /* stq s4,"gpr[t0]"(a0) */ *p++ = 0xb5d00000 | ofs_t1; /* stq s5,"gpr[t1]"(a0) */ - *p++ = 0xb5f00000 | ofs_t2; /* stq s6,"gpr[t2]"(a0) */ *p++ = 0xb7300000 | ofs_v0; /* stq t11,"gpr[v0]"(a0) */ *p++ = 0xa75e0000; /* ldq ra,0(sp) */ @@ -2697,7 +2597,7 @@ *p++ = 0xa5fe0038; /* ldq s6,56(sp) */ *p++ = 0xa7be0058; /* ldq gp,0x58(sp) */ *p++ = 0x23de0060; /* lda sp,0x60(sp) */ - *p++ = 0x6bfa8001; /* ret */ + *p++ = 0x6bfa8000; /* ret */ /* "jump to 32bit pc": */ @@ -2723,27 +2623,25 @@ q = p; /* *q is updated later */ *p++ = 0xe4200001; /* beq ret (far below) */ - *p++ = 0x40c01411; /* addq t5,0,a1 */ - /* * Special case for 
32-bit addressing: * * t1 = 1023; - * t2 = ((a1 >> 22) & t1) * sizeof(void *); - * t3 = ((a1 >> 12) & t1) * sizeof(void *); - * t1 = a1 & 4095; + * t2 = ((pc >> 22) & t1) * sizeof(void *); + * t3 = ((pc >> 12) & t1) * sizeof(void *); + * t1 = pc & 4095; */ *p++ = 0x205f1ff8; /* lda t1,1023 * 8 */ - *p++ = 0x4a227683; /* srl a1,19,t2 */ - *p++ = 0x4a213684; /* srl a1, 9,t3 */ + *p++ = 0x48c27683; /* srl t5,19,t2 */ + *p++ = 0x48c13684; /* srl t5, 9,t3 */ + *p++ = 0x44620003; /* and t2,t1,t2 */ - /* - * t10 is vaddr_to_hostaddr_table0 - * - * a3 = tbl0[t2] (load entry from tbl0) - */ - *p++ = 0x43030412; /* addq t10,t2,a2 */ + /* ldq a2, vaddr_to_hostaddr_table0(a0) */ + *p++ = 0xa6500000 | ofs_tbl0; + + /* a3 = tbl0[t2] (load entry from tbl0) */ + *p++ = 0x40720412; /* addq t2,a2,a2 */ *p++ = 0x44820004; /* and t3,t1,t3 */ *p++ = 0xa6720000; /* ldq a3,0(a2) */ *p++ = 0x205f0ffc; /* lda t1,0xffc */ @@ -2752,10 +2650,11 @@ * a3 = tbl1[t3] (load entry from tbl1 (which is a3)) */ *p++ = 0x42640413; /* addq a3,t3,a3 */ - *p++ = 0x46220002; /* and a1,t1,t1 */ *p++ = 0xa6730000 | ofs_c0; /* ldq a3,chunks[0](a3) */ + *p++ = 0x44c20002; /* and t5,t1,t1 */ + /* * NULL? Then just return. */ @@ -2765,11 +2664,11 @@ *p++ = 0x40530402; /* addq t1,a3,t1 */ *p++ = 0xa0220000; /* ldl t0,0(t1) */ - /* No translation? Then return. */ - *p++ = 0xe4200003; /* beq t0, */ - *p++ = 0xa4700000 | ofs_cb; /* ldq t2,chunk_base_address(a0) */ + /* No translation? Then return. */ + *p++ = 0xe4200002; /* beq t0, */ + *p++ = 0x40230401; /* addq t0,t2,t0 */ *p++ = 0x6be10000; /* jmp (t0) */