--- trunk/src/cpu_arm_instr.c 2007/10/08 16:18:27 10 +++ trunk/src/cpu_arm_instr.c 2007/10/08 16:18:38 12 @@ -25,11 +25,11 @@ * SUCH DAMAGE. * * - * $Id: cpu_arm_instr.c,v 1.20 2005/06/27 09:20:19 debug Exp $ + * $Id: cpu_arm_instr.c,v 1.51 2005/08/16 05:37:10 debug Exp $ * * ARM instructions. * - * Individual functions should keep track of cpu->cd.arm.n_translated_instrs. + * Individual functions should keep track of cpu->n_translated_instrs. * (If no instruction was executed, then it should be decreased. If, say, 4 * instructions were combined into one function and executed, then it should * be increased by 3.) @@ -53,9 +53,6 @@ * all of these functions. */ -#define X(n) void arm_instr_ ## n(struct cpu *cpu, \ - struct arm_instr_call *ic) - #define Y(n) void arm_instr_ ## n ## __eq(struct cpu *cpu, \ struct arm_instr_call *ic) \ { if (cpu->cd.arm.flags & ARM_FLAG_Z) \ @@ -134,32 +131,6 @@ #define cond_instr(n) ( arm_cond_instr_ ## n [condition_code] ) -/* This is for marking a physical page as containing combined instructions: */ -#define combined (cpu->cd.arm.cur_physpage->flags |= ARM_COMBINATIONS) - - -void arm_translate_instruction(struct cpu *cpu, struct arm_instr_call *ic); - - -/* - * nothing: Do nothing. - * - * The difference between this function and the "nop" instruction is that - * this function does not increase the program counter or the number of - * translated instructions. It is used to "get out" of running in translated - * mode. - */ -X(nothing) -{ - cpu->cd.arm.running_translated = 0; - cpu->cd.arm.n_translated_instrs --; - cpu->cd.arm.next_ic --; -} - - -static struct arm_instr_call nothing_call = { instr(nothing), {0,0,0} }; - - /*****************************************************************************/ @@ -178,24 +149,18 @@ */ X(b) { - int low_pc; - uint32_t old_pc; - - /* fatal("b: arg[0] = 0x%08x, pc=0x%08x\n", ic->arg[0], cpu->pc); */ + uint32_t low_pc; /* Calculate new PC from this instruction + arg[0] */ low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((IC_ENTRIES_PER_PAGE-1) << 2); + cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << 2); cpu->cd.arm.r[ARM_PC] += (low_pc << 2); - old_pc = cpu->cd.arm.r[ARM_PC]; - /* fatal("b: 3: old_pc=0x%08x\n", old_pc); */ cpu->cd.arm.r[ARM_PC] += (int32_t)ic->arg[0]; cpu->pc = cpu->cd.arm.r[ARM_PC]; - /* fatal("b: 2: pc=0x%08x\n", cpu->pc); */ - fatal("b: different page! TODO\n"); - exit(1); + /* Find the new physical page and update the translation pointers: */ + arm_pc_to_pointers(cpu); } Y(b) @@ -216,24 +181,64 @@ * bl: Branch and Link (to a different translated page) * * arg[0] = relative address - * - * TODO: Implement this. - * TODO: How about function call trace? */ X(bl) { - fatal("bl different page: TODO\n"); - exit(1); + uint32_t lr, low_pc; + + /* Figure out what the return (link) address will be: */ + low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) + cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); + lr = cpu->cd.arm.r[ARM_PC]; + lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << 2); + lr += (low_pc << 2); + + /* Link: */ + cpu->cd.arm.r[ARM_LR] = lr; + + /* Calculate new PC from this instruction + arg[0] */ + cpu->pc = cpu->cd.arm.r[ARM_PC] = lr - 4 + (int32_t)ic->arg[0]; + + /* Find the new physical page and update the translation pointers: */ + arm_pc_to_pointers(cpu); } Y(bl) /* + * bl_trace: Branch and Link (to a different translated page), with trace + * + * Same as for bl. + */ +X(bl_trace) +{ + uint32_t lr, low_pc; + + /* Figure out what the return (link) address will be: */ + low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) + cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); + lr = cpu->cd.arm.r[ARM_PC]; + lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << 2); + lr += (low_pc << 2); + + /* Link: */ + cpu->cd.arm.r[ARM_LR] = lr; + + /* Calculate new PC from this instruction + arg[0] */ + cpu->pc = cpu->cd.arm.r[ARM_PC] = lr - 4 + (int32_t)ic->arg[0]; + + cpu_functioncall_trace(cpu, cpu->pc); + + /* Find the new physical page and update the translation pointers: */ + arm_pc_to_pointers(cpu); +} +Y(bl_trace) + + +/* * bl_samepage: A branch + link within the same page * * arg[0] = pointer to new arm_instr_call - * - * TODO: How about function call trace? */ X(bl_samepage) { @@ -243,7 +248,7 @@ low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); lr = cpu->cd.arm.r[ARM_PC]; - lr &= ~((IC_ENTRIES_PER_PAGE-1) << 2); + lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << 2); lr += (low_pc << 2); /* Link: */ @@ -256,222 +261,148 @@ /* - * mov: Set a 32-bit register to a 32-bit value. + * bl_samepage_trace: Branch and Link (to the same page), with trace * - * arg[0] = pointer to uint32_t in host memory - * arg[1] = 32-bit value + * Same as for bl_samepage. */ -X(mov) +X(bl_samepage_trace) { - *((uint32_t *)ic->arg[0]) = ic->arg[1]; -} -Y(mov) + uint32_t tmp_pc, lr, low_pc; + /* Figure out what the return (link) address will be: */ + low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) + cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); + lr = cpu->cd.arm.r[ARM_PC]; + lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << 2); + lr += (low_pc << 2); -/* - * clear: Set a 32-bit register to 0. (A "mov" to fixed value zero.) - * - * arg[0] = pointer to uint32_t in host memory - */ -X(clear) -{ - *((uint32_t *)ic->arg[0]) = 0; -} -Y(clear) + /* Link: */ + cpu->cd.arm.r[ARM_LR] = lr; + /* Branch: */ + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic->arg[0]; -/* - * load_byte_imm: Load an 8-bit byte from emulated memory and store it in - * a 32-bit word in host memory. - * - * arg[0] = pointer to uint32_t in host memory of base address - * arg[1] = 32-bit offset - * arg[2] = pointer to uint32_t in host memory where to store the value - */ -X(load_byte_imm) -{ - unsigned char data[1]; - uint32_t addr = *((uint32_t *)ic->arg[0]) + ic->arg[1]; - if (!cpu->memory_rw(cpu, cpu->mem, addr, data, sizeof(data), - MEM_READ, CACHE_DATA)) { - fatal("load failed: TODO\n"); - exit(1); - } - *((uint32_t *)ic->arg[2]) = data[0]; + low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) + cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); + tmp_pc = cpu->cd.arm.r[ARM_PC]; + tmp_pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << 2); + tmp_pc += (low_pc << 2); + cpu_functioncall_trace(cpu, tmp_pc); } -Y(load_byte_imm) +Y(bl_samepage_trace) /* - * load_byte_w_imm: - * Load an 8-bit byte from emulated memory and store it in - * a 32-bit word in host memory, with address writeback. + * mov_pc: "mov pc,reg" * - * arg[0] = pointer to uint32_t in host memory of base address - * arg[1] = 32-bit offset - * arg[2] = pointer to uint32_t in host memory where to store the value + * arg[0] = pointer to uint32_t in host memory of source register */ -X(load_byte_w_imm) +X(mov_pc) { - unsigned char data[1]; - uint32_t addr = *((uint32_t *)ic->arg[0]) + ic->arg[1]; - if (!cpu->memory_rw(cpu, cpu->mem, addr, data, sizeof(data), - MEM_READ, CACHE_DATA)) { - fatal("load failed: TODO\n"); - exit(1); - } - *((uint32_t *)ic->arg[2]) = data[0]; - *((uint32_t *)ic->arg[0]) = addr; -} -Y(load_byte_w_imm) + uint32_t old_pc = cpu->cd.arm.r[ARM_PC]; + uint32_t mask_within_page = ((ARM_IC_ENTRIES_PER_PAGE-1) << 2) | 3; + /* Update the PC register: */ + cpu->pc = cpu->cd.arm.r[ARM_PC] = *((uint32_t *)ic->arg[0]); -/* - * load_byte_wpost_imm: - * Load an 8-bit byte from emulated memory and store it in - * a 32-bit word in host memory, with address writeback AFTER the load. - * - * arg[0] = pointer to uint32_t in host memory of base address - * arg[1] = 32-bit offset - * arg[2] = pointer to uint32_t in host memory where to store the value - */ -X(load_byte_wpost_imm) -{ - unsigned char data[1]; - uint32_t addr = *((uint32_t *)ic->arg[0]); - if (!cpu->memory_rw(cpu, cpu->mem, addr, data, sizeof(data), - MEM_READ, CACHE_DATA)) { - fatal("load failed: TODO\n"); - exit(1); + /* + * Is this a return to code within the same page? Then there is no + * need to update all pointers, just next_ic. + */ + if ((old_pc & ~mask_within_page) == (cpu->pc & ~mask_within_page)) { + cpu->cd.arm.next_ic = cpu->cd.arm.cur_ic_page + + ((cpu->pc & mask_within_page) >> 2); + } else { + /* Find the new physical page and update pointers: */ + arm_pc_to_pointers(cpu); } - *((uint32_t *)ic->arg[2]) = data[0]; - *((uint32_t *)ic->arg[0]) = addr + ic->arg[1]; } -Y(load_byte_wpost_imm) +Y(mov_pc) /* - * store_byte_imm: Load a word from a 32-bit word in host memory, and store - * the lowest 8 bits of that word at an emulated memory - * address. + * ret_trace: "mov pc,lr" with trace enabled * - * arg[0] = pointer to uint32_t in host memory of base address - * arg[1] = 32-bit offset - * arg[2] = pointer to uint32_t in host memory where to load the value from + * arg[0] = ignored (similar to mov_pc above) */ -X(store_byte_imm) +X(ret_trace) { - unsigned char data[1]; - uint32_t addr = *((uint32_t *)ic->arg[0]) + ic->arg[1]; - data[0] = *((uint32_t *)ic->arg[2]); - if (!cpu->memory_rw(cpu, cpu->mem, addr, data, sizeof(data), - MEM_WRITE, CACHE_DATA)) { - fatal("store failed: TODO\n"); - exit(1); - } -} -Y(store_byte_imm) + uint32_t old_pc = cpu->cd.arm.r[ARM_PC]; + uint32_t mask_within_page = ((ARM_IC_ENTRIES_PER_PAGE-1) << 2) | 3; + /* Update the PC register: */ + cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR]; -/* - * store_byte_wpost_imm: - * Load a word from a 32-bit word in host memory, and store - * the lowest 8 bits of that word at an emulated memory address. - * Then add the immediate offset to the address, and write back - * to the first word. - * - * arg[0] = pointer to uint32_t in host memory of base address - * arg[1] = 32-bit offset - * arg[2] = pointer to uint32_t in host memory where to load the value from - */ -X(store_byte_wpost_imm) -{ - unsigned char data[1]; - uint32_t addr = *((uint32_t *)ic->arg[0]); - data[0] = *((uint32_t *)ic->arg[2]); - if (!cpu->memory_rw(cpu, cpu->mem, addr, data, sizeof(data), - MEM_WRITE, CACHE_DATA)) { - fatal("store failed: TODO\n"); - exit(1); + cpu_functioncall_trace_return(cpu); + + /* + * Is this a return to code within the same page? Then there is no + * need to update all pointers, just next_ic. + */ + if ((old_pc & ~mask_within_page) == (cpu->pc & ~mask_within_page)) { + cpu->cd.arm.next_ic = cpu->cd.arm.cur_ic_page + + ((cpu->pc & mask_within_page) >> 2); + } else { + /* Find the new physical page and update pointers: */ + arm_pc_to_pointers(cpu); } - *((uint32_t *)ic->arg[0]) = addr + ic->arg[1]; } -Y(store_byte_wpost_imm) +Y(ret_trace) /* - * load_word_imm: - * Load a 32-bit word from emulated memory and store it in - * a 32-bit word in host memory. + * mov_regreg: * - * arg[0] = pointer to uint32_t in host memory of base address - * arg[1] = 32-bit offset - * arg[2] = pointer to uint32_t in host memory where to store the value + * arg[0] = pointer to uint32_t in host memory of destination register + * arg[1] = pointer to uint32_t in host memory of source register */ -X(load_word_imm) +X(mov_regreg) { - unsigned char data[sizeof(uint32_t)]; - uint32_t addr = *((uint32_t *)ic->arg[0]) + ic->arg[1]; - if (!cpu->memory_rw(cpu, cpu->mem, addr, data, sizeof(data), - MEM_READ, CACHE_DATA)) { - fatal("load word failed: TODO\n"); - exit(1); - } - /* TODO: Big endian */ - *((uint32_t *)ic->arg[2]) = data[0] + (data[1] << 8) + - (data[2] << 16) + (data[3] << 24); + *((uint32_t *)ic->arg[0]) = *((uint32_t *)ic->arg[1]); } -Y(load_word_imm) +Y(mov_regreg) /* - * load_word_w_imm: - * Load a 32-bit word from emulated memory and store it in - * a 32-bit word in host memory, with address writeback. + * mov: Set a 32-bit register to a 32-bit value. * - * arg[0] = pointer to uint32_t in host memory of base address - * arg[1] = 32-bit offset - * arg[2] = pointer to uint32_t in host memory where to store the value + * arg[0] = pointer to uint32_t in host memory + * arg[1] = 32-bit value */ -X(load_word_w_imm) +X(mov) { - unsigned char data[sizeof(uint32_t)]; - uint32_t addr = *((uint32_t *)ic->arg[0]) + ic->arg[1]; - if (!cpu->memory_rw(cpu, cpu->mem, addr, data, sizeof(data), - MEM_READ, CACHE_DATA)) { - fatal("load word failed: TODO\n"); - exit(1); - } - /* TODO: Big endian */ - *((uint32_t *)ic->arg[2]) = data[0] + (data[1] << 8) + - (data[2] << 16) + (data[3] << 24); - *((uint32_t *)ic->arg[0]) = addr; + *((uint32_t *)ic->arg[0]) = ic->arg[1]; } -Y(load_word_w_imm) +Y(mov) /* - * store_word_imm: Load a 32-bit word from host memory and store it - * in emulated memory. + * clear: Set a 32-bit register to 0. (A "mov" to fixed value zero.) * - * arg[0] = pointer to uint32_t in host memory of base address - * arg[1] = 32-bit offset - * arg[2] = pointer to uint32_t in host memory where to load the value from. + * arg[0] = pointer to uint32_t in host memory */ -X(store_word_imm) +X(clear) { - unsigned char data[sizeof(uint32_t)]; - uint32_t addr = *((uint32_t *)ic->arg[0]) + ic->arg[1]; - uint32_t x = *((uint32_t *)ic->arg[2]); - /* TODO: Big endian */ - data[0] = x; data[1] = x >> 8; data[2] = x >> 16; data[3] = x >> 24; - if (!cpu->memory_rw(cpu, cpu->mem, addr, data, sizeof(data), - MEM_WRITE, CACHE_DATA)) { - fatal("store word failed: TODO\n"); - exit(1); - } + *((uint32_t *)ic->arg[0]) = 0; } -Y(store_word_imm) +Y(clear) + + +#include "tmp_arm_include.c" + + +#define A__NAME arm_instr_store_w0_byte_u1_p0_imm_fixinc1 +#define A__NAME__general arm_instr_store_w0_byte_u1_p0_imm_fixinc1__general +#define A__B +#define A__U +#define A__NOCONDITIONS +#define A__FIXINC 1 +#include "cpu_arm_instr_loadstore.c" +#undef A__NOCONDITIONS +#undef A__B +#undef A__U +#undef A__NAME__general +#undef A__NAME /* @@ -490,7 +421,7 @@ low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((IC_ENTRIES_PER_PAGE-1) << 2); + cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << 2); cpu->cd.arm.r[ARM_PC] += (low_pc << 2); addr = cpu->cd.arm.r[ARM_PC] + 8 + ic->arg[1]; @@ -520,7 +451,7 @@ low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((IC_ENTRIES_PER_PAGE-1) << 2); + cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << 2); cpu->cd.arm.r[ARM_PC] += (low_pc << 2); addr = cpu->cd.arm.r[ARM_PC] + 8 + ic->arg[1]; @@ -537,6 +468,151 @@ /* + * bdt_load: Block Data Transfer, Load + * + * arg[0] = pointer to uint32_t in host memory, pointing to the base register + * arg[1] = 32-bit instruction word. Most bits are read from this. + */ +X(bdt_load) +{ + unsigned char data[4]; + uint32_t *np = (uint32_t *)ic->arg[0]; + uint32_t addr = *np; + uint32_t iw = ic->arg[1]; /* xxxx100P USWLnnnn llllllll llllllll */ + int p_bit = iw & 0x01000000; + int u_bit = iw & 0x00800000; + int s_bit = iw & 0x00400000; + int w_bit = iw & 0x00200000; + int i; + + if (s_bit) { + fatal("bdt: TODO: s-bit\n"); + exit(1); + } + + for (i=(u_bit? 0 : 15); i>=0 && i<=15; i+=(u_bit? 1 : -1)) + if ((iw >> i) & 1) { + /* Load register i: */ + if (p_bit) { + if (u_bit) + addr += sizeof(uint32_t); + else + addr -= sizeof(uint32_t); + } + if (!cpu->memory_rw(cpu, cpu->mem, addr, data, + sizeof(data), MEM_READ, CACHE_DATA)) { + fatal("bdt: load failed: TODO\n"); + exit(1); + } + if (cpu->byte_order == EMUL_LITTLE_ENDIAN) { + cpu->cd.arm.r[i] = data[0] + + (data[1] << 8) + (data[2] << 16) + + (data[3] << 24); + } else { + cpu->cd.arm.r[i] = data[3] + + (data[2] << 8) + (data[1] << 16) + + (data[0] << 24); + } + /* NOTE: Special case: */ + if (i == ARM_PC) { + cpu->cd.arm.r[ARM_PC] &= ~3; + cpu->pc = cpu->cd.arm.r[ARM_PC]; + /* TODO: There is no need to update the + pointers if this is a return to the + same page! */ + /* Find the new physical page and update the + translation pointers: */ + arm_pc_to_pointers(cpu); + } + if (!p_bit) { + if (u_bit) + addr += sizeof(uint32_t); + else + addr -= sizeof(uint32_t); + } + } + + if (w_bit) + *np = addr; +} +Y(bdt_load) + + +/* + * bdt_store: Block Data Transfer, Store + * + * arg[0] = pointer to uint32_t in host memory, pointing to the base register + * arg[1] = 32-bit instruction word. Most bits are read from this. + */ +X(bdt_store) +{ + unsigned char data[4]; + uint32_t *np = (uint32_t *)ic->arg[0]; + uint32_t addr = *np; + uint32_t iw = ic->arg[1]; /* xxxx100P USWLnnnn llllllll llllllll */ + int p_bit = iw & 0x01000000; + int u_bit = iw & 0x00800000; + int s_bit = iw & 0x00400000; + int w_bit = iw & 0x00200000; + int i; + + if (s_bit) { + fatal("bdt: TODO: s-bit\n"); + exit(1); + } + + if (iw & 0x8000) { + /* Synchronize the program counter: */ + uint32_t low_pc = ((size_t)ic - (size_t) + cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); + cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << 2); + cpu->cd.arm.r[ARM_PC] += (low_pc << 2); + cpu->pc = cpu->cd.arm.r[ARM_PC]; + } + + for (i=(u_bit? 0 : 15); i>=0 && i<=15; i+=(u_bit? 1 : -1)) + if ((iw >> i) & 1) { + /* Store register i: */ + uint32_t value = cpu->cd.arm.r[i]; + if (i == ARM_PC) + value += 12; /* TODO: 8 on some ARMs? */ + if (p_bit) { + if (u_bit) + addr += sizeof(uint32_t); + else + addr -= sizeof(uint32_t); + } + if (cpu->byte_order == EMUL_LITTLE_ENDIAN) { + data[0] = value; + data[1] = value >> 8; + data[2] = value >> 16; + data[3] = value >> 24; + } else { + data[0] = value >> 24; + data[1] = value >> 16; + data[2] = value >> 8; + data[3] = value; + } + if (!cpu->memory_rw(cpu, cpu->mem, addr, data, + sizeof(data), MEM_WRITE, CACHE_DATA)) { + fatal("bdt: store failed: TODO\n"); + exit(1); + } + if (!p_bit) { + if (u_bit) + addr += sizeof(uint32_t); + else + addr -= sizeof(uint32_t); + } + } + + if (w_bit) + *np = addr; +} +Y(bdt_store) + + +/* * cmps: Compare a 32-bit register to a 32-bit value. (Subtraction.) * * arg[0] = pointer to uint32_t in host memory @@ -549,9 +625,11 @@ a = *((uint32_t *)ic->arg[0]); b = ic->arg[1]; - c = a - b; cpu->cd.arm.flags &= ~(ARM_FLAG_Z | ARM_FLAG_N | ARM_FLAG_V | ARM_FLAG_C); + c = a - b; + if (a > b) + cpu->cd.arm.flags |= ARM_FLAG_C; if (c == 0) cpu->cd.arm.flags |= ARM_FLAG_Z; if ((int32_t)c < 0) { @@ -559,17 +637,19 @@ n = 1; } else n = 0; - v = !n; if ((int32_t)a >= (int32_t)b) v = n; + else + v = !n; if (v) cpu->cd.arm.flags |= ARM_FLAG_V; - if (a > b) - cpu->cd.arm.flags |= ARM_FLAG_C; } Y(cmps) +#include "cpu_arm_instr_cmps.c" + + /* * sub: Subtract an immediate value from a 32-bit word, and store the * result in a 32-bit word. @@ -610,6 +690,9 @@ Y(add_self) +#include "tmp_arm_include_self.c" + + /*****************************************************************************/ @@ -623,37 +706,83 @@ *((uint32_t *)ic[0].arg[0]) = ic[0].arg[1]; *((uint32_t *)ic[1].arg[0]) = ic[1].arg[1]; cpu->cd.arm.next_ic ++; - cpu->cd.arm.n_translated_instrs ++; + cpu->n_translated_instrs ++; } -/*****************************************************************************/ +/* + * fill_loop_test: + * + * A byte-fill loop. Fills at most one page at a time. If the page was not + * in the host_store table, then the original sequence (beginning with + * cmps r2,#0) is executed instead. + * + * Z:cmps r2,#0 ic[0] + * strb rX,[rY],#1 ic[1] + * sub r2,r2,#1 ic[2] + * bgt Z ic[3] + */ +X(fill_loop_test) +{ + uint32_t addr, a, n, ofs, maxlen; + unsigned char *page; + addr = *((uint32_t *)ic[1].arg[0]); + n = cpu->cd.arm.r[2] + 1; + ofs = addr & 0xfff; + maxlen = 4096 - ofs; + if (n > maxlen) + n = maxlen; -X(to_be_translated) -{ - /* Translate the instruction... */ - arm_translate_instruction(cpu, ic); + page = cpu->cd.arm.host_store[addr >> 12]; + if (page == NULL) { + arm_cmps_0[2](cpu, ic); + return; + } + + /* printf("x = %x, n = %i\n", *((uint32_t *)ic[1].arg[2]), n); */ + memset(page + ofs, *((uint32_t *)ic[1].arg[2]), n); + + *((uint32_t *)ic[1].arg[0]) = addr + n; + + cpu->cd.arm.r[2] -= n; + cpu->n_translated_instrs += (4 * n); + + a = cpu->cd.arm.r[2]; + + cpu->cd.arm.flags &= + ~(ARM_FLAG_Z | ARM_FLAG_N | ARM_FLAG_V | ARM_FLAG_C); + if (a != 0) + cpu->cd.arm.flags |= ARM_FLAG_C; + else + cpu->cd.arm.flags |= ARM_FLAG_Z; + if ((int32_t)a < 0) + cpu->cd.arm.flags |= ARM_FLAG_N; + + cpu->n_translated_instrs --; - /* ... and execute it: */ - ic->f(cpu, ic); + if ((int32_t)a > 0) + cpu->cd.arm.next_ic --; + else + cpu->cd.arm.next_ic += 3; } +/*****************************************************************************/ + + X(end_of_page) { - printf("end_of_page()! pc=0x%08x\n", cpu->cd.arm.r[ARM_PC]); - - /* Update the PC: Offset 0, but then go to next page: */ - cpu->cd.arm.r[ARM_PC] &= ~((IC_ENTRIES_PER_PAGE-1) << 2); - cpu->cd.arm.r[ARM_PC] += (IC_ENTRIES_PER_PAGE << 2); + /* Update the PC: (offset 0, but on the next page) */ + cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << 2); + cpu->cd.arm.r[ARM_PC] += (ARM_IC_ENTRIES_PER_PAGE << 2); cpu->pc = cpu->cd.arm.r[ARM_PC]; - /* Find the new (physical) page: */ - /* TODO */ + /* Find the new physical page and update the translation pointers: */ + arm_pc_to_pointers(cpu); - printf("TODO: end_of_page()! new pc=0x%08x\n", cpu->cd.arm.r[ARM_PC]); - exit(1); + /* end_of_page doesn't count as an executed instruction: */ + cpu->n_translated_instrs --; } @@ -665,10 +794,11 @@ * * Combine two or more instructions, if possible, into a single function call. */ -void arm_combine_instructions(struct cpu *cpu, struct arm_instr_call *ic) +void arm_combine_instructions(struct cpu *cpu, struct arm_instr_call *ic, + uint32_t addr) { int n_back; - n_back = (cpu->pc >> 2) & (IC_ENTRIES_PER_PAGE-1); + n_back = (addr >> 2) & (ARM_IC_ENTRIES_PER_PAGE-1); if (n_back >= 1) { /* Double "mov": */ @@ -695,36 +825,66 @@ } } } + + if (n_back >= 3) { + if (ic[-3].f == arm_cmps_0[2] && + ic[-2].f == instr(store_w0_byte_u1_p0_imm) && + ic[-2].arg[1] == 1 && + ic[-1].f == arm_sub_self_1[2] && + ic[ 0].f == instr(b_samepage__gt) && + ic[ 0].arg[0] == (size_t)&ic[-3]) { + ic[-3].f = instr(fill_loop_test); + combined; + } + } + + /* TODO: Combine forward as well */ } +/*****************************************************************************/ + + /* - * arm_translate_instruction(): + * arm_instr_to_be_translated(): * - * Translate an instruction word into an arm_instr_call. + * Translate an instruction word into an arm_instr_call. ic is filled in with + * valid data for the translated instruction, or a "nothing" instruction if + * there was a translation failure. The newly translated instruction is then + * executed. */ -void arm_translate_instruction(struct cpu *cpu, struct arm_instr_call *ic) +X(to_be_translated) { uint32_t addr, low_pc, iword, imm; + unsigned char *page; unsigned char ib[4]; int condition_code, main_opcode, secondary_opcode, s_bit, r16, r12, r8; int p_bit, u_bit, b_bit, w_bit, l_bit; void (*samepage_function)(struct cpu *, struct arm_instr_call *); - /* Make sure that PC is in synch: */ + /* Figure out the (virtual) address of the instruction: */ low_pc = ((size_t)ic - (size_t)cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((IC_ENTRIES_PER_PAGE-1) << 2); - cpu->cd.arm.r[ARM_PC] += (low_pc << 2); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + addr = cpu->cd.arm.r[ARM_PC] & ~((ARM_IC_ENTRIES_PER_PAGE-1) << + ARM_INSTR_ALIGNMENT_SHIFT); + addr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc = cpu->cd.arm.r[ARM_PC] = addr; + addr &= ~0x3; /* Read the instruction word from memory: */ - addr = cpu->pc & ~0x3; + page = cpu->cd.arm.host_load[addr >> 12]; - if (!cpu->memory_rw(cpu, cpu->mem, addr, &ib[0], - sizeof(ib), MEM_READ, CACHE_INSTRUCTION)) { - fatal("arm_translate_instruction(): read failed: TODO\n"); - goto bad; + if (page != NULL) { + /* fatal("TRANSLATION HIT!\n"); */ + memcpy(ib, page + (addr & 0xfff), sizeof(ib)); + } else { + /* fatal("TRANSLATION MISS!\n"); */ + if (!cpu->memory_rw(cpu, cpu->mem, addr, &ib[0], + sizeof(ib), MEM_READ, CACHE_INSTRUCTION)) { + fatal("to_be_translated(): " + "read failed: TODO\n"); + goto bad; + } } if (cpu->byte_order == EMUL_LITTLE_ENDIAN) @@ -735,6 +895,12 @@ /* fatal("{ ARM translating pc=0x%08x iword=0x%08x }\n", addr, iword); */ + +#define DYNTRANS_TO_BE_TRANSLATED_HEAD +#include "cpu_dyntrans.c" +#undef DYNTRANS_TO_BE_TRANSLATED_HEAD + + /* The idea of taking bits 27..24 was found here: http://armphetamine.sourceforge.net/oldinfo.html */ condition_code = iword >> 28; @@ -766,8 +932,34 @@ case 0x2: case 0x3: if ((main_opcode & 2) == 0) { - fatal("REGISTER FORM! TODO\n"); - goto bad; + if ((iword & 0x0ffffff0) == 0x01a0f000) { + /* Hardcoded: mov pc, rX */ + if ((iword & 15) == ARM_PC) { + fatal("mov pc,pc?\n"); + goto bad; + } + ic->f = cond_instr(mov_pc); + ic->arg[0] = (size_t) + (&cpu->cd.arm.r[iword & 15]); + if ((iword & 15) == ARM_LR && + cpu->machine->show_trace_tree) + ic->f = cond_instr(ret_trace); + } else if ((iword & 0x0fff0ff0) == 0x01a00000) { + /* Hardcoded: mov reg,reg */ + if ((iword & 15) == ARM_PC) { + fatal("mov reg,pc?\n"); + goto bad; + } + ic->f = cond_instr(mov_regreg); + ic->arg[0] = (size_t) + (&cpu->cd.arm.r[r12]); + ic->arg[1] = (size_t) + (&cpu->cd.arm.r[iword & 15]); + } else { + fatal("REGISTER FORM! TODO\n"); + goto bad; + } + break; } imm = iword & 0xff; r8 <<= 1; @@ -777,21 +969,33 @@ case 0x2: /* SUB */ case 0x4: /* ADD */ if (s_bit) { - fatal("sub s_bit: TODO\n"); + fatal("add/sub s_bit: TODO\n"); + goto bad; + } + if (r12 == ARM_PC || r16 == ARM_PC) { + fatal("add/sub: PC\n"); goto bad; } switch (secondary_opcode) { case 0x2: - if (r12 == r16) + ic->f = cond_instr(sub); + if (r12 == r16) { ic->f = cond_instr(sub_self); - else - ic->f = cond_instr(sub); + if (imm == 1 && r12 != ARM_PC) + ic->f = arm_sub_self_1[r12]; + if (imm == 4 && r12 != ARM_PC) + ic->f = arm_sub_self_4[r12]; + } break; case 0x4: - if (r12 == r16) + ic->f = cond_instr(add); + if (r12 == r16) { ic->f = cond_instr(add_self); - else - ic->f = cond_instr(add); + if (imm == 1 && r12 != ARM_PC) + ic->f = arm_add_self_1[r12]; + if (imm == 4 && r12 != ARM_PC) + ic->f = arm_add_self_4[r12]; + } break; } ic->arg[0] = (size_t)(&cpu->cd.arm.r[r12]); @@ -806,6 +1010,8 @@ ic->f = cond_instr(cmps); ic->arg[0] = (size_t)(&cpu->cd.arm.r[r16]); ic->arg[1] = imm; + if (imm == 0 && r16 != ARM_PC) + ic->f = arm_cmps_0[r16]; break; case 0xd: /* MOV */ if (s_bit) { @@ -832,17 +1038,21 @@ case 0x6: /* xxxx011P UBWLnnnn ddddcccc ctt0mmmm Register */ case 0x7: p_bit = main_opcode & 1; + ic->f = load_store_instr[((iword >> 16) & 0x3f0) + + condition_code]; + imm = iword & 0xfff; + if (!u_bit) + imm = (int32_t)0-imm; if (main_opcode < 6) { /* Immediate: */ - imm = iword & 0xfff; - if (!u_bit) - imm = (int32_t)0-imm; ic->arg[0] = (size_t)(&cpu->cd.arm.r[r16]); ic->arg[1] = (size_t)(imm); ic->arg[2] = (size_t)(&cpu->cd.arm.r[r12]); } if (main_opcode == 4 && b_bit) { /* Post-index, immediate: */ + if (imm == 1 && !w_bit && l_bit) + ic->f = instr(store_w0_byte_u1_p0_imm_fixinc1); if (w_bit) { fatal("load/store: T-bit\n"); goto bad; @@ -851,24 +1061,13 @@ fatal("load/store writeback PC: error\n"); goto bad; } - if (l_bit) - ic->f = cond_instr(load_byte_wpost_imm); - else - ic->f = cond_instr(store_byte_wpost_imm); } else if (main_opcode == 5) { /* Pre-index, immediate: */ /* ldr(b) Rd,[Rn,#imm] */ if (l_bit) { - if (r12 == ARM_PC) + if (r12 == ARM_PC) { fatal("WARNING: ldr to pc register?\n"); - if (w_bit) { - ic->f = b_bit? - cond_instr(load_byte_w_imm) : - cond_instr(load_word_w_imm); - } else { - ic->f = b_bit? - cond_instr(load_byte_imm) : - cond_instr(load_word_imm); + goto bad; } if (r16 == ARM_PC) { if (w_bit) { @@ -880,17 +1079,10 @@ cond_instr(load_word_imm_pcrel); } } else { - if (w_bit) { - fatal("w bit store etc\n"); - goto bad; - } if (r12 == ARM_PC) { fatal("TODO: store pc\n"); goto bad; } - ic->f = b_bit? - cond_instr(store_byte_imm) : - cond_instr(store_word_imm); if (r16 == ARM_PC) { fatal("TODO: store pc rel\n"); goto bad; @@ -902,27 +1094,49 @@ } break; + case 0x8: /* Multiple load/store... (Block data transfer) */ + case 0x9: /* xxxx100P USWLnnnn llllllll llllllll */ + if (l_bit) + ic->f = cond_instr(bdt_load); + else + ic->f = cond_instr(bdt_store); + ic->arg[0] = (size_t)(&cpu->cd.arm.r[r16]); + ic->arg[1] = (size_t)iword; + if (r16 == ARM_PC) { + fatal("TODO: bdt with PC as base\n"); + goto bad; + } + break; + case 0xa: /* B: branch */ case 0xb: /* BL: branch+link */ if (main_opcode == 0x0a) { ic->f = cond_instr(b); samepage_function = cond_instr(b_samepage); } else { - ic->f = cond_instr(bl); - samepage_function = cond_instr(bl_samepage); + if (cpu->machine->show_trace_tree) { + ic->f = cond_instr(bl_trace); + samepage_function = + cond_instr(bl_samepage_trace); + } else { + ic->f = cond_instr(bl); + samepage_function = cond_instr(bl_samepage); + } } ic->arg[0] = (iword & 0x00ffffff) << 2; /* Sign-extend: */ if (ic->arg[0] & 0x02000000) ic->arg[0] |= 0xfc000000; - /* Branches are calculated as PC + 8 + offset: */ + /* + * Branches are calculated as PC + 8 + offset. + */ ic->arg[0] = (int32_t)(ic->arg[0] + 8); /* Special case: branch within the same page: */ { uint32_t mask_within_page = - ((IC_ENTRIES_PER_PAGE-1) << 2) | 3; + ((ARM_IC_ENTRIES_PER_PAGE-1) << 2) | 3; uint32_t old_pc = addr; uint32_t new_pc = old_pc + (int32_t)ic->arg[0]; if ((old_pc & ~mask_within_page) == @@ -939,31 +1153,8 @@ } - /* - * If we end up here, then an instruction was translated. Now it is - * time to check for combinations of instructions that can be - * converted into a single function call. - */ - - /* Single-stepping doesn't work with combinations: */ - if (single_step || cpu->machine->instruction_trace) - return; - - arm_combine_instructions(cpu, ic); - - return; - - -bad: /* - * Nothing was translated. (Unimplemented or illegal instruction.) - */ - quiet_mode = 0; - fatal("arm_translate_instruction(): TODO: " - "unimplemented ARM instruction:\n"); - arm_cpu_disassemble_instr(cpu, ib, 1, 0, 0); - cpu->running = 0; - cpu->dead = 1; - cpu->cd.arm.running_translated = 0; - *ic = nothing_call; +#define DYNTRANS_TO_BE_TRANSLATED_TAIL +#include "cpu_dyntrans.c" +#undef DYNTRANS_TO_BE_TRANSLATED_TAIL }