--- trunk/src/cpus/cpu_arm_instr.c 2007/10/08 16:19:11 18 +++ trunk/src/cpus/cpu_arm_instr.c 2007/10/08 16:19:23 20 @@ -25,7 +25,7 @@ * SUCH DAMAGE. * * - * $Id: cpu_arm_instr.c,v 1.39 2005/10/27 14:01:13 debug Exp $ + * $Id: cpu_arm_instr.c,v 1.54 2005/11/19 18:53:07 debug Exp $ * * ARM instructions. * @@ -125,69 +125,65 @@ * functions, otherwise they will simply call the X function. */ +uint8_t condition_hi[16] = { 0,0,1,1, 0,0,0,0, 0,0,1,1, 0,0,0,0 }; +uint8_t condition_ge[16] = { 1,0,1,0, 1,0,1,0, 0,1,0,1, 0,1,0,1 }; +uint8_t condition_gt[16] = { 1,0,1,0, 0,0,0,0, 0,1,0,1, 0,0,0,0 }; + #define Y(n) void arm_instr_ ## n ## __eq(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (cpu->cd.arm.cpsr & ARM_FLAG_Z) \ + { if (cpu->cd.arm.flags & ARM_F_Z) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __ne(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (!(cpu->cd.arm.cpsr & ARM_FLAG_Z)) \ + { if (!(cpu->cd.arm.flags & ARM_F_Z)) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __cs(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (cpu->cd.arm.cpsr & ARM_FLAG_C) \ + { if (cpu->cd.arm.flags & ARM_F_C) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __cc(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (!(cpu->cd.arm.cpsr & ARM_FLAG_C)) \ + { if (!(cpu->cd.arm.flags & ARM_F_C)) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __mi(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (cpu->cd.arm.cpsr & ARM_FLAG_N) \ + { if (cpu->cd.arm.flags & ARM_F_N) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __pl(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (!(cpu->cd.arm.cpsr & ARM_FLAG_N)) \ + { if (!(cpu->cd.arm.flags & ARM_F_N)) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __vs(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (cpu->cd.arm.cpsr & ARM_FLAG_V) \ + { if (cpu->cd.arm.flags & ARM_F_V) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __vc(struct cpu *cpu, \ struct 
arm_instr_call *ic) \ - { if (!(cpu->cd.arm.cpsr & ARM_FLAG_V)) \ + { if (!(cpu->cd.arm.flags & ARM_F_V)) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __hi(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (cpu->cd.arm.cpsr & ARM_FLAG_C && \ - !(cpu->cd.arm.cpsr & ARM_FLAG_Z)) \ + { if (condition_hi[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __ls(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (cpu->cd.arm.cpsr & ARM_FLAG_Z || \ - !(cpu->cd.arm.cpsr & ARM_FLAG_C)) \ + { if (!condition_hi[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __ge(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) == \ - ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0)) \ + { if (condition_ge[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __lt(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) != \ - ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0)) \ + { if (!condition_ge[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __gt(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) == \ - ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0) && \ - !(cpu->cd.arm.cpsr & ARM_FLAG_Z)) \ + { if (condition_gt[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __le(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) != \ - ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0) || \ - (cpu->cd.arm.cpsr & ARM_FLAG_Z)) \ + { if (!condition_gt[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void (*arm_cond_instr_ ## n [16])(struct cpu *, \ struct arm_instr_call *) = { \ @@ -215,10 +211,9 @@ uint32_t low_pc; low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << 
ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); fatal("Invalid ARM instruction: pc=0x%08x\n", (int)cpu->pc); @@ -236,16 +231,7 @@ */ X(b) { - uint32_t low_pc; - - /* Calculate new PC from this instruction + arg[0] */ - low_pc = ((size_t)ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (int32_t)ic->arg[0]; - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc = (uint32_t)((cpu->pc & 0xfffff000) + (int32_t)ic->arg[0]); /* Find the new physical page and update the translation pointers: */ quick_pc_to_pointers(cpu); @@ -257,12 +243,82 @@ * b_samepage: Branch (to within the same translated page) * * arg[0] = pointer to new arm_instr_call + * arg[1] = pointer to the next instruction. + * + * NOTE: This instruction is manually inlined. */ -X(b_samepage) -{ +X(b_samepage) { cpu->cd.arm.next_ic = (struct arm_instr_call *) ic->arg[0]; } -Y(b_samepage) +X(b_samepage__eq) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_Z? 0 : 1]; +} +X(b_samepage__ne) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_Z? 1 : 0]; +} +X(b_samepage__cs) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_C? 0 : 1]; +} +X(b_samepage__cc) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_C? 1 : 0]; +} +X(b_samepage__mi) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_N? 0 : 1]; +} +X(b_samepage__pl) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_N? 
1 : 0]; +} +X(b_samepage__vs) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_V? 0 : 1]; +} +X(b_samepage__vc) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_V? 1 : 0]; +} +X(b_samepage__hi) { + cpu->cd.arm.next_ic = (condition_hi[cpu->cd.arm.flags])? + (struct arm_instr_call *) ic->arg[0] : + (struct arm_instr_call *) ic->arg[1]; +} +X(b_samepage__ls) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[condition_hi[cpu->cd.arm.flags]]; +} +X(b_samepage__ge) { + cpu->cd.arm.next_ic = (condition_ge[cpu->cd.arm.flags])? + (struct arm_instr_call *) ic->arg[0] : + (struct arm_instr_call *) ic->arg[1]; +} +X(b_samepage__lt) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[condition_ge[cpu->cd.arm.flags]]; +} +X(b_samepage__gt) { + cpu->cd.arm.next_ic = (condition_gt[cpu->cd.arm.flags])? + (struct arm_instr_call *) ic->arg[0] : + (struct arm_instr_call *) ic->arg[1]; +} +X(b_samepage__le) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[condition_gt[cpu->cd.arm.flags]]; +} +void (*arm_cond_instr_b_samepage[16])(struct cpu *, + struct arm_instr_call *) = { + arm_instr_b_samepage__eq, arm_instr_b_samepage__ne, + arm_instr_b_samepage__cs, arm_instr_b_samepage__cc, + arm_instr_b_samepage__mi, arm_instr_b_samepage__pl, + arm_instr_b_samepage__vs, arm_instr_b_samepage__vc, + arm_instr_b_samepage__hi, arm_instr_b_samepage__ls, + arm_instr_b_samepage__ge, arm_instr_b_samepage__lt, + arm_instr_b_samepage__gt, arm_instr_b_samepage__le, + arm_instr_b_samepage, arm_instr_nop }; /* @@ -272,7 +328,7 @@ */ X(bx) { - cpu->pc = cpu->cd.arm.r[ARM_PC] = reg(ic->arg[0]); + cpu->pc = reg(ic->arg[0]); if (cpu->pc & 1) { fatal("thumb: TODO\n"); exit(1); @@ -292,7 +348,7 @@ */ X(bx_trace) { - cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR]; + cpu->pc = cpu->cd.arm.r[ARM_LR]; if (cpu->pc & 1) { fatal("thumb: TODO\n"); exit(1); @@ -314,20 +370,11 @@ */ X(bl) { - uint32_t 
lr, low_pc; - - /* Figure out what the return (link) address will be: */ - low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - lr = cpu->cd.arm.r[ARM_PC]; - lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - - /* Link: */ - cpu->cd.arm.r[ARM_LR] = lr; + uint32_t pc = ((uint32_t)cpu->pc & 0xfffff000) + (int32_t)ic->arg[1]; + cpu->cd.arm.r[ARM_LR] = pc + 4; /* Calculate new PC from this instruction + arg[0] */ - cpu->pc = cpu->cd.arm.r[ARM_PC] = lr - 4 + (int32_t)ic->arg[0]; + cpu->pc = pc + (int32_t)ic->arg[0]; /* Find the new physical page and update the translation pointers: */ quick_pc_to_pointers(cpu); @@ -342,19 +389,9 @@ */ X(blx) { - uint32_t lr, low_pc; - - /* Figure out what the return (link) address will be: */ - low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - lr = cpu->cd.arm.r[ARM_PC]; - lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - - /* Link: */ + uint32_t lr = ((uint32_t)cpu->pc & 0xfffff000) + (int32_t)ic->arg[2]; cpu->cd.arm.r[ARM_LR] = lr; - - cpu->pc = cpu->cd.arm.r[ARM_PC] = reg(ic->arg[0]); + cpu->pc = reg(ic->arg[0]); if (cpu->pc & 1) { fatal("thumb: TODO\n"); exit(1); @@ -374,20 +411,11 @@ */ X(bl_trace) { - uint32_t lr, low_pc; - - /* Figure out what the return (link) address will be: */ - low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - lr = cpu->cd.arm.r[ARM_PC]; - lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - - /* Link: */ - cpu->cd.arm.r[ARM_LR] = lr; + uint32_t pc = ((uint32_t)cpu->pc & 0xfffff000) + (int32_t)ic->arg[1]; + cpu->cd.arm.r[ARM_LR] = pc + 4; /* Calculate new PC from this instruction + arg[0] */ - cpu->pc = cpu->cd.arm.r[ARM_PC] = lr - 4 + 
(int32_t)ic->arg[0]; + cpu->pc = pc + (int32_t)ic->arg[0]; cpu_functioncall_trace(cpu, cpu->pc); @@ -404,19 +432,8 @@ */ X(bl_samepage) { - uint32_t lr, low_pc; - - /* Figure out what the return (link) address will be: */ - low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - lr = cpu->cd.arm.r[ARM_PC]; - lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - - /* Link: */ - cpu->cd.arm.r[ARM_LR] = lr; - - /* Branch: */ + cpu->cd.arm.r[ARM_LR] = + ((uint32_t)cpu->pc & 0xfffff000) + (int32_t)ic->arg[2]; cpu->cd.arm.next_ic = (struct arm_instr_call *) ic->arg[0]; } Y(bl_samepage) @@ -429,33 +446,53 @@ */ X(bl_samepage_trace) { - uint32_t tmp_pc, lr, low_pc; + uint32_t low_pc, lr = (cpu->pc & 0xfffff000) + ic->arg[2]; - /* Figure out what the return (link) address will be: */ - low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - lr = cpu->cd.arm.r[ARM_PC]; - lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - - /* Link: */ + /* Link and branch: */ cpu->cd.arm.r[ARM_LR] = lr; - - /* Branch: */ cpu->cd.arm.next_ic = (struct arm_instr_call *) ic->arg[0]; + /* Synchronize the program counter: */ low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - tmp_pc = cpu->cd.arm.r[ARM_PC]; - tmp_pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - tmp_pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu_functioncall_trace(cpu, tmp_pc); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); + + /* ... and show trace: */ + cpu_functioncall_trace(cpu, cpu->pc); } Y(bl_samepage_trace) -#include "cpu_arm_instr_misc.c" +/* + * clz: Count leading zeroes. 
+ * + * arg[0] = ptr to rm + * arg[1] = ptr to rd + */ +X(clz) +{ + uint32_t rm = reg(ic->arg[0]); + int i = 32, n = 0, j; + while (i>0) { + if (rm & 0xff000000) { + for (j=0; j<8; j++) { + if (rm & 0x80000000) + break; + n ++; + rm <<= 1; + } + break; + } else { + rm <<= 8; + i -= 8; + n += 8; + } + } + reg(ic->arg[1]) = n; +} +Y(clz) /* @@ -474,11 +511,11 @@ { uint32_t result; result = reg(ic->arg[1]) * reg(ic->arg[2]); - cpu->cd.arm.cpsr &= ~(ARM_FLAG_Z | ARM_FLAG_N); + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); if (result == 0) - cpu->cd.arm.cpsr |= ARM_FLAG_Z; + cpu->cd.arm.flags |= ARM_F_Z; if (result & 0x80000000) - cpu->cd.arm.cpsr |= ARM_FLAG_N; + cpu->cd.arm.flags |= ARM_F_N; reg(ic->arg[0]) = result; } Y(muls) @@ -509,11 +546,11 @@ rs = (iw >> 8) & 15; rm = iw & 15; cpu->cd.arm.r[rd] = cpu->cd.arm.r[rm] * cpu->cd.arm.r[rs] + cpu->cd.arm.r[rn]; - cpu->cd.arm.cpsr &= ~(ARM_FLAG_Z | ARM_FLAG_N); + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); if (cpu->cd.arm.r[rd] == 0) - cpu->cd.arm.cpsr |= ARM_FLAG_Z; + cpu->cd.arm.flags |= ARM_F_Z; if (cpu->cd.arm.r[rd] & 0x80000000) - cpu->cd.arm.cpsr |= ARM_FLAG_N; + cpu->cd.arm.flags |= ARM_F_N; } Y(mlas) @@ -528,7 +565,7 @@ /* xxxx0000 1UAShhhh llllssss 1001mmmm */ uint32_t iw; uint64_t tmp; int u_bit, a_bit; iw = ic->arg[0]; - u_bit = (iw >> 22) & 1; a_bit = (iw >> 21) & 1; + u_bit = iw & 0x00400000; a_bit = iw & 0x00200000; tmp = cpu->cd.arm.r[iw & 15]; if (u_bit) tmp = (int64_t)(int32_t)tmp @@ -563,6 +600,19 @@ /* + * mov_reg_pc: Move the PC register to a normal register. 
+ * + * arg[0] = offset compared to start of current page + 8 + * arg[1] = ptr to destination register + */ +X(mov_reg_pc) +{ + reg(ic->arg[1]) = ((uint32_t)cpu->pc&0xfffff000) + (int32_t)ic->arg[0]; +} +Y(mov_reg_pc) + + +/* * ret_trace: "mov pc,lr" with trace enabled * ret: "mov pc,lr" without trace enabled * @@ -571,13 +621,13 @@ X(ret_trace) { uint32_t old_pc, mask_within_page; - old_pc = cpu->cd.arm.r[ARM_PC]; + old_pc = cpu->pc; mask_within_page = ((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT) | ((1 << ARM_INSTR_ALIGNMENT_SHIFT) - 1); /* Update the PC register: */ - cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR]; + cpu->pc = cpu->cd.arm.r[ARM_LR]; cpu_functioncall_trace_return(cpu); @@ -596,7 +646,7 @@ Y(ret_trace) X(ret) { - cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR]; + cpu->pc = cpu->cd.arm.r[ARM_LR]; quick_pc_to_pointers(cpu); } Y(ret) @@ -620,12 +670,17 @@ (ic->arg[0] & ARM_FLAG_MODE)); uint32_t new_value = ic->arg[0]; + cpu->cd.arm.cpsr &= 0x0fffffff; + cpu->cd.arm.cpsr |= (cpu->cd.arm.flags << 28); + if (switch_register_banks) arm_save_register_bank(cpu); cpu->cd.arm.cpsr &= ~mask; cpu->cd.arm.cpsr |= (new_value & mask); + cpu->cd.arm.flags = cpu->cd.arm.cpsr >> 28; + if (switch_register_banks) arm_load_register_bank(cpu); } @@ -667,11 +722,10 @@ /* Synchronize the program counter: */ uint32_t old_pc, low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - old_pc = cpu->pc = cpu->cd.arm.r[ARM_PC]; -printf("msr_spsr: old pc = 0x%08x\n", old_pc); + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); + old_pc = cpu->pc; + printf("msr_spsr: old pc = 0x%08x\n", old_pc); } exit(1); } @@ -692,13 +746,15 @@ */ X(mrs) { + cpu->cd.arm.cpsr &= 0x0fffffff; + 
cpu->cd.arm.cpsr |= (cpu->cd.arm.flags << 28); reg(ic->arg[0]) = cpu->cd.arm.cpsr; } Y(mrs) /* - * mrs: Move from status/flag register to a normal register. + * mrs: Move from saved status/flag register to a normal register. * * arg[0] = pointer to rd */ @@ -727,24 +783,18 @@ * arg[0] = copy of the instruction word */ X(mcr_mrc) { - uint32_t low_pc; - low_pc = ((size_t)ic - (size_t) + uint32_t low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); arm_mcr_mrc(cpu, ic->arg[0]); } Y(mcr_mrc) X(cdp) { - uint32_t low_pc; - low_pc = ((size_t)ic - (size_t) + uint32_t low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); arm_cdp(cpu, ic->arg[0]); } Y(cdp) @@ -755,8 +805,9 @@ */ X(openfirmware) { + /* TODO: sync pc? */ of_emul(cpu); - cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR]; + cpu->pc = cpu->cd.arm.r[ARM_LR]; if (cpu->machine->show_trace_tree) cpu_functioncall_trace_return(cpu); quick_pc_to_pointers(cpu); @@ -764,6 +815,18 @@ /* + * reboot: + */ +X(reboot) +{ + cpu->running = 0; + cpu->running_translated = 0; + cpu->n_translated_instrs --; + cpu->cd.arm.next_ic = &nothing_call; +} + + +/* * swi_useremul: Syscall. 
* * arg[0] = swi number @@ -773,10 +836,10 @@ /* Synchronize the program counter: */ uint32_t old_pc, low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - old_pc = cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); + old_pc = cpu->pc; useremul_syscall(cpu, ic->arg[0]); @@ -798,20 +861,28 @@ */ X(swi) { - /* Synchronize the program counter: */ - uint32_t low_pc = ((size_t)ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; - + /* Synchronize the program counter first: */ + cpu->pc &= 0xfffff000; + cpu->pc += ic->arg[0]; arm_exception(cpu, ARM_EXCEPTION_SWI); } Y(swi) /* + * und: Undefined instruction. + */ +X(und) +{ + /* Synchronize the program counter first: */ + cpu->pc &= 0xfffff000; + cpu->pc += ic->arg[0]; + arm_exception(cpu, ARM_EXCEPTION_UND); +} +Y(und) + + +/* * swp, swpb: Swap (word or byte). 
* * arg[0] = ptr to rd @@ -822,13 +893,12 @@ { uint32_t addr = reg(ic->arg[2]), data, data2; unsigned char d[4]; + /* Synchronize the program counter: */ uint32_t low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); if (!cpu->memory_rw(cpu, cpu->mem, addr, d, sizeof(d), MEM_READ, CACHE_DATA)) { @@ -850,13 +920,12 @@ { uint32_t addr = reg(ic->arg[2]), data; unsigned char d[1]; + /* Synchronize the program counter: */ uint32_t low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); if (!cpu->memory_rw(cpu, cpu->mem, addr, d, sizeof(d), MEM_READ, CACHE_DATA)) { @@ -877,11 +946,15 @@ extern void (*arm_load_store_instr[1024])(struct cpu *, struct arm_instr_call *); +X(store_w1_word_u1_p0_imm); X(store_w0_byte_u1_p0_imm); X(store_w0_word_u1_p0_imm); +X(store_w0_word_u1_p1_imm); +X(load_w1_word_u1_p0_imm); X(load_w0_word_u1_p0_imm); X(load_w0_byte_u1_p1_imm); X(load_w0_byte_u1_p1_reg); +X(load_w1_byte_u1_p1_imm); extern void (*arm_load_store_instr_pc[1024])(struct cpu *, struct arm_instr_call *); @@ -893,15 +966,24 @@ struct arm_instr_call *); extern uint32_t (*arm_r[8192])(struct cpu *, struct arm_instr_call *); +extern void arm_r_r3_t0_c0(void); extern void (*arm_dpi_instr[2 * 2 * 2 * 16 * 16])(struct cpu *, struct arm_instr_call *); +extern void (*arm_dpi_instr_regshort[2 * 16 
* 16])(struct cpu *, + struct arm_instr_call *); X(cmps); +X(teqs); +X(tsts); X(sub); X(add); X(subs); +X(eor_regshort); +X(cmps_regshort); +#include "cpu_arm_instr_misc.c" + /* * bdt_load: Block Data Transfer, Load @@ -931,10 +1013,8 @@ /* Synchronize the program counter: */ low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << - ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); if (s_bit) { /* Load to USR registers: */ @@ -1070,6 +1150,7 @@ arm_save_register_bank(cpu); cpu->cd.arm.cpsr = new_cpsr; + cpu->cd.arm.flags = cpu->cd.arm.cpsr >> 28; if (switch_register_banks) arm_load_register_bank(cpu); @@ -1077,8 +1158,7 @@ /* NOTE: Special case: Loading the PC */ if (iw & 0x8000) { - cpu->cd.arm.r[ARM_PC] &= ~3; - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc = cpu->cd.arm.r[ARM_PC] & 0xfffffffc; if (cpu->machine->show_trace_tree) cpu_functioncall_trace_return(cpu); /* TODO: There is no need to update the @@ -1119,10 +1199,8 @@ /* Synchronize the program counter: */ low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << - ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); for (i=(u_bit? 0 : 15); i>=0 && i<=15; i+=(u_bit? 
1 : -1)) { if (!((iw >> i) & 1)) { @@ -1151,8 +1229,9 @@ } } + /* NOTE/TODO: 8 vs 12 on some ARMs */ if (i == ARM_PC) - value += 12; /* NOTE/TODO: 8 on some ARMs */ + value = cpu->pc + 12; if (p_bit) { if (u_bit) @@ -1210,158 +1289,17 @@ /* Various load/store multiple instructions: */ -#include "tmp_arm_multi.c" +uint32_t *multi_opcode[256]; +void (**multi_opcode_f[256])(struct cpu *, struct arm_instr_call *); +X(multi_0x08b15018); +X(multi_0x08ac000c__ge); +X(multi_0x08a05018); /*****************************************************************************/ /* - * fill_loop_test: - * - * A byte-fill loop. Fills at most one page at a time. If the page was not - * in the host_store table, then the original sequence (beginning with - * cmps rZ,#0) is executed instead. - * - * L: cmps rZ,#0 ic[0] - * strb rX,[rY],#1 ic[1] - * sub rZ,rZ,#1 ic[2] - * bgt L ic[3] - * - * A maximum of 4 pages are filled before returning. - */ -X(fill_loop_test) -{ - int max_pages_left = 4; - uint32_t addr, a, n, ofs, maxlen; - uint32_t *rzp = (uint32_t *)(size_t)ic[0].arg[0]; - unsigned char *page; - -restart_loop: - addr = reg(ic[1].arg[0]); - page = cpu->cd.arm.host_store[addr >> 12]; - if (page == NULL) { - instr(cmps)(cpu, ic); - return; - } - - n = reg(rzp) + 1; - ofs = addr & 0xfff; - maxlen = 4096 - ofs; - if (n > maxlen) - n = maxlen; - - /* printf("x = %x, n = %i\n", reg(ic[1].arg[2]), n); */ - memset(page + ofs, reg(ic[1].arg[2]), n); - - reg(ic[1].arg[0]) = addr + n; - - reg(rzp) -= n; - cpu->n_translated_instrs += (4 * n); - - a = reg(rzp); - - cpu->cd.arm.cpsr &= - ~(ARM_FLAG_Z | ARM_FLAG_N | ARM_FLAG_V | ARM_FLAG_C); - if (a != 0) - cpu->cd.arm.cpsr |= ARM_FLAG_C; - else - cpu->cd.arm.cpsr |= ARM_FLAG_Z; - if ((int32_t)a < 0) - cpu->cd.arm.cpsr |= ARM_FLAG_N; - - if (max_pages_left-- > 0 && (int32_t)a > 0) - goto restart_loop; - - cpu->n_translated_instrs --; - - if ((int32_t)a > 0) - cpu->cd.arm.next_ic = ic; - else - cpu->cd.arm.next_ic = &ic[4]; -} - - -/* - * 
fill_loop_test2: - * - * A word-fill loop. Fills at most one page at a time. If the page was not - * in the host_store table, then the original sequence (beginning with - * cmps rZ,#0) is executed instead. - * - * L: str rX,[rY],#4 ic[0] - * subs rZ,rZ,#4 ic[1] - * bgt L ic[2] - * - * A maximum of 5 pages are filled before returning. - */ -X(fill_loop_test2) -{ - int max_pages_left = 5; - unsigned char x1,x2,x3,x4; - uint32_t addr, a, n, x, ofs, maxlen; - uint32_t *rzp = (uint32_t *)(size_t)ic[1].arg[0]; - unsigned char *page; - - x = reg(ic[0].arg[2]); - x1 = x; x2 = x >> 8; x3 = x >> 16; x4 = x >> 24; - if (x1 != x2 || x1 != x3 || x1 != x4) { - instr(store_w0_word_u1_p0_imm)(cpu, ic); - return; - } - -restart_loop: - addr = reg(ic[0].arg[0]); - page = cpu->cd.arm.host_store[addr >> 12]; - if (page == NULL || (addr & 3) != 0) { - instr(store_w0_word_u1_p0_imm)(cpu, ic); - return; - } - - /* printf("addr = 0x%08x, page = %p\n", addr, page); - printf("*rzp = 0x%08x\n", reg(rzp)); */ - - n = reg(rzp) / 4; - if (n == 0) - n++; - /* n = nr of _words_ */ - ofs = addr & 0xfff; - maxlen = 4096 - ofs; - if (n*4 > maxlen) - n = maxlen / 4; - - /* printf("x = %x, n = %i\n", x1, n); */ - memset(page + ofs, x1, n * 4); - - reg(ic[0].arg[0]) = addr + n * 4; - - reg(rzp) -= (n * 4); - cpu->n_translated_instrs += (3 * n); - - a = reg(rzp); - - cpu->cd.arm.cpsr &= - ~(ARM_FLAG_Z | ARM_FLAG_N | ARM_FLAG_V | ARM_FLAG_C); - if (a != 0) - cpu->cd.arm.cpsr |= ARM_FLAG_C; - else - cpu->cd.arm.cpsr |= ARM_FLAG_Z; - if ((int32_t)a < 0) - cpu->cd.arm.cpsr |= ARM_FLAG_N; - - if (max_pages_left-- > 0 && (int32_t)a > 0) - goto restart_loop; - - cpu->n_translated_instrs --; - - if ((int32_t)a > 0) - cpu->cd.arm.next_ic = ic; - else - cpu->cd.arm.next_ic = &ic[3]; -} - - -/* * netbsd_memset: * * The core of a NetBSD/arm memset. 
@@ -1381,8 +1319,8 @@ instr(subs)(cpu, ic); - if (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) != - ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0)) { + if (((cpu->cd.arm.flags & ARM_F_N)?1:0) != + ((cpu->cd.arm.flags & ARM_F_V)?1:0)) { cpu->n_translated_instrs += 16; /* Skip the store multiples: */ cpu->cd.arm.next_ic = &ic[17]; @@ -1411,9 +1349,9 @@ /* Branch back if greater: */ cpu->n_translated_instrs += 1; - } while (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) == - ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0) && - !(cpu->cd.arm.cpsr & ARM_FLAG_Z)); + } while (((cpu->cd.arm.flags & ARM_F_N)?1:0) == + ((cpu->cd.arm.flags & ARM_F_V)?1:0) && + !(cpu->cd.arm.flags & ARM_F_Z)); /* Continue at the instruction after the bgt: */ cpu->cd.arm.next_ic = &ic[18]; @@ -1471,8 +1409,8 @@ /* Loop while greater or equal: */ cpu->n_translated_instrs ++; - } while (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) == - ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0)); + } while (((cpu->cd.arm.flags & ARM_F_N)?1:0) == + ((cpu->cd.arm.flags & ARM_F_V)?1:0)); /* Continue at the instruction after the bge: */ cpu->cd.arm.next_ic = &ic[6]; @@ -1494,6 +1432,8 @@ { uint32_t r1 = cpu->cd.arm.r[1]; cpu->n_translated_instrs += ((r1 >> 5) * 3); + cpu->cd.arm.r[0] += r1; + cpu->cd.arm.r[1] = 0; cpu->cd.arm.next_ic = &ic[4]; } @@ -1545,12 +1485,488 @@ cpu->cd.arm.r[3] = page[t & 0xfff]; t = cpu->cd.arm.r[3] & cpu->cd.arm.r[ARM_IP]; - cpu->cd.arm.cpsr &= ~(ARM_FLAG_Z | ARM_FLAG_N); + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); if (t == 0) - cpu->cd.arm.cpsr |= ARM_FLAG_Z; + cpu->cd.arm.flags |= ARM_F_Z; + + cpu->n_translated_instrs += 2; + cpu->cd.arm.next_ic = &ic[3]; +} + + +/* + * strlen: + * + * S: e5f03001 ldrb rY,[rX,#1]! 
+ * e3530000 cmps rY,#0 + * 1afffffc bne S + */ +X(strlen) +{ + unsigned int n_loops = 0; + uint32_t rY, rX = reg(ic[0].arg[0]); + unsigned char *p; + + do { + rX ++; + p = cpu->cd.arm.host_load[rX >> 12]; + if (p == NULL) { + cpu->n_translated_instrs += (n_loops * 3); + instr(load_w1_byte_u1_p1_imm)(cpu, ic); + return; + } + + rY = reg(ic[0].arg[2]) = p[rX & 0xfff]; /* load */ + reg(ic[0].arg[0]) = rX; /* writeback */ + n_loops ++; + + /* Compare rY to zero: */ + cpu->cd.arm.flags = ARM_F_C; + if (rY == 0) + cpu->cd.arm.flags |= ARM_F_Z; + } while (rY != 0); + + cpu->n_translated_instrs += (n_loops * 3) - 1; + cpu->cd.arm.next_ic = &ic[3]; +} + +/* + * xchg: + * + * e02YX00X eor rX,rY,rX + * e02XY00Y eor rY,rX,rY + * e02YX00X eor rX,rY,rX + */ +X(xchg) +{ + uint32_t tmp = reg(ic[0].arg[0]); cpu->n_translated_instrs += 2; cpu->cd.arm.next_ic = &ic[3]; + reg(ic[0].arg[0]) = reg(ic[1].arg[0]); + reg(ic[1].arg[0]) = tmp; +} + + +/* + * netbsd_copyin: + * + * e4b0a004 ldrt sl,[r0],#4 + * e4b0b004 ldrt fp,[r0],#4 + * e4b06004 ldrt r6,[r0],#4 + * e4b07004 ldrt r7,[r0],#4 + * e4b08004 ldrt r8,[r0],#4 + * e4b09004 ldrt r9,[r0],#4 + */ +X(netbsd_copyin) +{ + uint32_t r0 = cpu->cd.arm.r[0], ofs = (r0 & 0xffc), index = r0 >> 12; + unsigned char *p = cpu->cd.arm.host_load[index]; + uint32_t *p32 = (uint32_t *) p, *q32; + int ok = cpu->cd.arm.is_userpage[index >> 5] & (1 << (index & 31)); + + if (ofs > 0x1000 - 6*4 || !ok || p == NULL) { + instr(load_w1_word_u1_p0_imm)(cpu, ic); + return; + } + q32 = &cpu->cd.arm.r[6]; + ofs >>= 2; + q32[0] = p32[ofs+2]; + q32[1] = p32[ofs+3]; + q32[2] = p32[ofs+4]; + q32[3] = p32[ofs+5]; + q32[4] = p32[ofs+0]; + q32[5] = p32[ofs+1]; + cpu->cd.arm.r[0] = r0 + 24; + cpu->n_translated_instrs += 5; + cpu->cd.arm.next_ic = &ic[6]; +} + + +/* + * netbsd_copyout: + * + * e4a18004 strt r8,[r1],#4 + * e4a19004 strt r9,[r1],#4 + * e4a1a004 strt sl,[r1],#4 + * e4a1b004 strt fp,[r1],#4 + * e4a16004 strt r6,[r1],#4 + * e4a17004 strt r7,[r1],#4 + */ 
+X(netbsd_copyout) +{ + uint32_t r1 = cpu->cd.arm.r[1], ofs = (r1 & 0xffc), index = r1 >> 12; + unsigned char *p = cpu->cd.arm.host_store[index]; + uint32_t *p32 = (uint32_t *) p, *q32; + int ok = cpu->cd.arm.is_userpage[index >> 5] & (1 << (index & 31)); + + if (ofs > 0x1000 - 6*4 || !ok || p == NULL) { + instr(store_w1_word_u1_p0_imm)(cpu, ic); + return; + } + q32 = &cpu->cd.arm.r[6]; + ofs >>= 2; + p32[ofs ] = q32[2]; + p32[ofs+1] = q32[3]; + p32[ofs+2] = q32[4]; + p32[ofs+3] = q32[5]; + p32[ofs+4] = q32[0]; + p32[ofs+5] = q32[1]; + cpu->cd.arm.r[1] = r1 + 24; + cpu->n_translated_instrs += 5; + cpu->cd.arm.next_ic = &ic[6]; +} + + +/* + * cmps by 0, followed by beq (inside the same page): + */ +X(cmps0_beq_samepage) +{ + uint32_t a = reg(ic->arg[0]); + cpu->n_translated_instrs ++; + if (a == 0) { + cpu->cd.arm.flags = ARM_F_Z | ARM_F_C; + } else { + /* Semi-ugly hack which sets the negative-bit if a < 0: */ + cpu->cd.arm.flags = ARM_F_C | ((a >> 28) & 8); + } + if (a == 0) + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} + + +/* + * cmps followed by beq (inside the same page): + */ +X(cmps_beq_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? 
ARM_F_C : 0; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + } else { + cpu->cd.arm.next_ic = &ic[2]; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + } +} + + +/* + * cmps followed by beq (not the same page): + */ +X(cmps_0_beq) +{ + uint32_t a = reg(ic->arg[0]); + cpu->n_translated_instrs ++; + if (a == 0) { + cpu->cd.arm.flags = ARM_F_Z | ARM_F_C; + cpu->pc = (uint32_t)(((uint32_t)cpu->pc & 0xfffff000) + + (int32_t)ic[1].arg[0]); + quick_pc_to_pointers(cpu); + } else { + /* Semi-ugly hack which sets the negative-bit if a < 0: */ + cpu->cd.arm.flags = ARM_F_C | ((a >> 28) & 8); + cpu->cd.arm.next_ic = &ic[2]; + } +} +X(cmps_pos_beq) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? ARM_F_C : 0; + if ((int32_t)a < 0 && (int32_t)c >= 0) + cpu->cd.arm.flags |= ARM_F_V; + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + cpu->pc = (uint32_t)(((uint32_t)cpu->pc & 0xfffff000) + + (int32_t)ic[1].arg[0]); + quick_pc_to_pointers(cpu); + } else { + cpu->cd.arm.next_ic = &ic[2]; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + } +} +X(cmps_neg_beq) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? 
ARM_F_C : 0; + if ((int32_t)a >= 0 && (int32_t)c < 0) + cpu->cd.arm.flags |= ARM_F_V; + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + cpu->pc = (uint32_t)(((uint32_t)cpu->pc & 0xfffff000) + + (int32_t)ic[1].arg[0]); + quick_pc_to_pointers(cpu); + } else { + cpu->cd.arm.next_ic = &ic[2]; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + } +} + + +/* + * cmps by 0, followed by bne (inside the same page): + */ +X(cmps0_bne_samepage) +{ + uint32_t a = reg(ic->arg[0]); + cpu->n_translated_instrs ++; + if (a == 0) { + cpu->cd.arm.flags = ARM_F_Z | ARM_F_C; + } else { + /* Semi-ugly hack which sets the negative-bit if a < 0: */ + cpu->cd.arm.flags = ARM_F_C | ((a >> 28) & 8); + } + if (a == 0) + cpu->cd.arm.next_ic = &ic[2]; + else + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; +} + + +/* + * cmps followed by bne (inside the same page): + */ +X(cmps_bne_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? ARM_F_C : 0; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + cpu->cd.arm.next_ic = &ic[2]; + } else { + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + } +} + + +/* + * cmps followed by bcc (inside the same page): + */ +X(cmps_bcc_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? 
ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (a >= b) + cpu->cd.arm.next_ic = &ic[2]; + else + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; +} + + +/* + * cmps (reg) followed by bcc (inside the same page): + */ +X(cmps_reg_bcc_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = reg(ic->arg[1]), c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (a >= b) + cpu->cd.arm.next_ic = &ic[2]; + else + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; +} + + +/* + * cmps followed by bhi (inside the same page): + */ +X(cmps_bhi_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (a > b) + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} + + +/* + * cmps (reg) followed by bhi (inside the same page): + */ +X(cmps_reg_bhi_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = reg(ic->arg[1]), c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? 
ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (a > b) + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} + + +/* + * cmps followed by bgt (inside the same page): + */ +X(cmps_bgt_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if ((int32_t)a > (int32_t)b) + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} + + +/* + * cmps followed by ble (inside the same page): + */ +X(cmps_ble_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? 
ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if ((int32_t)a <= (int32_t)b) + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} + + +/* + * teqs followed by beq (inside the same page): + */ +X(teqs_beq_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a ^ b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic[1].arg[0]; + } else { + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + cpu->cd.arm.next_ic = &ic[2]; + } +} + + +/* + * tsts followed by beq (inside the same page): + * (arg[1] must not have its highest bit set)) + */ +X(tsts_lo_beq_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a & b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); + if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (c == 0) + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} + + +/* + * teqs followed by bne (inside the same page): + */ +X(teqs_bne_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a ^ b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + } else { + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + } + if (c == 0) + cpu->cd.arm.next_ic = &ic[2]; + else + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic[1].arg[0]; +} + + +/* + * tsts followed by bne (inside the same page): + * (arg[1] must not have its highest bit set)) + */ +X(tsts_lo_bne_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a & b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags 
&= ~(ARM_F_Z | ARM_F_N); + if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (c == 0) + cpu->cd.arm.next_ic = &ic[2]; + else + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic[1].arg[0]; } @@ -1560,11 +1976,8 @@ X(end_of_page) { /* Update the PC: (offset 0, but on the next page) */ - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (ARM_IC_ENTRIES_PER_PAGE - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (ARM_IC_ENTRIES_PER_PAGE << ARM_INSTR_ALIGNMENT_SHIFT); /* Find the new physical page and update the translation pointers: */ quick_pc_to_pointers(cpu); @@ -1583,9 +1996,10 @@ * Check for the core of a NetBSD/arm memset; large memsets use a sequence * of 16 store-multiple instructions, each storing 2 registers at a time. */ -void arm_combine_netbsd_memset(struct cpu *cpu, struct arm_instr_call *ic, - int low_addr) +void arm_combine_netbsd_memset(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) { +#ifdef HOST_LITTLE_ENDIAN int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) & (ARM_IC_ENTRIES_PER_PAGE-1); @@ -1602,6 +2016,7 @@ combined; } } +#endif } @@ -1611,9 +2026,10 @@ * Check for the core of a NetBSD/arm memcpy; large memcpys use a * sequence of ldmia instructions. */ -void arm_combine_netbsd_memcpy(struct cpu *cpu, struct arm_instr_call *ic, - int low_addr) +void arm_combine_netbsd_memcpy(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) { +#ifdef HOST_LITTLE_ENDIAN int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) & (ARM_IC_ENTRIES_PER_PAGE-1); @@ -1630,6 +2046,7 @@ combined; } } +#endif } @@ -1638,8 +2055,8 @@ * * Check for the core of a NetBSD/arm cache clean. (There are two variants.) 
*/ -void arm_combine_netbsd_cacheclean(struct cpu *cpu, struct arm_instr_call *ic, - int low_addr) +void arm_combine_netbsd_cacheclean(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) { int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) & (ARM_IC_ENTRIES_PER_PAGE-1); @@ -1662,8 +2079,8 @@ * * Check for the core of a NetBSD/arm cache clean. (Second variant.) */ -void arm_combine_netbsd_cacheclean2(struct cpu *cpu, struct arm_instr_call *ic, - int low_addr) +void arm_combine_netbsd_cacheclean2(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) { int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) & (ARM_IC_ENTRIES_PER_PAGE-1); @@ -1685,64 +2102,312 @@ /* * arm_combine_netbsd_scanc(): */ -void arm_combine_netbsd_scanc(struct cpu *cpu, struct arm_instr_call *ic, - int low_addr) +void arm_combine_netbsd_scanc(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) { int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) & (ARM_IC_ENTRIES_PER_PAGE-1); - if (n_back >= 2) { - if (ic[-2].f == instr(load_w0_byte_u1_p1_imm) && - ic[-1].f == instr(load_w0_byte_u1_p1_reg)) { - ic[-2].f = instr(netbsd_scanc); - combined; - } + if (n_back < 2) + return; + + if (ic[-2].f == instr(load_w0_byte_u1_p1_imm) && + ic[-2].arg[0] == (size_t)(&cpu->cd.arm.r[1]) && + ic[-2].arg[1] == 0 && + ic[-2].arg[2] == (size_t)(&cpu->cd.arm.r[3]) && + ic[-1].f == instr(load_w0_byte_u1_p1_reg) && + ic[-1].arg[0] == (size_t)(&cpu->cd.arm.r[2]) && + ic[-1].arg[1] == (size_t)arm_r_r3_t0_c0 && + ic[-1].arg[2] == (size_t)(&cpu->cd.arm.r[3])) { + ic[-2].f = instr(netbsd_scanc); + combined; } } /* - * arm_combine_test2(): + * arm_combine_strlen(): */ -void arm_combine_test2(struct cpu *cpu, struct arm_instr_call *ic, int low_addr) +void arm_combine_strlen(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) { int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) & (ARM_IC_ENTRIES_PER_PAGE-1); - if (n_back >= 2) { - if (ic[-2].f == instr(store_w0_word_u1_p0_imm) && - ic[-2].arg[1] 
== 4 && - ic[-1].f == instr(subs) && - ic[-1].arg[0] == ic[-1].arg[2] && ic[-1].arg[1] == 4 && - ic[ 0].f == instr(b_samepage__gt) && - ic[ 0].arg[0] == (size_t)&ic[-2]) { - ic[-2].f = instr(fill_loop_test2); -printf("YO test2\n"); - combined; - } + if (n_back < 2) + return; + + if (ic[-2].f == instr(load_w1_byte_u1_p1_imm) && + ic[-2].arg[1] == 1 && + ic[-2].arg[2] == (size_t)(&cpu->cd.arm.r[3]) && + ic[-1].f == instr(cmps) && + ic[-1].arg[0] == (size_t)(&cpu->cd.arm.r[3]) && + ic[-1].arg[1] == 0) { + ic[-2].f = instr(strlen); + combined; } } -#if 0 - /* TODO: This is another test hack. */ +/* + * arm_combine_xchg(): + */ +void arm_combine_xchg(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ + int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + size_t a, b; - if (n_back >= 3) { - if (ic[-3].f == instr(cmps) && - ic[-3].arg[0] == ic[-1].arg[0] && - ic[-3].arg[1] == 0 && - ic[-2].f == instr(store_w0_byte_u1_p0_imm) && - ic[-2].arg[1] == 1 && - ic[-1].f == instr(sub) && - ic[-1].arg[0] == ic[-1].arg[2] && ic[-1].arg[1] == 1 && - ic[ 0].f == instr(b_samepage__gt) && - ic[ 0].arg[0] == (size_t)&ic[-3]) { - ic[-3].f = instr(fill_loop_test); + if (n_back < 2) + return; + + a = ic[-2].arg[0]; b = ic[-1].arg[0]; + + if (ic[-2].f == instr(eor_regshort) && + ic[-1].f == instr(eor_regshort) && + ic[-2].arg[0] == a && ic[-2].arg[1] == b && ic[-2].arg[2] == b && + ic[-1].arg[0] == b && ic[-1].arg[1] == a && ic[-1].arg[2] == a && + ic[ 0].arg[0] == a && ic[ 0].arg[1] == b && ic[ 0].arg[2] == b) { + ic[-2].f = instr(xchg); + combined; + } +} + + +/* + * arm_combine_netbsd_copyin(): + */ +void arm_combine_netbsd_copyin(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ +#ifdef HOST_LITTLE_ENDIAN + int i, n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + + if (n_back < 5) + return; + + for (i=-5; i<0; i++) { + if (ic[i].f != instr(load_w1_word_u1_p0_imm) || + ic[i].arg[0] != 
(size_t)(&cpu->cd.arm.r[0]) || + ic[i].arg[1] != 4) + return; + } + + if (ic[-5].arg[2] == (size_t)(&cpu->cd.arm.r[10]) && + ic[-4].arg[2] == (size_t)(&cpu->cd.arm.r[11]) && + ic[-3].arg[2] == (size_t)(&cpu->cd.arm.r[6]) && + ic[-2].arg[2] == (size_t)(&cpu->cd.arm.r[7]) && + ic[-1].arg[2] == (size_t)(&cpu->cd.arm.r[8])) { + ic[-5].f = instr(netbsd_copyin); + combined; + } +#endif +} + + +/* + * arm_combine_netbsd_copyout(): + */ +void arm_combine_netbsd_copyout(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ +#ifdef HOST_LITTLE_ENDIAN + int i, n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + + if (n_back < 5) + return; + + for (i=-5; i<0; i++) { + if (ic[i].f != instr(store_w1_word_u1_p0_imm) || + ic[i].arg[0] != (size_t)(&cpu->cd.arm.r[1]) || + ic[i].arg[1] != 4) + return; + } + + if (ic[-5].arg[2] == (size_t)(&cpu->cd.arm.r[8]) && + ic[-4].arg[2] == (size_t)(&cpu->cd.arm.r[9]) && + ic[-3].arg[2] == (size_t)(&cpu->cd.arm.r[10]) && + ic[-2].arg[2] == (size_t)(&cpu->cd.arm.r[11]) && + ic[-1].arg[2] == (size_t)(&cpu->cd.arm.r[6])) { + ic[-5].f = instr(netbsd_copyout); + combined; + } +#endif +} + + +/* + * arm_combine_cmps_b(): + */ +void arm_combine_cmps_b(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ + int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + if (n_back < 1) + return; + if (ic[0].f == instr(b__eq)) { + if (ic[-1].f == instr(cmps)) { + if (ic[-1].arg[1] == 0) + ic[-1].f = instr(cmps_0_beq); + else if (ic[-1].arg[1] & 0x80000000) + ic[-1].f = instr(cmps_neg_beq); + else + ic[-1].f = instr(cmps_pos_beq); combined; } + return; } - /* TODO: Combine forward as well */ -#endif + if (ic[0].f == instr(b_samepage__eq)) { + if (ic[-1].f == instr(cmps)) { + if (ic[-1].arg[1] == 0) + ic[-1].f = instr(cmps0_beq_samepage); + else + ic[-1].f = instr(cmps_beq_samepage); + combined; + } + if (ic[-1].f == instr(tsts) && + !(ic[-1].arg[1] & 0x80000000)) { + ic[-1].f = 
instr(tsts_lo_beq_samepage); + combined; + } + if (ic[-1].f == instr(teqs)) { + ic[-1].f = instr(teqs_beq_samepage); + combined; + } + return; + } + if (ic[0].f == instr(b_samepage__ne)) { + if (ic[-1].f == instr(cmps)) { + if (ic[-1].arg[1] == 0) + ic[-1].f = instr(cmps0_bne_samepage); + else + ic[-1].f = instr(cmps_bne_samepage); + combined; + } + if (ic[-1].f == instr(tsts) && + !(ic[-1].arg[1] & 0x80000000)) { + ic[-1].f = instr(tsts_lo_bne_samepage); + combined; + } + if (ic[-1].f == instr(teqs)) { + ic[-1].f = instr(teqs_bne_samepage); + combined; + } + return; + } + if (ic[0].f == instr(b_samepage__cc)) { + if (ic[-1].f == instr(cmps)) { + ic[-1].f = instr(cmps_bcc_samepage); + combined; + } + if (ic[-1].f == instr(cmps_regshort)) { + ic[-1].f = instr(cmps_reg_bcc_samepage); + combined; + } + return; + } + if (ic[0].f == instr(b_samepage__hi)) { + if (ic[-1].f == instr(cmps)) { + ic[-1].f = instr(cmps_bhi_samepage); + combined; + } + if (ic[-1].f == instr(cmps_regshort)) { + ic[-1].f = instr(cmps_reg_bhi_samepage); + combined; + } + return; + } + if (ic[0].f == instr(b_samepage__gt)) { + if (ic[-1].f == instr(cmps)) { + ic[-1].f = instr(cmps_bgt_samepage); + combined; + } + return; + } + if (ic[0].f == instr(b_samepage__le)) { + if (ic[-1].f == instr(cmps)) { + ic[-1].f = instr(cmps_ble_samepage); + combined; + } + return; + } +} + + +/*****************************************************************************/ + + +static void arm_switch_clear(struct arm_instr_call *ic, int rd, + int condition_code) +{ + switch (rd) { + case 0: ic->f = cond_instr(clear_r0); break; + case 1: ic->f = cond_instr(clear_r1); break; + case 2: ic->f = cond_instr(clear_r2); break; + case 3: ic->f = cond_instr(clear_r3); break; + case 4: ic->f = cond_instr(clear_r4); break; + case 5: ic->f = cond_instr(clear_r5); break; + case 6: ic->f = cond_instr(clear_r6); break; + case 7: ic->f = cond_instr(clear_r7); break; + case 8: ic->f = cond_instr(clear_r8); break; + case 9: ic->f = 
cond_instr(clear_r9); break; + case 10: ic->f = cond_instr(clear_r10); break; + case 11: ic->f = cond_instr(clear_r11); break; + case 12: ic->f = cond_instr(clear_r12); break; + case 13: ic->f = cond_instr(clear_r13); break; + case 14: ic->f = cond_instr(clear_r14); break; + } +} + + +static void arm_switch_mov1(struct arm_instr_call *ic, int rd, + int condition_code) +{ + switch (rd) { + case 0: ic->f = cond_instr(mov1_r0); break; + case 1: ic->f = cond_instr(mov1_r1); break; + case 2: ic->f = cond_instr(mov1_r2); break; + case 3: ic->f = cond_instr(mov1_r3); break; + case 4: ic->f = cond_instr(mov1_r4); break; + case 5: ic->f = cond_instr(mov1_r5); break; + case 6: ic->f = cond_instr(mov1_r6); break; + case 7: ic->f = cond_instr(mov1_r7); break; + case 8: ic->f = cond_instr(mov1_r8); break; + case 9: ic->f = cond_instr(mov1_r9); break; + case 10: ic->f = cond_instr(mov1_r10); break; + case 11: ic->f = cond_instr(mov1_r11); break; + case 12: ic->f = cond_instr(mov1_r12); break; + case 13: ic->f = cond_instr(mov1_r13); break; + case 14: ic->f = cond_instr(mov1_r14); break; + } +} + + +static void arm_switch_add1(struct arm_instr_call *ic, int rd, + int condition_code) +{ + switch (rd) { + case 0: ic->f = cond_instr(add1_r0); break; + case 1: ic->f = cond_instr(add1_r1); break; + case 2: ic->f = cond_instr(add1_r2); break; + case 3: ic->f = cond_instr(add1_r3); break; + case 4: ic->f = cond_instr(add1_r4); break; + case 5: ic->f = cond_instr(add1_r5); break; + case 6: ic->f = cond_instr(add1_r6); break; + case 7: ic->f = cond_instr(add1_r7); break; + case 8: ic->f = cond_instr(add1_r8); break; + case 9: ic->f = cond_instr(add1_r9); break; + case 10: ic->f = cond_instr(add1_r10); break; + case 11: ic->f = cond_instr(add1_r11); break; + case 12: ic->f = cond_instr(add1_r12); break; + case 13: ic->f = cond_instr(add1_r13); break; + case 14: ic->f = cond_instr(add1_r14); break; + } +} /*****************************************************************************/ @@ 
-1762,25 +2427,25 @@ unsigned char *page; unsigned char ib[4]; int condition_code, main_opcode, secondary_opcode, s_bit, rn, rd, r8; - int p_bit, u_bit, b_bit, w_bit, l_bit, regform, rm, c, t, any_pc_reg; + int p_bit, u_bit, w_bit, l_bit, regform, rm, c, t, any_pc_reg; void (*samepage_function)(struct cpu *, struct arm_instr_call *); /* Figure out the address of the instruction: */ low_pc = ((size_t)ic - (size_t)cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - addr = cpu->cd.arm.r[ARM_PC] & ~((ARM_IC_ENTRIES_PER_PAGE-1) << + addr = cpu->pc & ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); addr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC] = addr; + cpu->pc = addr; addr &= ~((1 << ARM_INSTR_ALIGNMENT_SHIFT) - 1); /* Read the instruction word from memory: */ page = cpu->cd.arm.host_load[addr >> 12]; if (page != NULL) { - /* fatal("TRANSLATION HIT!\n"); */ + /* fatal("TRANSLATION HIT! 0x%08x\n", addr); */ memcpy(ib, page + (addr & 0xfff), sizeof(ib)); } else { - /* fatal("TRANSLATION MISS!\n"); */ + /* fatal("TRANSLATION MISS! 
0x%08x\n", addr); */ if (!cpu->memory_rw(cpu, cpu->mem, addr, &ib[0], sizeof(ib), MEM_READ, CACHE_INSTRUCTION)) { fatal("to_be_translated(): " @@ -1805,10 +2470,9 @@ condition_code = iword >> 28; main_opcode = (iword >> 24) & 15; secondary_opcode = (iword >> 21) & 15; - u_bit = (iword >> 23) & 1; - b_bit = (iword >> 22) & 1; - w_bit = (iword >> 21) & 1; - s_bit = l_bit = (iword >> 20) & 1; + u_bit = iword & 0x00800000; + w_bit = iword & 0x00200000; + s_bit = l_bit = iword & 0x00100000; rn = (iword >> 16) & 15; rd = (iword >> 12) & 15; r8 = (iword >> 8) & 15; @@ -1873,6 +2537,10 @@ ic->arg[0] = iword; break; } + if ((iword & 0x0f900ff0) == 0x01000050) { + fatal("TODO: q{,d}{add,sub}\n"); + goto bad; + } if ((iword & 0x0ff000d0) == 0x01200010) { /* bx or blx */ if (iword & 0x20) @@ -1897,6 +2565,32 @@ ic->arg[2] = (size_t)(&cpu->cd.arm.r[rn]); break; } + if ((iword & 0x0fff0ff0) == 0x016f0f10) { + ic->f = cond_instr(clz); + ic->arg[0] = (size_t)(&cpu->cd.arm.r[rm]); + ic->arg[1] = (size_t)(&cpu->cd.arm.r[rd]); + break; + } + if ((iword & 0x0ff00090) == 0x01000080) { + /* TODO: smlaXX */ + goto bad; + } + if ((iword & 0x0ff00090) == 0x01400080) { + /* TODO: smlalY */ + goto bad; + } + if ((iword & 0x0ff000b0) == 0x01200080) { + /* TODO: smlawY */ + goto bad; + } + if ((iword & 0x0ff0f090) == 0x01600080) { + /* TODO: smulXY */ + goto bad; + } + if ((iword & 0x0ff0f0b0) == 0x012000a0) { + /* TODO: smulwY */ + goto bad; + } if ((iword & 0x0fb0fff0) == 0x0120f000 || (iword & 0x0fb0f000) == 0x0320f000) { /* msr: move to [S|C]PSR from a register or @@ -1992,56 +2686,35 @@ break; } - /* "mov reg,reg": */ - if ((iword & 0x0fff0ff0) == 0x01a00000 && - (iword&15) != ARM_PC && rd != ARM_PC) { - ic->f = cond_instr(mov_reg_reg); - ic->arg[0] = (size_t)(&cpu->cd.arm.r[iword & 15]); + /* "mov reg,reg" or "mov reg,pc": */ + if ((iword & 0x0fff0ff0) == 0x01a00000 && rd != ARM_PC) { + if (rm != ARM_PC) { + ic->f = cond_instr(mov_reg_reg); + ic->arg[0] = (size_t)(&cpu->cd.arm.r[rm]); + 
} else { + ic->f = cond_instr(mov_reg_pc); + ic->arg[0] = (addr & 0xfff) + 8; + } ic->arg[1] = (size_t)(&cpu->cd.arm.r[rd]); break; } /* "mov reg,#0": */ - if ((iword & 0x0fff0fff) == 0x03a03000 && rd != ARM_PC) { - switch (rd) { - case 0: ic->f = cond_instr(clear_r0); break; - case 1: ic->f = cond_instr(clear_r1); break; - case 2: ic->f = cond_instr(clear_r2); break; - case 3: ic->f = cond_instr(clear_r3); break; - case 4: ic->f = cond_instr(clear_r4); break; - case 5: ic->f = cond_instr(clear_r5); break; - case 6: ic->f = cond_instr(clear_r6); break; - case 7: ic->f = cond_instr(clear_r7); break; - case 8: ic->f = cond_instr(clear_r8); break; - case 9: ic->f = cond_instr(clear_r9); break; - case 10: ic->f = cond_instr(clear_r10); break; - case 11: ic->f = cond_instr(clear_r11); break; - case 12: ic->f = cond_instr(clear_r12); break; - case 13: ic->f = cond_instr(clear_r13); break; - case 14: ic->f = cond_instr(clear_r14); break; - } + if ((iword & 0x0fff0fff) == 0x03a00000 && rd != ARM_PC) { + arm_switch_clear(ic, rd, condition_code); break; } /* "mov reg,#1": */ - if ((iword & 0x0fff0fff) == 0x03a03001 && rd != ARM_PC) { - switch (rd) { - case 0: ic->f = cond_instr(mov1_r0); break; - case 1: ic->f = cond_instr(mov1_r1); break; - case 2: ic->f = cond_instr(mov1_r2); break; - case 3: ic->f = cond_instr(mov1_r3); break; - case 4: ic->f = cond_instr(mov1_r4); break; - case 5: ic->f = cond_instr(mov1_r5); break; - case 6: ic->f = cond_instr(mov1_r6); break; - case 7: ic->f = cond_instr(mov1_r7); break; - case 8: ic->f = cond_instr(mov1_r8); break; - case 9: ic->f = cond_instr(mov1_r9); break; - case 10: ic->f = cond_instr(mov1_r10); break; - case 11: ic->f = cond_instr(mov1_r11); break; - case 12: ic->f = cond_instr(mov1_r12); break; - case 13: ic->f = cond_instr(mov1_r13); break; - case 14: ic->f = cond_instr(mov1_r14); break; - } + if ((iword & 0x0fff0fff) == 0x03a00001 && rd != ARM_PC) { + arm_switch_mov1(ic, rd, condition_code); + break; + } + + /* "add 
reg,reg,#1": */ + if ((iword & 0x0ff00fff) == 0x02800001 && rd != ARM_PC + && rn == rd) { + arm_switch_add1(ic, rd, condition_code); break; } @@ -2072,18 +2745,32 @@ ic->arg[1] = imm; } + /* mvn #imm ==> mov #~imm */ + if (secondary_opcode == 0xf && !regform) { + secondary_opcode = 0xd; + ic->arg[1] = ~ic->arg[1]; + } + ic->arg[0] = (size_t)(&cpu->cd.arm.r[rn]); ic->arg[2] = (size_t)(&cpu->cd.arm.r[rd]); any_pc_reg = 0; if (rn == ARM_PC || rd == ARM_PC) any_pc_reg = 1; - ic->f = arm_dpi_instr[condition_code + - 16 * secondary_opcode + (s_bit? 256 : 0) + - (any_pc_reg? 512 : 0) + (regform? 1024 : 0)]; + if (!any_pc_reg && regform && (iword & 0xfff) < ARM_PC) { + ic->arg[1] = (size_t)(&cpu->cd.arm.r[rm]); + ic->f = arm_dpi_instr_regshort[condition_code + + 16 * secondary_opcode + (s_bit? 256 : 0)]; + } else + ic->f = arm_dpi_instr[condition_code + + 16 * secondary_opcode + (s_bit? 256 : 0) + + (any_pc_reg? 512 : 0) + (regform? 1024 : 0)]; + if (ic->f == instr(eor_regshort)) + cpu->cd.arm.combination_check = arm_combine_xchg; if (iword == 0xe113000c) - cpu->combination_check = arm_combine_netbsd_scanc; + cpu->cd.arm.combination_check = + arm_combine_netbsd_scanc; break; case 0x4: /* Load and store... */ @@ -2112,6 +2799,54 @@ fatal("Not a Load/store TODO\n"); goto bad; } + /* Special case: pc-relative load within the same page: */ + if (rn == ARM_PC && rd != ARM_PC && main_opcode < 6) { + int ofs = (addr & 0xfff) + 8, max = 0xffc; + int b_bit = iword & 0x00400000; + if (b_bit) + max = 0xfff; + if (u_bit) + ofs += (iword & 0xfff); + else + ofs -= (iword & 0xfff); + /* NOTE/TODO: This assumes 4KB pages, + it will not work with 1KB pages. */ + if (ofs >= 0 && ofs <= max) { + unsigned char *p; + unsigned char c[4]; + int len = b_bit? 
1 : 4; + uint32_t x, a = (addr & 0xfffff000) | ofs; + /* ic->f = cond_instr(mov); */ + ic->f = arm_dpi_instr[condition_code + 16*0xd]; + ic->arg[2] = (size_t)(&cpu->cd.arm.r[rd]); + p = cpu->cd.arm.host_load[a >> 12]; + if (p != NULL) { + memcpy(c, p + (a & 0xfff), len); + } else { + if (!cpu->memory_rw(cpu, cpu->mem, a, + c, len, MEM_READ, CACHE_DATA)) { + fatal("to_be_translated(): " + "read failed X: TODO\n"); + goto bad; + } + } + if (cpu->byte_order == EMUL_LITTLE_ENDIAN) + x = c[0] + (c[1]<<8) + + (c[2]<<16) + (c[3]<<24); + else + x = c[3] + (c[2]<<8) + + (c[1]<<16) + (c[0]<<24); + if (b_bit) + x = c[0]; + ic->arg[1] = x; + } + } + if (iword == 0xe4b09004) + cpu->cd.arm.combination_check = + arm_combine_netbsd_copyin; + if (iword == 0xe4a17004) + cpu->cd.arm.combination_check = + arm_combine_netbsd_copyout; break; case 0x8: /* Multiple load/store... (Block data transfer) */ @@ -2162,13 +2897,11 @@ if (main_opcode == 0x0a) { ic->f = cond_instr(b); samepage_function = cond_instr(b_samepage); - /* if (iword == 0xcafffffc) - cpu->combination_check = arm_combine_test2; */ if (iword == 0xcaffffed) - cpu->combination_check = + cpu->cd.arm.combination_check = arm_combine_netbsd_memset; if (iword == 0xaafffff9) - cpu->combination_check = + cpu->cd.arm.combination_check = arm_combine_netbsd_memcpy; } else { if (cpu->machine->show_trace_tree) { @@ -2181,6 +2914,11 @@ } } + /* arg 1 = offset of current instruction */ + /* arg 2 = offset of the following instruction */ + ic->arg[1] = addr & 0xffc; + ic->arg[2] = (addr & 0xffc) + 4; + ic->arg[0] = (iword & 0x00ffffff) << 2; /* Sign-extend: */ if (ic->arg[0] & 0x02000000) @@ -2190,7 +2928,12 @@ */ ic->arg[0] = (int32_t)(ic->arg[0] + 8); - /* Special case: branch within the same page: */ + /* + * Special case: branch within the same page: + * + * arg[0] = addr of the arm_instr_call of the target + * arg[1] = addr of the next arm_instr_call. 
+ */ { uint32_t mask_within_page = ((ARM_IC_ENTRIES_PER_PAGE-1) << @@ -2205,14 +2948,49 @@ cpu->cd.arm.cur_ic_page + ((new_pc & mask_within_page) >> ARM_INSTR_ALIGNMENT_SHIFT)); + ic->arg[1] = (size_t) ( + cpu->cd.arm.cur_ic_page + + (((addr & mask_within_page) + 4) >> + ARM_INSTR_ALIGNMENT_SHIFT)); + } else if (main_opcode == 0x0a) { + /* Special hack for a plain "b": */ + ic->arg[0] += ic->arg[1]; } } -#if 0 - /* Hm. This doesn't really increase performance. */ + if (main_opcode == 0xa && (condition_code <= 1 + || condition_code == 3 || condition_code == 8 + || condition_code == 12 || condition_code == 13)) + cpu->cd.arm.combination_check = arm_combine_cmps_b; + + if (iword == 0x1afffffc) + cpu->cd.arm.combination_check = arm_combine_strlen; + + /* Hm. Does this really increase performance? */ if (iword == 0x8afffffa) - cpu->combination_check = arm_combine_netbsd_cacheclean2; -#endif + cpu->cd.arm.combination_check = + arm_combine_netbsd_cacheclean2; + break; + + case 0xc: + case 0xd: + /* + * xxxx1100 0100nnnn ddddcccc oooommmm MCRR c,op,Rd,Rn,CRm + * xxxx1100 0101nnnn ddddcccc oooommmm MRRC c,op,Rd,Rn,CRm + */ + if ((iword & 0x0fe00000) == 0x0c400000) { + fatal("MCRR/MRRC: TODO\n"); + goto bad; + } + + /* + * TODO: LDC/STC + * + * For now, treat as Undefined instructions. This causes e.g. + * Linux/ARM to emulate these instructions (floating point). 
+ */ + ic->f = cond_instr(und); + ic->arg[0] = addr & 0xfff; break; case 0xe: @@ -2226,14 +3004,19 @@ ic->f = cond_instr(cdp); } if (iword == 0xee070f9a) - cpu->combination_check = arm_combine_netbsd_cacheclean; + cpu->cd.arm.combination_check = + arm_combine_netbsd_cacheclean; break; case 0xf: /* SWI: */ /* Default handler: */ ic->f = cond_instr(swi); - if (iword == 0xef8c64be) { + ic->arg[0] = addr & 0xfff; + if (iword == 0xef8c64eb) { + /* Hack for rebooting a machine: */ + ic->f = instr(reboot); + } else if (iword == 0xef8c64be) { /* Hack for openfirmware prom emulation: */ ic->f = instr(openfirmware); } else if (cpu->machine->userland_emul != NULL) {