--- trunk/src/cpus/cpu_arm_instr.c 2007/10/08 16:19:01 16 +++ trunk/src/cpus/cpu_arm_instr.c 2007/10/08 16:20:58 32 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 Anders Gavare. All rights reserved. + * Copyright (C) 2005-2006 Anders Gavare. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -25,7 +25,7 @@ * SUCH DAMAGE. * * - * $Id: cpu_arm_instr.c,v 1.29 2005/10/11 03:31:28 debug Exp $ + * $Id: cpu_arm_instr.c,v 1.69 2006/09/09 09:04:32 debug Exp $ * * ARM instructions. * @@ -33,7 +33,77 @@ * (If no instruction was executed, then it should be decreased. If, say, 4 * instructions were combined into one function and executed, then it should * be increased by 3.) + * + * Note: cpu->pc is prefered over r[ARM_PC]. r[ARM_PC] is only used in a + * few places, and should always be kept in synch with the real + * program counter. + */ + + +/* #define GATHER_BDT_STATISTICS */ + + +#ifdef GATHER_BDT_STATISTICS +/* + * update_bdt_statistics(): + * + * Gathers statistics about load/store multiple instructions. + * + * NOTE/TODO: Perhaps it would be more memory efficient to swap the high + * and low parts of the instruction word, so that the lllllll bits become + * the high bits; this would cause fewer host pages to be used. Anyway, the + * current implementation works on hosts with lots of RAM. 
+ * + * The resulting file, bdt_statistics.txt, should then be processed like + * this to give a new cpu_arm_multi.txt: + * + * uniq -c bdt_statistics.txt|sort -nr|head -256|cut -f 2 > cpu_arm_multi.txt */ +static void update_bdt_statistics(uint32_t iw) +{ + static FILE *f = NULL; + static long long *counts; + static char *counts_used; + static long long n = 0; + + if (f == NULL) { + size_t s = (1 << 24) * sizeof(long long); + f = fopen("bdt_statistics.txt", "w"); + if (f == NULL) { + fprintf(stderr, "update_bdt_statistics(): :-(\n"); + exit(1); + } + counts = zeroed_alloc(s); + counts_used = zeroed_alloc(65536); + } + + /* Drop the s-bit: xxxx100P USWLnnnn llllllll llllllll */ + iw = ((iw & 0x01800000) >> 1) | (iw & 0x003fffff); + + counts_used[iw & 0xffff] = 1; + counts[iw] ++; + + n ++; + if ((n % 500000) == 0) { + int i; + long long j; + fatal("[ update_bdt_statistics(): n = %lli ]\n", (long long) n); + fseek(f, 0, SEEK_SET); + for (i=0; i<0x1000000; i++) + if (counts_used[i & 0xffff] && counts[i] != 0) { + /* Recreate the opcode: */ + uint32_t opcode = ((i & 0x00c00000) << 1) + | (i & 0x003fffff) | 0x08000000; + for (j=0; jcd.arm.cpsr & ARM_FLAG_Z) \ + { if (cpu->cd.arm.flags & ARM_F_Z) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __ne(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (!(cpu->cd.arm.cpsr & ARM_FLAG_Z)) \ + { if (!(cpu->cd.arm.flags & ARM_F_Z)) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __cs(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (cpu->cd.arm.cpsr & ARM_FLAG_C) \ + { if (cpu->cd.arm.flags & ARM_F_C) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __cc(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (!(cpu->cd.arm.cpsr & ARM_FLAG_C)) \ + { if (!(cpu->cd.arm.flags & ARM_F_C)) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __mi(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (cpu->cd.arm.cpsr & ARM_FLAG_N) \ + { if (cpu->cd.arm.flags & ARM_F_N) \ arm_instr_ ## n (cpu, 
ic); } \ void arm_instr_ ## n ## __pl(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (!(cpu->cd.arm.cpsr & ARM_FLAG_N)) \ + { if (!(cpu->cd.arm.flags & ARM_F_N)) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __vs(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (cpu->cd.arm.cpsr & ARM_FLAG_V) \ + { if (cpu->cd.arm.flags & ARM_F_V) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __vc(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (!(cpu->cd.arm.cpsr & ARM_FLAG_V)) \ + { if (!(cpu->cd.arm.flags & ARM_F_V)) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __hi(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (cpu->cd.arm.cpsr & ARM_FLAG_C && \ - !(cpu->cd.arm.cpsr & ARM_FLAG_Z)) \ + { if (condition_hi[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __ls(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (cpu->cd.arm.cpsr & ARM_FLAG_Z || \ - !(cpu->cd.arm.cpsr & ARM_FLAG_C)) \ + { if (!condition_hi[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __ge(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) == \ - ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0)) \ + { if (condition_ge[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __lt(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) != \ - ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0)) \ + { if (!condition_ge[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __gt(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) == \ - ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0) && \ - !(cpu->cd.arm.cpsr & ARM_FLAG_Z)) \ + { if (condition_gt[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void arm_instr_ ## n ## __le(struct cpu *cpu, \ struct arm_instr_call *ic) \ - { if (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) != \ - ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0) || \ 
- (cpu->cd.arm.cpsr & ARM_FLAG_Z)) \ + { if (!condition_gt[cpu->cd.arm.flags]) \ arm_instr_ ## n (cpu, ic); } \ void (*arm_cond_instr_ ## n [16])(struct cpu *, \ struct arm_instr_call *) = { \ @@ -135,48 +205,44 @@ /* - * nop: Do nothing. * invalid: Invalid instructions end up here. */ -X(nop) { } X(invalid) { uint32_t low_pc; low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - fatal("Invalid ARM instruction: pc=0x%08x\n", (int)cpu->pc); + fatal("FATAL ERROR: An internal error occured in the ARM" + " dyntrans code. Please contact the author with detailed" + " repro steps on how to trigger this bug. pc = 0x%08"PRIx32"\n", + (uint32_t)cpu->pc); - cpu->running = 0; - cpu->running_translated = 0; - cpu->n_translated_instrs --; cpu->cd.arm.next_ic = &nothing_call; } /* + * nop: Do nothing. 
+ */ +X(nop) +{ +} + + +/* * b: Branch (to a different translated page) * * arg[0] = relative offset */ X(b) { - uint32_t low_pc; - - /* Calculate new PC from this instruction + arg[0] */ - low_pc = ((size_t)ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (int32_t)ic->arg[0]; - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc = (uint32_t)((cpu->pc & 0xfffff000) + (int32_t)ic->arg[0]); /* Find the new physical page and update the translation pointers: */ - arm_pc_to_pointers(cpu); + quick_pc_to_pointers(cpu); } Y(b) @@ -185,12 +251,82 @@ * b_samepage: Branch (to within the same translated page) * * arg[0] = pointer to new arm_instr_call + * arg[1] = pointer to the next instruction. + * + * NOTE: This instruction is manually inlined. */ -X(b_samepage) -{ +X(b_samepage) { cpu->cd.arm.next_ic = (struct arm_instr_call *) ic->arg[0]; } -Y(b_samepage) +X(b_samepage__eq) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_Z? 0 : 1]; +} +X(b_samepage__ne) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_Z? 1 : 0]; +} +X(b_samepage__cs) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_C? 0 : 1]; +} +X(b_samepage__cc) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_C? 1 : 0]; +} +X(b_samepage__mi) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_N? 0 : 1]; +} +X(b_samepage__pl) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_N? 1 : 0]; +} +X(b_samepage__vs) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_V? 
0 : 1]; +} +X(b_samepage__vc) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[cpu->cd.arm.flags & ARM_F_V? 1 : 0]; +} +X(b_samepage__hi) { + cpu->cd.arm.next_ic = (condition_hi[cpu->cd.arm.flags])? + (struct arm_instr_call *) ic->arg[0] : + (struct arm_instr_call *) ic->arg[1]; +} +X(b_samepage__ls) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[condition_hi[cpu->cd.arm.flags]]; +} +X(b_samepage__ge) { + cpu->cd.arm.next_ic = (condition_ge[cpu->cd.arm.flags])? + (struct arm_instr_call *) ic->arg[0] : + (struct arm_instr_call *) ic->arg[1]; +} +X(b_samepage__lt) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[condition_ge[cpu->cd.arm.flags]]; +} +X(b_samepage__gt) { + cpu->cd.arm.next_ic = (condition_gt[cpu->cd.arm.flags])? + (struct arm_instr_call *) ic->arg[0] : + (struct arm_instr_call *) ic->arg[1]; +} +X(b_samepage__le) { + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic->arg[condition_gt[cpu->cd.arm.flags]]; +} +void (*arm_cond_instr_b_samepage[16])(struct cpu *, + struct arm_instr_call *) = { + arm_instr_b_samepage__eq, arm_instr_b_samepage__ne, + arm_instr_b_samepage__cs, arm_instr_b_samepage__cc, + arm_instr_b_samepage__mi, arm_instr_b_samepage__pl, + arm_instr_b_samepage__vs, arm_instr_b_samepage__vc, + arm_instr_b_samepage__hi, arm_instr_b_samepage__ls, + arm_instr_b_samepage__ge, arm_instr_b_samepage__lt, + arm_instr_b_samepage__gt, arm_instr_b_samepage__le, + arm_instr_b_samepage, arm_instr_nop }; /* @@ -200,7 +336,7 @@ */ X(bx) { - cpu->pc = cpu->cd.arm.r[ARM_PC] = reg(ic->arg[0]); + cpu->pc = reg(ic->arg[0]); if (cpu->pc & 1) { fatal("thumb: TODO\n"); exit(1); @@ -208,7 +344,7 @@ cpu->pc &= ~3; /* Find the new physical page and update the translation pointers: */ - arm_pc_to_pointers(cpu); + quick_pc_to_pointers(cpu); } Y(bx) @@ -220,7 +356,7 @@ */ X(bx_trace) { - cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR]; + cpu->pc = cpu->cd.arm.r[ARM_LR]; if (cpu->pc & 1) { fatal("thumb: TODO\n"); 
exit(1); @@ -230,7 +366,7 @@ cpu_functioncall_trace_return(cpu); /* Find the new physical page and update the translation pointers: */ - arm_pc_to_pointers(cpu); + quick_pc_to_pointers(cpu); } Y(bx_trace) @@ -242,23 +378,14 @@ */ X(bl) { - uint32_t lr, low_pc; - - /* Figure out what the return (link) address will be: */ - low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - lr = cpu->cd.arm.r[ARM_PC]; - lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - - /* Link: */ - cpu->cd.arm.r[ARM_LR] = lr; + uint32_t pc = ((uint32_t)cpu->pc & 0xfffff000) + (int32_t)ic->arg[1]; + cpu->cd.arm.r[ARM_LR] = pc + 4; /* Calculate new PC from this instruction + arg[0] */ - cpu->pc = cpu->cd.arm.r[ARM_PC] = lr - 4 + (int32_t)ic->arg[0]; + cpu->pc = pc + (int32_t)ic->arg[0]; /* Find the new physical page and update the translation pointers: */ - arm_pc_to_pointers(cpu); + quick_pc_to_pointers(cpu); } Y(bl) @@ -270,19 +397,9 @@ */ X(blx) { - uint32_t lr, low_pc; - - /* Figure out what the return (link) address will be: */ - low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - lr = cpu->cd.arm.r[ARM_PC]; - lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - - /* Link: */ + uint32_t lr = ((uint32_t)cpu->pc & 0xfffff000) + (int32_t)ic->arg[2]; cpu->cd.arm.r[ARM_LR] = lr; - - cpu->pc = cpu->cd.arm.r[ARM_PC] = reg(ic->arg[0]); + cpu->pc = reg(ic->arg[0]); if (cpu->pc & 1) { fatal("thumb: TODO\n"); exit(1); @@ -290,7 +407,7 @@ cpu->pc &= ~3; /* Find the new physical page and update the translation pointers: */ - arm_pc_to_pointers(cpu); + quick_pc_to_pointers(cpu); } Y(blx) @@ -302,25 +419,16 @@ */ X(bl_trace) { - uint32_t lr, low_pc; - - /* Figure out what the return (link) address will be: */ - low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) - 
cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - lr = cpu->cd.arm.r[ARM_PC]; - lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - - /* Link: */ - cpu->cd.arm.r[ARM_LR] = lr; + uint32_t pc = ((uint32_t)cpu->pc & 0xfffff000) + (int32_t)ic->arg[1]; + cpu->cd.arm.r[ARM_LR] = pc + 4; /* Calculate new PC from this instruction + arg[0] */ - cpu->pc = cpu->cd.arm.r[ARM_PC] = lr - 4 + (int32_t)ic->arg[0]; + cpu->pc = pc + (int32_t)ic->arg[0]; cpu_functioncall_trace(cpu, cpu->pc); /* Find the new physical page and update the translation pointers: */ - arm_pc_to_pointers(cpu); + quick_pc_to_pointers(cpu); } Y(bl_trace) @@ -332,19 +440,8 @@ */ X(bl_samepage) { - uint32_t lr, low_pc; - - /* Figure out what the return (link) address will be: */ - low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - lr = cpu->cd.arm.r[ARM_PC]; - lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - - /* Link: */ - cpu->cd.arm.r[ARM_LR] = lr; - - /* Branch: */ + cpu->cd.arm.r[ARM_LR] = + ((uint32_t)cpu->pc & 0xfffff000) + (int32_t)ic->arg[2]; cpu->cd.arm.next_ic = (struct arm_instr_call *) ic->arg[0]; } Y(bl_samepage) @@ -357,33 +454,56 @@ */ X(bl_samepage_trace) { - uint32_t tmp_pc, lr, low_pc; + uint32_t low_pc, lr = (cpu->pc & 0xfffff000) + ic->arg[2]; - /* Figure out what the return (link) address will be: */ - low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - lr = cpu->cd.arm.r[ARM_PC]; - lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - - /* Link: */ + /* Link and branch: */ cpu->cd.arm.r[ARM_LR] = lr; - - /* Branch: */ cpu->cd.arm.next_ic = (struct arm_instr_call *) ic->arg[0]; + /* Synchronize the program counter: */ low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t) 
cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - tmp_pc = cpu->cd.arm.r[ARM_PC]; - tmp_pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - tmp_pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu_functioncall_trace(cpu, tmp_pc); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); + + /* ... and show trace: */ + cpu_functioncall_trace(cpu, cpu->pc); } Y(bl_samepage_trace) /* + * clz: Count leading zeroes. + * + * arg[0] = ptr to rm + * arg[1] = ptr to rd + */ +X(clz) +{ + uint32_t rm = reg(ic->arg[0]); + int i = 32, n = 0, j; + while (i>0) { + if (rm & 0xff000000) { + for (j=0; j<8; j++) { + if (rm & 0x80000000) + break; + n ++; + rm <<= 1; + } + break; + } else { + rm <<= 8; + i -= 8; + n += 8; + } + } + reg(ic->arg[1]) = n; +} +Y(clz) + + +/* * mul: Multiplication * * arg[0] = ptr to rd @@ -397,12 +517,13 @@ Y(mul) X(muls) { - uint32_t result = reg(ic->arg[1]) * reg(ic->arg[2]); - cpu->cd.arm.cpsr &= ~(ARM_FLAG_Z | ARM_FLAG_N); + uint32_t result; + result = reg(ic->arg[1]) * reg(ic->arg[2]); + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); if (result == 0) - cpu->cd.arm.cpsr |= ARM_FLAG_Z; + cpu->cd.arm.flags |= ARM_F_Z; if (result & 0x80000000) - cpu->cd.arm.cpsr |= ARM_FLAG_N; + cpu->cd.arm.flags |= ARM_F_N; reg(ic->arg[0]) = result; } Y(muls) @@ -417,8 +538,9 @@ { /* xxxx0000 00ASdddd nnnnssss 1001mmmm (Rd,Rm,Rs[,Rn]) */ uint32_t iw = ic->arg[0]; - int rd = (iw >> 16) & 15, rn = (iw >> 12) & 15, - rs = (iw >> 8) & 15, rm = iw & 15; + int rd, rs, rn, rm; + rd = (iw >> 16) & 15; rn = (iw >> 12) & 15, + rs = (iw >> 8) & 15; rm = iw & 15; cpu->cd.arm.r[rd] = cpu->cd.arm.r[rm] * cpu->cd.arm.r[rs] + cpu->cd.arm.r[rn]; } @@ -427,15 +549,16 @@ { /* xxxx0000 00ASdddd nnnnssss 1001mmmm (Rd,Rm,Rs[,Rn]) */ uint32_t iw = ic->arg[0]; - int rd = (iw >> 16) & 15, rn = (iw >> 12) & 15, - rs = (iw >> 8) & 15, rm = iw & 15; + int rd, rs, rn, rm; + rd = (iw >> 16) & 15; rn = (iw >> 12) & 15, + rs = (iw >> 8) & 15; rm 
= iw & 15; cpu->cd.arm.r[rd] = cpu->cd.arm.r[rm] * cpu->cd.arm.r[rs] + cpu->cd.arm.r[rn]; - cpu->cd.arm.cpsr &= ~(ARM_FLAG_Z | ARM_FLAG_N); + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); if (cpu->cd.arm.r[rd] == 0) - cpu->cd.arm.cpsr |= ARM_FLAG_Z; + cpu->cd.arm.flags |= ARM_F_Z; if (cpu->cd.arm.r[rd] & 0x80000000) - cpu->cd.arm.cpsr |= ARM_FLAG_N; + cpu->cd.arm.flags |= ARM_F_N; } Y(mlas) @@ -448,9 +571,10 @@ X(mull) { /* xxxx0000 1UAShhhh llllssss 1001mmmm */ - uint32_t iw = ic->arg[0]; - int u_bit = (iw >> 22) & 1, a_bit = (iw >> 21) & 1; - uint64_t tmp = cpu->cd.arm.r[iw & 15]; + uint32_t iw; uint64_t tmp; int u_bit, a_bit; + iw = ic->arg[0]; + u_bit = iw & 0x00400000; a_bit = iw & 0x00200000; + tmp = cpu->cd.arm.r[iw & 15]; if (u_bit) tmp = (int64_t)(int32_t)tmp * (int64_t)(int32_t)cpu->cd.arm.r[(iw >> 8) & 15]; @@ -471,6 +595,39 @@ /* + * smulXY: 16-bit * 16-bit multiplication (32-bit result) + * + * arg[0] = ptr to rm + * arg[1] = ptr to rs + * arg[2] = ptr to rd + */ +X(smulbb) +{ + reg(ic->arg[2]) = (int32_t)(int16_t)reg(ic->arg[0]) * + (int32_t)(int16_t)reg(ic->arg[1]); +} +Y(smulbb) +X(smultb) +{ + reg(ic->arg[2]) = (int32_t)(int16_t)(reg(ic->arg[0]) >> 16) * + (int32_t)(int16_t)reg(ic->arg[1]); +} +Y(smultb) +X(smulbt) +{ + reg(ic->arg[2]) = (int32_t)(int16_t)reg(ic->arg[0]) * + (int32_t)(int16_t)(reg(ic->arg[1]) >> 16); +} +Y(smulbt) +X(smultt) +{ + reg(ic->arg[2]) = (int32_t)(int16_t)(reg(ic->arg[0]) >> 16) * + (int32_t)(int16_t)(reg(ic->arg[1]) >> 16); +} +Y(smultt) + + +/* * mov_reg_reg: Move a register to another. * * arg[0] = ptr to source register @@ -484,19 +641,34 @@ /* + * mov_reg_pc: Move the PC register to a normal register. 
+ * + * arg[0] = offset compared to start of current page + 8 + * arg[1] = ptr to destination register + */ +X(mov_reg_pc) +{ + reg(ic->arg[1]) = ((uint32_t)cpu->pc&0xfffff000) + (int32_t)ic->arg[0]; +} +Y(mov_reg_pc) + + +/* * ret_trace: "mov pc,lr" with trace enabled + * ret: "mov pc,lr" without trace enabled * * arg[0] = ignored */ X(ret_trace) { - uint32_t old_pc = cpu->cd.arm.r[ARM_PC]; - uint32_t mask_within_page = ((ARM_IC_ENTRIES_PER_PAGE-1) + uint32_t old_pc, mask_within_page; + old_pc = cpu->pc; + mask_within_page = ((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT) | ((1 << ARM_INSTR_ALIGNMENT_SHIFT) - 1); /* Update the PC register: */ - cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR]; + cpu->pc = cpu->cd.arm.r[ARM_LR]; cpu_functioncall_trace_return(cpu); @@ -509,10 +681,16 @@ ((cpu->pc & mask_within_page) >> ARM_INSTR_ALIGNMENT_SHIFT); } else { /* Find the new physical page and update pointers: */ - arm_pc_to_pointers(cpu); + quick_pc_to_pointers(cpu); } } Y(ret_trace) +X(ret) +{ + cpu->pc = cpu->cd.arm.r[ARM_LR]; + quick_pc_to_pointers(cpu); +} +Y(ret) /* @@ -533,12 +711,17 @@ (ic->arg[0] & ARM_FLAG_MODE)); uint32_t new_value = ic->arg[0]; + cpu->cd.arm.cpsr &= 0x0fffffff; + cpu->cd.arm.cpsr |= (cpu->cd.arm.flags << 28); + if (switch_register_banks) arm_save_register_bank(cpu); cpu->cd.arm.cpsr &= ~mask; cpu->cd.arm.cpsr |= (new_value & mask); + cpu->cd.arm.flags = cpu->cd.arm.cpsr >> 28; + if (switch_register_banks) arm_load_register_bank(cpu); } @@ -580,11 +763,10 @@ /* Synchronize the program counter: */ uint32_t old_pc, low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - old_pc = cpu->pc = cpu->cd.arm.r[ARM_PC]; -printf("msr_spsr: old pc = 0x%08x\n", old_pc); + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + 
cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); + old_pc = cpu->pc; + printf("msr_spsr: old pc = 0x%08"PRIx32"\n", old_pc); } exit(1); } @@ -605,13 +787,15 @@ */ X(mrs) { + cpu->cd.arm.cpsr &= 0x0fffffff; + cpu->cd.arm.cpsr |= (cpu->cd.arm.flags << 28); reg(ic->arg[0]) = cpu->cd.arm.cpsr; } Y(mrs) /* - * mrs: Move from status/flag register to a normal register. + * mrs: Move from saved status/flag register to a normal register. * * arg[0] = pointer to rd */ @@ -640,24 +824,18 @@ * arg[0] = copy of the instruction word */ X(mcr_mrc) { - uint32_t low_pc; - low_pc = ((size_t)ic - (size_t) + uint32_t low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); arm_mcr_mrc(cpu, ic->arg[0]); } Y(mcr_mrc) X(cdp) { - uint32_t low_pc; - low_pc = ((size_t)ic - (size_t) + uint32_t low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); arm_cdp(cpu, ic->arg[0]); } Y(cdp) @@ -668,11 +846,23 @@ */ X(openfirmware) { + /* TODO: sync pc? 
*/ of_emul(cpu); - cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR]; + cpu->pc = cpu->cd.arm.r[ARM_LR]; if (cpu->machine->show_trace_tree) cpu_functioncall_trace_return(cpu); - arm_pc_to_pointers(cpu); + quick_pc_to_pointers(cpu); +} + + +/* + * reboot: + */ +X(reboot) +{ + cpu->running = 0; + cpu->n_translated_instrs --; + cpu->cd.arm.next_ic = &nothing_call; } @@ -686,21 +876,20 @@ /* Synchronize the program counter: */ uint32_t old_pc, low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - old_pc = cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); + old_pc = cpu->pc; useremul_syscall(cpu, ic->arg[0]); if (!cpu->running) { - cpu->running_translated = 0; cpu->n_translated_instrs --; cpu->cd.arm.next_ic = &nothing_call; } else if (cpu->pc != old_pc) { /* PC was changed by the SWI call. Find the new physical page and update the translation pointers: */ - arm_pc_to_pointers(cpu); + quick_pc_to_pointers(cpu); } } Y(swi_useremul) @@ -711,20 +900,28 @@ */ X(swi) { - /* Synchronize the program counter: */ - uint32_t low_pc = ((size_t)ic - (size_t) - cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; - + /* Synchronize the program counter first: */ + cpu->pc &= 0xfffff000; + cpu->pc += ic->arg[0]; arm_exception(cpu, ARM_EXCEPTION_SWI); } Y(swi) /* + * und: Undefined instruction. + */ +X(und) +{ + /* Synchronize the program counter first: */ + cpu->pc &= 0xfffff000; + cpu->pc += ic->arg[0]; + arm_exception(cpu, ARM_EXCEPTION_UND); +} +Y(und) + + +/* * swp, swpb: Swap (word or byte). 
* * arg[0] = ptr to rd @@ -735,13 +932,12 @@ { uint32_t addr = reg(ic->arg[2]), data, data2; unsigned char d[4]; + /* Synchronize the program counter: */ uint32_t low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); if (!cpu->memory_rw(cpu, cpu->mem, addr, d, sizeof(d), MEM_READ, CACHE_DATA)) { @@ -763,13 +959,12 @@ { uint32_t addr = reg(ic->arg[2]), data; unsigned char d[1]; + /* Synchronize the program counter: */ uint32_t low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); if (!cpu->memory_rw(cpu, cpu->mem, addr, d, sizeof(d), MEM_READ, CACHE_DATA)) { @@ -790,8 +985,15 @@ extern void (*arm_load_store_instr[1024])(struct cpu *, struct arm_instr_call *); +X(store_w1_word_u1_p0_imm); X(store_w0_byte_u1_p0_imm); X(store_w0_word_u1_p0_imm); +X(store_w0_word_u1_p1_imm); +X(load_w1_word_u1_p0_imm); +X(load_w0_word_u1_p0_imm); +X(load_w0_byte_u1_p1_imm); +X(load_w0_byte_u1_p1_reg); +X(load_w1_byte_u1_p1_imm); extern void (*arm_load_store_instr_pc[1024])(struct cpu *, struct arm_instr_call *); @@ -803,14 +1005,24 @@ struct arm_instr_call *); extern uint32_t (*arm_r[8192])(struct cpu *, struct arm_instr_call *); +extern void arm_r_r3_t0_c0(void); extern void (*arm_dpi_instr[2 * 2 * 2 * 16 * 16])(struct cpu *, struct arm_instr_call *); +extern void (*arm_dpi_instr_regshort[2 * 
16 * 16])(struct cpu *, + struct arm_instr_call *); X(cmps); +X(teqs); +X(tsts); X(sub); +X(add); X(subs); +X(eor_regshort); +X(cmps_regshort); +#include "cpu_arm_instr_misc.c" + /* * bdt_load: Block Data Transfer, Load @@ -832,13 +1044,16 @@ int i, return_flag = 0; uint32_t new_values[16]; +#ifdef GATHER_BDT_STATISTICS + if (!s_bit) + update_bdt_statistics(iw); +#endif + /* Synchronize the program counter: */ low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << - ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); if (s_bit) { /* Load to USR registers: */ @@ -974,6 +1189,7 @@ arm_save_register_bank(cpu); cpu->cd.arm.cpsr = new_cpsr; + cpu->cd.arm.flags = cpu->cd.arm.cpsr >> 28; if (switch_register_banks) arm_load_register_bank(cpu); @@ -981,8 +1197,7 @@ /* NOTE: Special case: Loading the PC */ if (iw & 0x8000) { - cpu->cd.arm.r[ARM_PC] &= ~3; - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc = cpu->cd.arm.r[ARM_PC] & 0xfffffffc; if (cpu->machine->show_trace_tree) cpu_functioncall_trace_return(cpu); /* TODO: There is no need to update the @@ -990,7 +1205,7 @@ same page! 
*/ /* Find the new physical page and update the translation pointers: */ - arm_pc_to_pointers(cpu); + quick_pc_to_pointers(cpu); } } Y(bdt_load) @@ -1015,13 +1230,16 @@ int w_bit = iw & 0x00200000; int i; +#ifdef GATHER_BDT_STATISTICS + if (!s_bit) + update_bdt_statistics(iw); +#endif + /* Synchronize the program counter: */ low_pc = ((size_t)ic - (size_t) cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << - ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); for (i=(u_bit? 0 : 15); i>=0 && i<=15; i+=(u_bit? 1 : -1)) { if (!((iw >> i) & 1)) { @@ -1050,8 +1268,9 @@ } } + /* NOTE/TODO: 8 vs 12 on some ARMs */ if (i == ARM_PC) - value += 12; /* NOTE/TODO: 8 on some ARMs */ + value = cpu->pc + 12; if (p_bit) { if (u_bit) @@ -1108,151 +1327,685 @@ Y(bdt_store) +/* Various load/store multiple instructions: */ +extern uint32_t *multi_opcode[256]; +extern void (**multi_opcode_f[256])(struct cpu *, struct arm_instr_call *); +X(multi_0x08b15018); +X(multi_0x08ac000c__ge); +X(multi_0x08a05018); + + /*****************************************************************************/ /* - * fill_loop_test: + * netbsd_memset: * - * A byte-fill loop. Fills at most one page at a time. If the page was not - * in the host_store table, then the original sequence (beginning with - * cmps rZ,#0) is executed instead. + * The core of a NetBSD/arm memset. * - * L: cmps rZ,#0 ic[0] - * strb rX,[rY],#1 ic[1] - * sub rZ,rZ,#1 ic[2] - * bgt L ic[3] - * - * A maximum of 4 pages are filled before returning. + * f01bc420: e25XX080 subs rX,rX,#0x80 + * f01bc424: a8ac000c stmgeia ip!,{r2,r3} (16 of these) + * .. 
+ * f01bc464: caffffed bgt 0xf01bc420 */ -X(fill_loop_test) +X(netbsd_memset) { - int max_pages_left = 4; - uint32_t addr, a, n, ofs, maxlen; - uint32_t *rzp = (uint32_t *)(size_t)ic[0].arg[0]; unsigned char *page; + uint32_t addr; + + do { + addr = cpu->cd.arm.r[ARM_IP]; + + instr(subs)(cpu, ic); + + if (((cpu->cd.arm.flags & ARM_F_N)?1:0) != + ((cpu->cd.arm.flags & ARM_F_V)?1:0)) { + cpu->n_translated_instrs += 16; + /* Skip the store multiples: */ + cpu->cd.arm.next_ic = &ic[17]; + return; + } + + /* Crossing a page boundary? Then continue non-combined. */ + if ((addr & 0xfff) + 128 > 0x1000) + return; + + /* R2/R3 non-zero? Not allowed here. */ + if (cpu->cd.arm.r[2] != 0 || cpu->cd.arm.r[3] != 0) + return; + + /* printf("addr = 0x%08x\n", addr); */ + + page = cpu->cd.arm.host_store[addr >> 12]; + /* No page translation? Continue non-combined. */ + if (page == NULL) + return; + + /* Clear: */ + memset(page + (addr & 0xfff), 0, 128); + cpu->cd.arm.r[ARM_IP] = addr + 128; + cpu->n_translated_instrs += 16; + + /* Branch back if greater: */ + cpu->n_translated_instrs += 1; + } while (((cpu->cd.arm.flags & ARM_F_N)?1:0) == + ((cpu->cd.arm.flags & ARM_F_V)?1:0) && + !(cpu->cd.arm.flags & ARM_F_Z)); + + /* Continue at the instruction after the bgt: */ + cpu->cd.arm.next_ic = &ic[18]; +} + + +/* + * netbsd_memcpy: + * + * The core of a NetBSD/arm memcpy. + * + * f01bc530: e8b15018 ldmia r1!,{r3,r4,ip,lr} + * f01bc534: e8a05018 stmia r0!,{r3,r4,ip,lr} + * f01bc538: e8b15018 ldmia r1!,{r3,r4,ip,lr} + * f01bc53c: e8a05018 stmia r0!,{r3,r4,ip,lr} + * f01bc540: e2522020 subs r2,r2,#0x20 + * f01bc544: aafffff9 bge 0xf01bc530 + */ +X(netbsd_memcpy) +{ + unsigned char *page_0, *page_1; + uint32_t addr_r0, addr_r1; + + do { + addr_r0 = cpu->cd.arm.r[0]; + addr_r1 = cpu->cd.arm.r[1]; + + /* printf("addr_r0 = %08x r1 = %08x\n", addr_r0, addr_r1); */ + + /* Crossing a page boundary? Then continue non-combined. 
*/ + if ((addr_r0 & 0xfff) + 32 > 0x1000 || + (addr_r1 & 0xfff) + 32 > 0x1000) { + instr(multi_0x08b15018)(cpu, ic); + return; + } + + page_0 = cpu->cd.arm.host_store[addr_r0 >> 12]; + page_1 = cpu->cd.arm.host_store[addr_r1 >> 12]; + + /* No page translations? Continue non-combined. */ + if (page_0 == NULL || page_1 == NULL) { + instr(multi_0x08b15018)(cpu, ic); + return; + } + + memcpy(page_0 + (addr_r0 & 0xfff), + page_1 + (addr_r1 & 0xfff), 32); + cpu->cd.arm.r[0] = addr_r0 + 32; + cpu->cd.arm.r[1] = addr_r1 + 32; + + cpu->n_translated_instrs += 4; + + instr(subs)(cpu, ic + 4); + cpu->n_translated_instrs ++; + + /* Loop while greater or equal: */ + cpu->n_translated_instrs ++; + } while (((cpu->cd.arm.flags & ARM_F_N)?1:0) == + ((cpu->cd.arm.flags & ARM_F_V)?1:0)); + + /* Continue at the instruction after the bge: */ + cpu->cd.arm.next_ic = &ic[6]; + cpu->n_translated_instrs --; +} + + +/* + * netbsd_cacheclean: + * + * The core of a NetBSD/arm cache clean routine, variant 1: + * + * f015f88c: e4902020 ldr r2,[r0],#32 + * f015f890: e2511020 subs r1,r1,#0x20 + * f015f894: 1afffffc bne 0xf015f88c + * f015f898: ee070f9a mcr 15,0,r0,cr7,cr10,4 + */ +X(netbsd_cacheclean) +{ + uint32_t r1 = cpu->cd.arm.r[1]; + cpu->n_translated_instrs += ((r1 >> 5) * 3); + cpu->cd.arm.r[0] += r1; + cpu->cd.arm.r[1] = 0; + cpu->cd.arm.next_ic = &ic[4]; +} + + +/* + * netbsd_cacheclean2: + * + * The core of a NetBSD/arm cache clean routine, variant 2: + * + * f015f93c: ee070f3a mcr 15,0,r0,cr7,cr10,1 + * f015f940: ee070f36 mcr 15,0,r0,cr7,cr6,1 + * f015f944: e2800020 add r0,r0,#0x20 + * f015f948: e2511020 subs r1,r1,#0x20 + * f015f94c: 8afffffa bhi 0xf015f93c + */ +X(netbsd_cacheclean2) +{ + cpu->n_translated_instrs += ((cpu->cd.arm.r[1] >> 5) * 5) - 1; + cpu->cd.arm.next_ic = &ic[5]; +} + + +/* + * netbsd_scanc: + * + * f01bccbc: e5d13000 ldrb r3,[r1] + * f01bccc0: e7d23003 ldrb r3,[r2,r3] + * f01bccc4: e113000c tsts r3,ip + */ +X(netbsd_scanc) +{ + unsigned char *page = 
cpu->cd.arm.host_load[cpu->cd.arm.r[1] >> 12]; + uint32_t t; -restart_loop: - addr = reg(ic[1].arg[0]); - page = cpu->cd.arm.host_store[addr >> 12]; if (page == NULL) { - instr(cmps)(cpu, ic); + instr(load_w0_byte_u1_p1_imm)(cpu, ic); return; } - n = reg(rzp) + 1; - ofs = addr & 0xfff; - maxlen = 4096 - ofs; - if (n > maxlen) - n = maxlen; + t = page[cpu->cd.arm.r[1] & 0xfff]; + t += cpu->cd.arm.r[2]; + page = cpu->cd.arm.host_load[t >> 12]; - /* printf("x = %x, n = %i\n", reg(ic[1].arg[2]), n); */ - memset(page + ofs, reg(ic[1].arg[2]), n); + if (page == NULL) { + instr(load_w0_byte_u1_p1_imm)(cpu, ic); + return; + } - reg(ic[1].arg[0]) = addr + n; + cpu->cd.arm.r[3] = page[t & 0xfff]; - reg(rzp) -= n; - cpu->n_translated_instrs += (4 * n); + t = cpu->cd.arm.r[3] & cpu->cd.arm.r[ARM_IP]; + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); + if (t == 0) + cpu->cd.arm.flags |= ARM_F_Z; - a = reg(rzp); + cpu->n_translated_instrs += 2; + cpu->cd.arm.next_ic = &ic[3]; +} - cpu->cd.arm.cpsr &= - ~(ARM_FLAG_Z | ARM_FLAG_N | ARM_FLAG_V | ARM_FLAG_C); - if (a != 0) - cpu->cd.arm.cpsr |= ARM_FLAG_C; - else - cpu->cd.arm.cpsr |= ARM_FLAG_Z; - if ((int32_t)a < 0) - cpu->cd.arm.cpsr |= ARM_FLAG_N; - if (max_pages_left-- > 0 && (int32_t)a > 0) - goto restart_loop; +/* + * strlen: + * + * S: e5f03001 ldrb rY,[rX,#1]! 
+ * e3530000 cmps rY,#0 + * 1afffffc bne S + */ +X(strlen) +{ + unsigned int n_loops = 0; + uint32_t rY, rX = reg(ic[0].arg[0]); + unsigned char *p; + + do { + rX ++; + p = cpu->cd.arm.host_load[rX >> 12]; + if (p == NULL) { + cpu->n_translated_instrs += (n_loops * 3); + instr(load_w1_byte_u1_p1_imm)(cpu, ic); + return; + } - cpu->n_translated_instrs --; + rY = reg(ic[0].arg[2]) = p[rX & 0xfff]; /* load */ + reg(ic[0].arg[0]) = rX; /* writeback */ + n_loops ++; - if ((int32_t)a > 0) - cpu->cd.arm.next_ic = ic; - else - cpu->cd.arm.next_ic = &ic[4]; + /* Compare rY to zero: */ + cpu->cd.arm.flags = ARM_F_C; + if (rY == 0) + cpu->cd.arm.flags |= ARM_F_Z; + } while (rY != 0); + + cpu->n_translated_instrs += (n_loops * 3) - 1; + cpu->cd.arm.next_ic = &ic[3]; } /* - * fill_loop_test2: + * xchg: * - * A word-fill loop. Fills at most one page at a time. If the page was not - * in the host_store table, then the original sequence (beginning with - * cmps rZ,#0) is executed instead. - * - * L: str rX,[rY],#4 ic[0] - * subs rZ,rZ,#4 ic[1] - * bgt L ic[2] + * e02YX00X eor rX,rY,rX + * e02XY00Y eor rY,rX,rY + * e02YX00X eor rX,rY,rX + */ +X(xchg) +{ + uint32_t tmp = reg(ic[0].arg[0]); + cpu->n_translated_instrs += 2; + cpu->cd.arm.next_ic = &ic[3]; + reg(ic[0].arg[0]) = reg(ic[1].arg[0]); + reg(ic[1].arg[0]) = tmp; +} + + +/* + * netbsd_copyin: * - * A maximum of 5 pages are filled before returning. 
+ * e4b0a004 ldrt sl,[r0],#4 + * e4b0b004 ldrt fp,[r0],#4 + * e4b06004 ldrt r6,[r0],#4 + * e4b07004 ldrt r7,[r0],#4 + * e4b08004 ldrt r8,[r0],#4 + * e4b09004 ldrt r9,[r0],#4 */ -X(fill_loop_test2) +X(netbsd_copyin) { - int max_pages_left = 5; - unsigned char x1,x2,x3,x4; - uint32_t addr, a, n, x, ofs, maxlen; - uint32_t *rzp = (uint32_t *)(size_t)ic[1].arg[0]; - unsigned char *page; + uint32_t r0 = cpu->cd.arm.r[0], ofs = (r0 & 0xffc), index = r0 >> 12; + unsigned char *p = cpu->cd.arm.host_load[index]; + uint32_t *p32 = (uint32_t *) p, *q32; + int ok = cpu->cd.arm.is_userpage[index >> 5] & (1 << (index & 31)); - x = reg(ic[0].arg[2]); - x1 = x; x2 = x >> 8; x3 = x >> 16; x4 = x >> 24; - if (x1 != x2 || x1 != x3 || x1 != x4) { - instr(store_w0_word_u1_p0_imm)(cpu, ic); + if (ofs > 0x1000 - 6*4 || !ok || p == NULL) { + instr(load_w1_word_u1_p0_imm)(cpu, ic); return; } + q32 = &cpu->cd.arm.r[6]; + ofs >>= 2; + q32[0] = p32[ofs+2]; + q32[1] = p32[ofs+3]; + q32[2] = p32[ofs+4]; + q32[3] = p32[ofs+5]; + q32[4] = p32[ofs+0]; + q32[5] = p32[ofs+1]; + cpu->cd.arm.r[0] = r0 + 24; + cpu->n_translated_instrs += 5; + cpu->cd.arm.next_ic = &ic[6]; +} -restart_loop: - addr = reg(ic[0].arg[0]); - page = cpu->cd.arm.host_store[addr >> 12]; - if (page == NULL || (addr & 3) != 0) { - instr(store_w0_word_u1_p0_imm)(cpu, ic); + +/* + * netbsd_copyout: + * + * e4a18004 strt r8,[r1],#4 + * e4a19004 strt r9,[r1],#4 + * e4a1a004 strt sl,[r1],#4 + * e4a1b004 strt fp,[r1],#4 + * e4a16004 strt r6,[r1],#4 + * e4a17004 strt r7,[r1],#4 + */ +X(netbsd_copyout) +{ + uint32_t r1 = cpu->cd.arm.r[1], ofs = (r1 & 0xffc), index = r1 >> 12; + unsigned char *p = cpu->cd.arm.host_store[index]; + uint32_t *p32 = (uint32_t *) p, *q32; + int ok = cpu->cd.arm.is_userpage[index >> 5] & (1 << (index & 31)); + + if (ofs > 0x1000 - 6*4 || !ok || p == NULL) { + instr(store_w1_word_u1_p0_imm)(cpu, ic); return; } + q32 = &cpu->cd.arm.r[6]; + ofs >>= 2; + p32[ofs ] = q32[2]; + p32[ofs+1] = q32[3]; + p32[ofs+2] = 
q32[4]; + p32[ofs+3] = q32[5]; + p32[ofs+4] = q32[0]; + p32[ofs+5] = q32[1]; + cpu->cd.arm.r[1] = r1 + 24; + cpu->n_translated_instrs += 5; + cpu->cd.arm.next_ic = &ic[6]; +} - /* printf("addr = 0x%08x, page = %p\n", addr, page); - printf("*rzp = 0x%08x\n", reg(rzp)); */ - n = reg(rzp) / 4; - if (n == 0) - n++; - /* n = nr of _words_ */ - ofs = addr & 0xfff; - maxlen = 4096 - ofs; - if (n*4 > maxlen) - n = maxlen / 4; +/* + * cmps by 0, followed by beq (inside the same page): + */ +X(cmps0_beq_samepage) +{ + uint32_t a = reg(ic->arg[0]); + cpu->n_translated_instrs ++; + if (a == 0) { + cpu->cd.arm.flags = ARM_F_Z | ARM_F_C; + } else { + /* Semi-ugly hack which sets the negative-bit if a < 0: */ + cpu->cd.arm.flags = ARM_F_C | ((a >> 28) & 8); + } + if (a == 0) + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} - /* printf("x = %x, n = %i\n", x1, n); */ - memset(page + ofs, x1, n * 4); - reg(ic[0].arg[0]) = addr + n * 4; +/* + * cmps followed by beq (inside the same page): + */ +X(cmps_beq_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? 
ARM_F_C : 0; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + } else { + cpu->cd.arm.next_ic = &ic[2]; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + } +} + - reg(rzp) -= (n * 4); - cpu->n_translated_instrs += (3 * n); +/* + * cmps followed by beq (not the same page): + */ +X(cmps_0_beq) +{ + uint32_t a = reg(ic->arg[0]); + cpu->n_translated_instrs ++; + if (a == 0) { + cpu->cd.arm.flags = ARM_F_Z | ARM_F_C; + cpu->pc = (uint32_t)(((uint32_t)cpu->pc & 0xfffff000) + + (int32_t)ic[1].arg[0]); + quick_pc_to_pointers(cpu); + } else { + /* Semi-ugly hack which sets the negative-bit if a < 0: */ + cpu->cd.arm.flags = ARM_F_C | ((a >> 28) & 8); + cpu->cd.arm.next_ic = &ic[2]; + } +} +X(cmps_pos_beq) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? ARM_F_C : 0; + if ((int32_t)a < 0 && (int32_t)c >= 0) + cpu->cd.arm.flags |= ARM_F_V; + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + cpu->pc = (uint32_t)(((uint32_t)cpu->pc & 0xfffff000) + + (int32_t)ic[1].arg[0]); + quick_pc_to_pointers(cpu); + } else { + cpu->cd.arm.next_ic = &ic[2]; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + } +} +X(cmps_neg_beq) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? 
ARM_F_C : 0; + if ((int32_t)a >= 0 && (int32_t)c < 0) + cpu->cd.arm.flags |= ARM_F_V; + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + cpu->pc = (uint32_t)(((uint32_t)cpu->pc & 0xfffff000) + + (int32_t)ic[1].arg[0]); + quick_pc_to_pointers(cpu); + } else { + cpu->cd.arm.next_ic = &ic[2]; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + } +} - a = reg(rzp); - cpu->cd.arm.cpsr &= - ~(ARM_FLAG_Z | ARM_FLAG_N | ARM_FLAG_V | ARM_FLAG_C); - if (a != 0) - cpu->cd.arm.cpsr |= ARM_FLAG_C; +/* + * cmps by 0, followed by bne (inside the same page): + */ +X(cmps0_bne_samepage) +{ + uint32_t a = reg(ic->arg[0]); + cpu->n_translated_instrs ++; + if (a == 0) { + cpu->cd.arm.flags = ARM_F_Z | ARM_F_C; + } else { + /* Semi-ugly hack which sets the negative-bit if a < 0: */ + cpu->cd.arm.flags = ARM_F_C | ((a >> 28) & 8); + } + if (a == 0) + cpu->cd.arm.next_ic = &ic[2]; else - cpu->cd.arm.cpsr |= ARM_FLAG_Z; - if ((int32_t)a < 0) - cpu->cd.arm.cpsr |= ARM_FLAG_N; + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; +} - if (max_pages_left-- > 0 && (int32_t)a > 0) - goto restart_loop; - cpu->n_translated_instrs --; +/* + * cmps followed by bne (inside the same page): + */ +X(cmps_bne_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? 
ARM_F_C : 0; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + cpu->cd.arm.next_ic = &ic[2]; + } else { + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + } +} + - if ((int32_t)a > 0) - cpu->cd.arm.next_ic = ic; +/* + * cmps followed by bcc (inside the same page): + */ +X(cmps_bcc_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (a >= b) + cpu->cd.arm.next_ic = &ic[2]; else - cpu->cd.arm.next_ic = &ic[3]; + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; +} + + +/* + * cmps (reg) followed by bcc (inside the same page): + */ +X(cmps_reg_bcc_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = reg(ic->arg[1]), c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (a >= b) + cpu->cd.arm.next_ic = &ic[2]; + else + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; +} + + +/* + * cmps followed by bhi (inside the same page): + */ +X(cmps_bhi_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? 
ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (a > b) + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} + + +/* + * cmps (reg) followed by bhi (inside the same page): + */ +X(cmps_reg_bhi_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = reg(ic->arg[1]), c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if (a > b) + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} + + +/* + * cmps followed by bgt (inside the same page): + */ +X(cmps_bgt_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if ((int32_t)a > (int32_t)b) + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} + + +/* + * cmps followed by ble (inside the same page): + */ +X(cmps_ble_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a - b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags = ((uint32_t)a >= (uint32_t)b)? 
ARM_F_C : 0; + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + else if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (((int32_t)a >= 0 && (int32_t)b < 0 && (int32_t)c < 0) || + ((int32_t)a < 0 && (int32_t)b >= 0 && (int32_t)c >= 0)) + cpu->cd.arm.flags |= ARM_F_V; + if ((int32_t)a <= (int32_t)b) + cpu->cd.arm.next_ic = (struct arm_instr_call *) ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} + + +/* + * teqs followed by beq (inside the same page): + */ +X(teqs_beq_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a ^ b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic[1].arg[0]; + } else { + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + cpu->cd.arm.next_ic = &ic[2]; + } +} + + +/* + * tsts followed by beq (inside the same page): + * (arg[1] must not have its highest bit set)) + */ +X(tsts_lo_beq_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a & b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); + if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (c == 0) + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic[1].arg[0]; + else + cpu->cd.arm.next_ic = &ic[2]; +} + + +/* + * teqs followed by bne (inside the same page): + */ +X(teqs_bne_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a ^ b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags &= ~(ARM_F_Z | ARM_F_N); + if (c == 0) { + cpu->cd.arm.flags |= ARM_F_Z; + } else { + if (c & 0x80000000) + cpu->cd.arm.flags |= ARM_F_N; + } + if (c == 0) + cpu->cd.arm.next_ic = &ic[2]; + else + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic[1].arg[0]; +} + + +/* + * tsts followed by bne (inside the same page): + * (arg[1] must not have its highest bit set)) + */ +X(tsts_lo_bne_samepage) +{ + uint32_t a = reg(ic->arg[0]), b = ic->arg[1], c = a & b; + cpu->n_translated_instrs ++; + cpu->cd.arm.flags 
&= ~(ARM_F_Z | ARM_F_N); + if (c == 0) + cpu->cd.arm.flags |= ARM_F_Z; + if (c == 0) + cpu->cd.arm.next_ic = &ic[2]; + else + cpu->cd.arm.next_ic = (struct arm_instr_call *) + ic[1].arg[0]; } @@ -1262,14 +2015,11 @@ X(end_of_page) { /* Update the PC: (offset 0, but on the next page) */ - cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->cd.arm.r[ARM_PC] += (ARM_IC_ENTRIES_PER_PAGE - << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC]; + cpu->pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (ARM_IC_ENTRIES_PER_PAGE << ARM_INSTR_ALIGNMENT_SHIFT); /* Find the new physical page and update the translation pointers: */ - arm_pc_to_pointers(cpu); + quick_pc_to_pointers(cpu); /* end_of_page doesn't count as an executed instruction: */ cpu->n_translated_instrs --; @@ -1280,45 +2030,400 @@ /* - * arm_combine_instructions(): + * Combine: netbsd_memset(): * - * Combine two or more instructions, if possible, into a single function call. + * Check for the core of a NetBSD/arm memset; large memsets use a sequence + * of 16 store-multiple instructions, each storing 2 registers at a time. 
*/ -void arm_combine_instructions(struct cpu *cpu, struct arm_instr_call *ic, - uint32_t addr) +void COMBINE(netbsd_memset)(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) { - int n_back; - n_back = (addr >> ARM_INSTR_ALIGNMENT_SHIFT) +#ifdef HOST_LITTLE_ENDIAN + int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) & (ARM_IC_ENTRIES_PER_PAGE-1); - if (n_back >= 2) { - if (ic[-2].f == instr(store_w0_word_u1_p0_imm) && - ic[-2].arg[1] == 4 && - ic[-1].f == instr(subs) && - ic[-1].arg[0] == ic[-1].arg[2] && ic[-1].arg[1] == 4 && + if (n_back >= 17) { + int i; + for (i=-16; i<=-1; i++) + if (ic[i].f != instr(multi_0x08ac000c__ge)) + return; + if (ic[-17].f == instr(subs) && + ic[-17].arg[0]==ic[-17].arg[2] && ic[-17].arg[1] == 128 && ic[ 0].f == instr(b_samepage__gt) && - ic[ 0].arg[0] == (size_t)&ic[-2]) { - ic[-2].f = instr(fill_loop_test2); - combined; + ic[ 0].arg[0] == (size_t)&ic[-17]) { + ic[-17].f = instr(netbsd_memset); + } + } +#endif +} + + +/* + * Combine: netbsd_memcpy(): + * + * Check for the core of a NetBSD/arm memcpy; large memcpys use a + * sequence of ldmia instructions. + */ +void COMBINE(netbsd_memcpy)(struct cpu *cpu, struct arm_instr_call *ic, + int low_addr) +{ +#ifdef HOST_LITTLE_ENDIAN + int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + + if (n_back >= 5) { + if (ic[-5].f==instr(multi_0x08b15018) && + ic[-4].f==instr(multi_0x08a05018) && + ic[-3].f==instr(multi_0x08b15018) && + ic[-2].f==instr(multi_0x08a05018) && + ic[-1].f == instr(subs) && + ic[-1].arg[0]==ic[-1].arg[2] && ic[-1].arg[1] == 0x20 && + ic[ 0].f == instr(b_samepage__ge) && + ic[ 0].arg[0] == (size_t)&ic[-5]) { + ic[-5].f = instr(netbsd_memcpy); } } +#endif +} + + +/* + * Combine: netbsd_cacheclean(): + * + * Check for the core of a NetBSD/arm cache clean. (There are two variants.) 
+ */ +void COMBINE(netbsd_cacheclean)(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ + int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); if (n_back >= 3) { - if (ic[-3].f == instr(cmps) && - ic[-3].arg[0] == ic[-1].arg[0] && - ic[-3].arg[1] == 0 && - ic[-2].f == instr(store_w0_byte_u1_p0_imm) && - ic[-2].arg[1] == 1 && - ic[-1].f == instr(sub) && - ic[-1].arg[0] == ic[-1].arg[2] && ic[-1].arg[1] == 1 && - ic[ 0].f == instr(b_samepage__gt) && - ic[ 0].arg[0] == (size_t)&ic[-3]) { - ic[-3].f = instr(fill_loop_test); - combined; + if (ic[-3].f==instr(load_w0_word_u1_p0_imm) && + ic[-2].f == instr(subs) && + ic[-2].arg[0]==ic[-2].arg[2] && ic[-2].arg[1] == 0x20 && + ic[-1].f == instr(b_samepage__ne) && + ic[-1].arg[0] == (size_t)&ic[-3]) { + ic[-3].f = instr(netbsd_cacheclean); + } + } +} + + +/* + * Combine: netbsd_cacheclean2(): + * + * Check for the core of a NetBSD/arm cache clean. (Second variant.) + */ +void COMBINE(netbsd_cacheclean2)(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ + int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + + if (n_back >= 4) { + if (ic[-4].f == instr(mcr_mrc) && ic[-4].arg[0] == 0xee070f3a && + ic[-3].f == instr(mcr_mrc) && ic[-3].arg[0] == 0xee070f36 && + ic[-2].f == instr(add) && + ic[-2].arg[0]==ic[-2].arg[2] && ic[-2].arg[1] == 0x20 && + ic[-1].f == instr(subs) && + ic[-1].arg[0]==ic[-1].arg[2] && ic[-1].arg[1] == 0x20) { + ic[-4].f = instr(netbsd_cacheclean2); + } + } +} + + +/* + * Combine: netbsd_scanc(): + */ +void COMBINE(netbsd_scanc)(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ + int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + + if (n_back < 2) + return; + + if (ic[-2].f == instr(load_w0_byte_u1_p1_imm) && + ic[-2].arg[0] == (size_t)(&cpu->cd.arm.r[1]) && + ic[-2].arg[1] == 0 && + ic[-2].arg[2] == (size_t)(&cpu->cd.arm.r[3]) && + ic[-1].f == 
instr(load_w0_byte_u1_p1_reg) && + ic[-1].arg[0] == (size_t)(&cpu->cd.arm.r[2]) && + ic[-1].arg[1] == (size_t)arm_r_r3_t0_c0 && + ic[-1].arg[2] == (size_t)(&cpu->cd.arm.r[3])) { + ic[-2].f = instr(netbsd_scanc); + } +} + + +/* + * Combine: strlen(): + */ +void COMBINE(strlen)(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ + int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + + if (n_back < 2) + return; + + if (ic[-2].f == instr(load_w1_byte_u1_p1_imm) && + ic[-2].arg[1] == 1 && + ic[-2].arg[2] == (size_t)(&cpu->cd.arm.r[3]) && + ic[-1].f == instr(cmps) && + ic[-1].arg[0] == (size_t)(&cpu->cd.arm.r[3]) && + ic[-1].arg[1] == 0) { + ic[-2].f = instr(strlen); + } +} + + +/* + * Combine: xchg(): + */ +void COMBINE(xchg)(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ + int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + size_t a, b; + + if (n_back < 2) + return; + + a = ic[-2].arg[0]; b = ic[-1].arg[0]; + + if (ic[-2].f == instr(eor_regshort) && + ic[-1].f == instr(eor_regshort) && + ic[-2].arg[0] == a && ic[-2].arg[1] == b && ic[-2].arg[2] == b && + ic[-1].arg[0] == b && ic[-1].arg[1] == a && ic[-1].arg[2] == a && + ic[ 0].arg[0] == a && ic[ 0].arg[1] == b && ic[ 0].arg[2] == b) { + ic[-2].f = instr(xchg); + } +} + + +/* + * Combine: netbsd_copyin(): + */ +void COMBINE(netbsd_copyin)(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ +#ifdef HOST_LITTLE_ENDIAN + int i, n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + + if (n_back < 5) + return; + + for (i=-5; i<0; i++) { + if (ic[i].f != instr(load_w1_word_u1_p0_imm) || + ic[i].arg[0] != (size_t)(&cpu->cd.arm.r[0]) || + ic[i].arg[1] != 4) + return; + } + + if (ic[-5].arg[2] == (size_t)(&cpu->cd.arm.r[10]) && + ic[-4].arg[2] == (size_t)(&cpu->cd.arm.r[11]) && + ic[-3].arg[2] == (size_t)(&cpu->cd.arm.r[6]) && + ic[-2].arg[2] == (size_t)(&cpu->cd.arm.r[7]) && + ic[-1].arg[2] 
== (size_t)(&cpu->cd.arm.r[8])) { + ic[-5].f = instr(netbsd_copyin); + } +#endif +} + + +/* + * Combine: netbsd_copyout(): + */ +void COMBINE(netbsd_copyout)(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ +#ifdef HOST_LITTLE_ENDIAN + int i, n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + + if (n_back < 5) + return; + + for (i=-5; i<0; i++) { + if (ic[i].f != instr(store_w1_word_u1_p0_imm) || + ic[i].arg[0] != (size_t)(&cpu->cd.arm.r[1]) || + ic[i].arg[1] != 4) + return; + } + + if (ic[-5].arg[2] == (size_t)(&cpu->cd.arm.r[8]) && + ic[-4].arg[2] == (size_t)(&cpu->cd.arm.r[9]) && + ic[-3].arg[2] == (size_t)(&cpu->cd.arm.r[10]) && + ic[-2].arg[2] == (size_t)(&cpu->cd.arm.r[11]) && + ic[-1].arg[2] == (size_t)(&cpu->cd.arm.r[6])) { + ic[-5].f = instr(netbsd_copyout); + } +#endif +} + + +/* + * Combine: cmps_b(): + */ +void COMBINE(cmps_b)(struct cpu *cpu, + struct arm_instr_call *ic, int low_addr) +{ + int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT) + & (ARM_IC_ENTRIES_PER_PAGE-1); + if (n_back < 1) + return; + if (ic[0].f == instr(b__eq)) { + if (ic[-1].f == instr(cmps)) { + if (ic[-1].arg[1] == 0) + ic[-1].f = instr(cmps_0_beq); + else if (ic[-1].arg[1] & 0x80000000) + ic[-1].f = instr(cmps_neg_beq); + else + ic[-1].f = instr(cmps_pos_beq); } + return; } + if (ic[0].f == instr(b_samepage__eq)) { + if (ic[-1].f == instr(cmps)) { + if (ic[-1].arg[1] == 0) + ic[-1].f = instr(cmps0_beq_samepage); + else + ic[-1].f = instr(cmps_beq_samepage); + } + if (ic[-1].f == instr(tsts) && + !(ic[-1].arg[1] & 0x80000000)) { + ic[-1].f = instr(tsts_lo_beq_samepage); + } + if (ic[-1].f == instr(teqs)) { + ic[-1].f = instr(teqs_beq_samepage); + } + return; + } + if (ic[0].f == instr(b_samepage__ne)) { + if (ic[-1].f == instr(cmps)) { + if (ic[-1].arg[1] == 0) + ic[-1].f = instr(cmps0_bne_samepage); + else + ic[-1].f = instr(cmps_bne_samepage); + } + if (ic[-1].f == instr(tsts) && + !(ic[-1].arg[1] & 0x80000000)) { + ic[-1].f = 
instr(tsts_lo_bne_samepage); + } + if (ic[-1].f == instr(teqs)) { + ic[-1].f = instr(teqs_bne_samepage); + } + return; + } + if (ic[0].f == instr(b_samepage__cc)) { + if (ic[-1].f == instr(cmps)) { + ic[-1].f = instr(cmps_bcc_samepage); + } + if (ic[-1].f == instr(cmps_regshort)) { + ic[-1].f = instr(cmps_reg_bcc_samepage); + } + return; + } + if (ic[0].f == instr(b_samepage__hi)) { + if (ic[-1].f == instr(cmps)) { + ic[-1].f = instr(cmps_bhi_samepage); + } + if (ic[-1].f == instr(cmps_regshort)) { + ic[-1].f = instr(cmps_reg_bhi_samepage); + } + return; + } + if (ic[0].f == instr(b_samepage__gt)) { + if (ic[-1].f == instr(cmps)) { + ic[-1].f = instr(cmps_bgt_samepage); + } + return; + } + if (ic[0].f == instr(b_samepage__le)) { + if (ic[-1].f == instr(cmps)) { + ic[-1].f = instr(cmps_ble_samepage); + } + return; + } +} - /* TODO: Combine forward as well */ + +/*****************************************************************************/ + + +static void arm_switch_clear(struct arm_instr_call *ic, int rd, + int condition_code) +{ + switch (rd) { + case 0: ic->f = cond_instr(clear_r0); break; + case 1: ic->f = cond_instr(clear_r1); break; + case 2: ic->f = cond_instr(clear_r2); break; + case 3: ic->f = cond_instr(clear_r3); break; + case 4: ic->f = cond_instr(clear_r4); break; + case 5: ic->f = cond_instr(clear_r5); break; + case 6: ic->f = cond_instr(clear_r6); break; + case 7: ic->f = cond_instr(clear_r7); break; + case 8: ic->f = cond_instr(clear_r8); break; + case 9: ic->f = cond_instr(clear_r9); break; + case 10: ic->f = cond_instr(clear_r10); break; + case 11: ic->f = cond_instr(clear_r11); break; + case 12: ic->f = cond_instr(clear_r12); break; + case 13: ic->f = cond_instr(clear_r13); break; + case 14: ic->f = cond_instr(clear_r14); break; + } +} + + +static void arm_switch_mov1(struct arm_instr_call *ic, int rd, + int condition_code) +{ + switch (rd) { + case 0: ic->f = cond_instr(mov1_r0); break; + case 1: ic->f = cond_instr(mov1_r1); break; + case 2: 
ic->f = cond_instr(mov1_r2); break; + case 3: ic->f = cond_instr(mov1_r3); break; + case 4: ic->f = cond_instr(mov1_r4); break; + case 5: ic->f = cond_instr(mov1_r5); break; + case 6: ic->f = cond_instr(mov1_r6); break; + case 7: ic->f = cond_instr(mov1_r7); break; + case 8: ic->f = cond_instr(mov1_r8); break; + case 9: ic->f = cond_instr(mov1_r9); break; + case 10: ic->f = cond_instr(mov1_r10); break; + case 11: ic->f = cond_instr(mov1_r11); break; + case 12: ic->f = cond_instr(mov1_r12); break; + case 13: ic->f = cond_instr(mov1_r13); break; + case 14: ic->f = cond_instr(mov1_r14); break; + } +} + + +static void arm_switch_add1(struct arm_instr_call *ic, int rd, + int condition_code) +{ + switch (rd) { + case 0: ic->f = cond_instr(add1_r0); break; + case 1: ic->f = cond_instr(add1_r1); break; + case 2: ic->f = cond_instr(add1_r2); break; + case 3: ic->f = cond_instr(add1_r3); break; + case 4: ic->f = cond_instr(add1_r4); break; + case 5: ic->f = cond_instr(add1_r5); break; + case 6: ic->f = cond_instr(add1_r6); break; + case 7: ic->f = cond_instr(add1_r7); break; + case 8: ic->f = cond_instr(add1_r8); break; + case 9: ic->f = cond_instr(add1_r9); break; + case 10: ic->f = cond_instr(add1_r10); break; + case 11: ic->f = cond_instr(add1_r11); break; + case 12: ic->f = cond_instr(add1_r12); break; + case 13: ic->f = cond_instr(add1_r13); break; + case 14: ic->f = cond_instr(add1_r14); break; + } } @@ -1339,31 +2444,30 @@ unsigned char *page; unsigned char ib[4]; int condition_code, main_opcode, secondary_opcode, s_bit, rn, rd, r8; - int p_bit, u_bit, b_bit, w_bit, l_bit, regform, rm, c, t; - int any_pc_reg; + int p_bit, u_bit, w_bit, l_bit, regform, rm, c, t, any_pc_reg; void (*samepage_function)(struct cpu *, struct arm_instr_call *); /* Figure out the address of the instruction: */ low_pc = ((size_t)ic - (size_t)cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call); - addr = cpu->cd.arm.r[ARM_PC] & ~((ARM_IC_ENTRIES_PER_PAGE-1) << + addr = cpu->pc & 
~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT); addr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT); - cpu->pc = cpu->cd.arm.r[ARM_PC] = addr; + cpu->pc = addr; addr &= ~((1 << ARM_INSTR_ALIGNMENT_SHIFT) - 1); /* Read the instruction word from memory: */ page = cpu->cd.arm.host_load[addr >> 12]; if (page != NULL) { - /* fatal("TRANSLATION HIT!\n"); */ + /* fatal("TRANSLATION HIT! 0x%08x\n", addr); */ memcpy(ib, page + (addr & 0xfff), sizeof(ib)); } else { - /* fatal("TRANSLATION MISS!\n"); */ + /* fatal("TRANSLATION MISS! 0x%08x\n", addr); */ if (!cpu->memory_rw(cpu, cpu->mem, addr, &ib[0], sizeof(ib), MEM_READ, CACHE_INSTRUCTION)) { fatal("to_be_translated(): " "read failed: TODO\n"); - goto bad; + return; } } @@ -1383,10 +2487,9 @@ condition_code = iword >> 28; main_opcode = (iword >> 24) & 15; secondary_opcode = (iword >> 21) & 15; - u_bit = (iword >> 23) & 1; - b_bit = (iword >> 22) & 1; - w_bit = (iword >> 21) & 1; - s_bit = l_bit = (iword >> 20) & 1; + u_bit = iword & 0x00800000; + w_bit = iword & 0x00200000; + s_bit = l_bit = iword & 0x00100000; rn = (iword >> 16) & 15; rd = (iword >> 12) & 15; r8 = (iword >> 8) & 15; @@ -1451,6 +2554,10 @@ ic->arg[0] = iword; break; } + if ((iword & 0x0f900ff0) == 0x01000050) { + fatal("TODO: q{,d}{add,sub}\n"); + goto bad; + } if ((iword & 0x0ff000d0) == 0x01200010) { /* bx or blx */ if (iword & 0x20) @@ -1475,20 +2582,55 @@ ic->arg[2] = (size_t)(&cpu->cd.arm.r[rn]); break; } + if ((iword & 0x0fff0ff0) == 0x016f0f10) { + ic->f = cond_instr(clz); + ic->arg[0] = (size_t)(&cpu->cd.arm.r[rm]); + ic->arg[1] = (size_t)(&cpu->cd.arm.r[rd]); + break; + } + if ((iword & 0x0ff00090) == 0x01000080) { + /* TODO: smlaXX */ + goto bad; + } + if ((iword & 0x0ff00090) == 0x01400080) { + /* TODO: smlalY */ + goto bad; + } + if ((iword & 0x0ff000b0) == 0x01200080) { + /* TODO: smlawY */ + goto bad; + } + if ((iword & 0x0ff0f090) == 0x01600080) { + /* smulXY (16-bit * 16-bit => 32-bit) */ + switch (iword & 0x60) { + case 0x00: ic->f = 
cond_instr(smulbb); break; + case 0x20: ic->f = cond_instr(smultb); break; + case 0x40: ic->f = cond_instr(smulbt); break; + default: ic->f = cond_instr(smultt); break; + } + ic->arg[0] = (size_t)(&cpu->cd.arm.r[rm]); + ic->arg[1] = (size_t)(&cpu->cd.arm.r[r8]); + ic->arg[2] = (size_t)(&cpu->cd.arm.r[rn]); /* Rd */ + break; + } + if ((iword & 0x0ff0f0b0) == 0x012000a0) { + /* TODO: smulwY */ + goto bad; + } if ((iword & 0x0fb0fff0) == 0x0120f000 || (iword & 0x0fb0f000) == 0x0320f000) { /* msr: move to [S|C]PSR from a register or immediate value */ - if (rm == ARM_PC) { - fatal("msr PC?\n"); - goto bad; - } if (iword & 0x02000000) { if (iword & 0x00400000) ic->f = cond_instr(msr_imm_spsr); else ic->f = cond_instr(msr_imm); } else { + if (rm == ARM_PC) { + fatal("msr PC?\n"); + goto bad; + } if (iword & 0x00400000) ic->f = cond_instr(msr_spsr); else @@ -1561,22 +2703,47 @@ goto bad; } - /* "mov pc,lr" with trace enabled: */ - if ((iword & 0x0fffffff) == 0x01a0f00e && - cpu->machine->show_trace_tree) { - ic->f = cond_instr(ret_trace); + /* "mov pc,lr": */ + if ((iword & 0x0fffffff) == 0x01a0f00e) { + if (cpu->machine->show_trace_tree) + ic->f = cond_instr(ret_trace); + else + ic->f = cond_instr(ret); break; } - /* "mov reg,reg": */ - if ((iword & 0x0fff0ff0) == 0x01a00000 && - (iword&15) != ARM_PC && rd != ARM_PC) { - ic->f = cond_instr(mov_reg_reg); - ic->arg[0] = (size_t)(&cpu->cd.arm.r[iword & 15]); + /* "mov reg,reg" or "mov reg,pc": */ + if ((iword & 0x0fff0ff0) == 0x01a00000 && rd != ARM_PC) { + if (rm != ARM_PC) { + ic->f = cond_instr(mov_reg_reg); + ic->arg[0] = (size_t)(&cpu->cd.arm.r[rm]); + } else { + ic->f = cond_instr(mov_reg_pc); + ic->arg[0] = (addr & 0xfff) + 8; + } ic->arg[1] = (size_t)(&cpu->cd.arm.r[rd]); break; } + /* "mov reg,#0": */ + if ((iword & 0x0fff0fff) == 0x03a00000 && rd != ARM_PC) { + arm_switch_clear(ic, rd, condition_code); + break; + } + + /* "mov reg,#1": */ + if ((iword & 0x0fff0fff) == 0x03a00001 && rd != ARM_PC) { + 
arm_switch_mov1(ic, rd, condition_code); + break; + } + + /* "add reg,reg,#1": */ + if ((iword & 0x0ff00fff) == 0x02800001 && rd != ARM_PC + && rn == rd) { + arm_switch_add1(ic, rd, condition_code); + break; + } + /* * Generic Data Processing Instructions: */ @@ -1604,15 +2771,31 @@ ic->arg[1] = imm; } + /* mvn #imm ==> mov #~imm */ + if (secondary_opcode == 0xf && !regform) { + secondary_opcode = 0xd; + ic->arg[1] = ~ic->arg[1]; + } + ic->arg[0] = (size_t)(&cpu->cd.arm.r[rn]); ic->arg[2] = (size_t)(&cpu->cd.arm.r[rd]); any_pc_reg = 0; if (rn == ARM_PC || rd == ARM_PC) any_pc_reg = 1; - ic->f = arm_dpi_instr[condition_code + - 16 * secondary_opcode + (s_bit? 256 : 0) + - (any_pc_reg? 512 : 0) + (regform? 1024 : 0)]; + if (!any_pc_reg && regform && (iword & 0xfff) < ARM_PC) { + ic->arg[1] = (size_t)(&cpu->cd.arm.r[rm]); + ic->f = arm_dpi_instr_regshort[condition_code + + 16 * secondary_opcode + (s_bit? 256 : 0)]; + } else + ic->f = arm_dpi_instr[condition_code + + 16 * secondary_opcode + (s_bit? 256 : 0) + + (any_pc_reg? 512 : 0) + (regform? 1024 : 0)]; + + if (ic->f == instr(eor_regshort)) + cpu->cd.arm.combination_check = COMBINE(xchg); + if (iword == 0xe113000c) + cpu->cd.arm.combination_check = COMBINE(netbsd_scanc); break; case 0x4: /* Load and store... */ @@ -1641,16 +2824,91 @@ fatal("Not a Load/store TODO\n"); goto bad; } + /* Special case: pc-relative load within the same page: */ + if (rn == ARM_PC && rd != ARM_PC && main_opcode < 6) { + int ofs = (addr & 0xfff) + 8, max = 0xffc; + int b_bit = iword & 0x00400000; + if (b_bit) + max = 0xfff; + if (u_bit) + ofs += (iword & 0xfff); + else + ofs -= (iword & 0xfff); + /* NOTE/TODO: This assumes 4KB pages, + it will not work with 1KB pages. */ + if (ofs >= 0 && ofs <= max) { + unsigned char *p; + unsigned char c[4]; + int len = b_bit? 
1 : 4; + uint32_t x, a = (addr & 0xfffff000) | ofs; + /* ic->f = cond_instr(mov); */ + ic->f = arm_dpi_instr[condition_code + 16*0xd]; + ic->arg[2] = (size_t)(&cpu->cd.arm.r[rd]); + p = cpu->cd.arm.host_load[a >> 12]; + if (p != NULL) { + memcpy(c, p + (a & 0xfff), len); + } else { + if (!cpu->memory_rw(cpu, cpu->mem, a, + c, len, MEM_READ, CACHE_DATA)) { + fatal("to_be_translated(): " + "read failed X: TODO\n"); + goto bad; + } + } + if (cpu->byte_order == EMUL_LITTLE_ENDIAN) + x = c[0] + (c[1]<<8) + + (c[2]<<16) + (c[3]<<24); + else + x = c[3] + (c[2]<<8) + + (c[1]<<16) + (c[0]<<24); + if (b_bit) + x = c[0]; + ic->arg[1] = x; + } + } + if (iword == 0xe4b09004) + cpu->cd.arm.combination_check = COMBINE(netbsd_copyin); + if (iword == 0xe4a17004) + cpu->cd.arm.combination_check = COMBINE(netbsd_copyout); break; case 0x8: /* Multiple load/store... (Block data transfer) */ case 0x9: /* xxxx100P USWLnnnn llllllll llllllll */ + ic->arg[0] = (size_t)(&cpu->cd.arm.r[rn]); + ic->arg[1] = (size_t)iword; + /* Generic case: */ if (l_bit) ic->f = cond_instr(bdt_load); else ic->f = cond_instr(bdt_store); - ic->arg[0] = (size_t)(&cpu->cd.arm.r[rn]); - ic->arg[1] = (size_t)iword; +#if defined(HOST_LITTLE_ENDIAN) && !defined(GATHER_BDT_STATISTICS) + /* + * Check for availability of optimized implementation: + * xxxx100P USWLnnnn llllllll llllllll + * ^ ^ ^ ^ ^ ^ ^ ^ (0x00950154) + * These bits are used to select which list to scan, and then + * the list is scanned linearly. + * + * The optimized functions do not support show_trace_tree, + * but it's ok to use the unoptimized version in that case. 
+ */ + if (!cpu->machine->show_trace_tree) { + int i = 0, j = iword; + j = ((j & 0x00800000) >> 16) | ((j & 0x00100000) >> 14) + | ((j & 0x00040000) >> 13) | ((j & 0x00010000) >> 12) + | ((j & 0x00000100) >> 5) | ((j & 0x00000040) >> 4) + | ((j & 0x00000010) >> 3) | ((j & 0x00000004) >> 2); + while (multi_opcode[j][i] != 0) { + if ((iword & 0x0fffffff) == + multi_opcode[j][i]) { + ic->f = multi_opcode_f[j] + [i*16 + condition_code]; + break; + } + i ++; + } + } +#endif if (rn == ARM_PC) { fatal("TODO: bdt with PC as base\n"); goto bad; @@ -1662,6 +2920,12 @@ if (main_opcode == 0x0a) { ic->f = cond_instr(b); samepage_function = cond_instr(b_samepage); + if (iword == 0xcaffffed) + cpu->cd.arm.combination_check = + COMBINE(netbsd_memset); + if (iword == 0xaafffff9) + cpu->cd.arm.combination_check = + COMBINE(netbsd_memcpy); } else { if (cpu->machine->show_trace_tree) { ic->f = cond_instr(bl_trace); @@ -1673,6 +2937,11 @@ } } + /* arg 1 = offset of current instruction */ + /* arg 2 = offset of the following instruction */ + ic->arg[1] = addr & 0xffc; + ic->arg[2] = (addr & 0xffc) + 4; + ic->arg[0] = (iword & 0x00ffffff) << 2; /* Sign-extend: */ if (ic->arg[0] & 0x02000000) @@ -1682,7 +2951,12 @@ */ ic->arg[0] = (int32_t)(ic->arg[0] + 8); - /* Special case: branch within the same page: */ + /* + * Special case: branch within the same page: + * + * arg[0] = addr of the arm_instr_call of the target + * arg[1] = addr of the next arm_instr_call. 
+ */ { uint32_t mask_within_page = ((ARM_IC_ENTRIES_PER_PAGE-1) << @@ -1697,11 +2971,70 @@ cpu->cd.arm.cur_ic_page + ((new_pc & mask_within_page) >> ARM_INSTR_ALIGNMENT_SHIFT)); + ic->arg[1] = (size_t) ( + cpu->cd.arm.cur_ic_page + + (((addr & mask_within_page) + 4) >> + ARM_INSTR_ALIGNMENT_SHIFT)); + } else if (main_opcode == 0x0a) { + /* Special hack for a plain "b": */ + ic->arg[0] += ic->arg[1]; } } + + if (main_opcode == 0xa && (condition_code <= 1 + || condition_code == 3 || condition_code == 8 + || condition_code == 12 || condition_code == 13)) + cpu->cd.arm.combination_check = COMBINE(cmps_b); + + if (iword == 0x1afffffc) + cpu->cd.arm.combination_check = COMBINE(strlen); + + /* Hm. Does this really increase performance? */ + if (iword == 0x8afffffa) + cpu->cd.arm.combination_check = + COMBINE(netbsd_cacheclean2); + break; + + case 0xc: + case 0xd: + /* + * xxxx1100 0100nnnn ddddcccc oooommmm MCRR c,op,Rd,Rn,CRm + * xxxx1100 0101nnnn ddddcccc oooommmm MRRC c,op,Rd,Rn,CRm + */ + if ((iword & 0x0fe00fff) == 0x0c400000) { + /* Special case: mar/mra DSP instructions */ + fatal("TODO: mar/mra DSP instructions!\n"); + /* Perhaps these are actually identical to MCRR/MRRC */ + goto bad; + } + + if ((iword & 0x0fe00000) == 0x0c400000) { + fatal("MCRR/MRRC: TODO\n"); + goto bad; + } + + /* + * TODO: LDC/STC + * + * For now, treat as Undefined instructions. This causes e.g. + * Linux/ARM to emulate these instructions (floating point). 
+ */ +#if 0 + ic->f = cond_instr(und); + ic->arg[0] = addr & 0xfff; +#else + fatal("LDC/STC: TODO\n"); + goto bad; +#endif break; case 0xe: + if ((iword & 0x0ff00ff0) == 0x0e200010) { + /* Special case: mia* DSP instructions */ + /* See Intel's 27343601.pdf, page 16-20 */ + fatal("TODO: mia* DSP instructions!\n"); + goto bad; + } if (iword & 0x10) { /* xxxx1110 oooLNNNN ddddpppp qqq1MMMM MCR/MRC */ ic->arg[0] = iword; @@ -1711,13 +3044,20 @@ ic->arg[0] = iword; ic->f = cond_instr(cdp); } + if (iword == 0xee070f9a) + cpu->cd.arm.combination_check = + COMBINE(netbsd_cacheclean); break; case 0xf: /* SWI: */ /* Default handler: */ ic->f = cond_instr(swi); - if (iword == 0xef8c64be) { + ic->arg[0] = addr & 0xfff; + if (iword == 0xef8c64eb) { + /* Hack for rebooting a machine: */ + ic->f = instr(reboot); + } else if (iword == 0xef8c64be) { /* Hack for openfirmware prom emulation: */ ic->f = instr(openfirmware); } else if (cpu->machine->userland_emul != NULL) {