--- trunk/src/cpus/cpu_mips_instr.c 2007/10/08 16:20:26 28 +++ trunk/src/cpus/cpu_mips_instr.c 2007/10/08 16:20:58 32 @@ -25,7 +25,7 @@ * SUCH DAMAGE. * * - * $Id: cpu_mips_instr.c,v 1.97 2006/07/20 03:20:03 debug Exp $ + * $Id: cpu_mips_instr.c,v 1.118 2006/10/29 05:10:27 debug Exp $ * * MIPS instructions. * @@ -37,6 +37,84 @@ /* + * COPROC_AVAILABILITY_CHECK(n) checks for the coprocessor available bit for + * coprocessor number n, and causes a CoProcessor Unusable exception if it + * is not set. (Note: For coprocessor 0 checks, use cop0_availability_check!) + */ +#ifndef COPROC_AVAILABILITY_CHECK +#define COPROC_AVAILABILITY_CHECK(x) { \ + const int cpnr = (x); \ + int low_pc = ((size_t)ic - (size_t)cpu->cd.mips.cur_ic_page) \ + / sizeof(struct mips_instr_call); \ + cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1) \ + << MIPS_INSTR_ALIGNMENT_SHIFT); \ + cpu->pc += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT); \ + if (!(cpu->cd.mips.coproc[0]->reg[COP0_STATUS] & \ + ((1 << cpnr) << STATUS_CU_SHIFT)) ) { \ + mips_cpu_exception(cpu, EXCEPTION_CPU, \ + 0, 0, cpnr, 0, 0, 0); \ + return; \ + } \ + } +#endif + + +#ifndef COP0_AVAILABILITY_CHECK_INCLUDED +#define COP0_AVAILABILITY_CHECK_INCLUDED +/* + * cop0_availability_check() causes a CoProcessor Unusable exception if + * we are currently running in usermode, and the coprocessor available bit + * for coprocessor 0 is not set. + * + * Returns 1 if ok (i.e. if the coprocessor was usable), 0 on exceptions. + */ +int cop0_availability_check(struct cpu *cpu, struct mips_instr_call *ic) +{ + int in_usermode = 0; + struct mips_coproc *cp0 = cpu->cd.mips.coproc[0]; + + switch (cpu->cd.mips.cpu_type.exc_model) { + case EXC3K: + /* + * NOTE: If the KU bit is checked, Linux crashes. + * It is the PC that counts. + * + * TODO: Check whether this is true or not for R4000 as well. 
+ */ + /* TODO: if (cp0->reg[COP0_STATUS] & MIPS1_SR_KU_CUR) */ + if (cpu->pc <= 0x7fffffff) + in_usermode = 1; + break; + default: + /* R4000 etc: (TODO: How about supervisor mode?) */ + if (((cp0->reg[COP0_STATUS] & + STATUS_KSU_MASK) >> STATUS_KSU_SHIFT) != KSU_KERNEL) + in_usermode = 1; + if (cp0->reg[COP0_STATUS] & (STATUS_ERL | STATUS_EXL)) + in_usermode = 0; + break; + } + + if (in_usermode) { + int low_pc = ((size_t)ic - (size_t)cpu->cd.mips.cur_ic_page) + / sizeof(struct mips_instr_call); + cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1) + << MIPS_INSTR_ALIGNMENT_SHIFT); + cpu->pc += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT); + if (!(cpu->cd.mips.coproc[0]->reg[COP0_STATUS] & + (1 << STATUS_CU_SHIFT)) ) { + mips_cpu_exception(cpu, EXCEPTION_CPU, + 0, 0, /* cpnr */ 0, 0, 0, 0); + return 0; + } + } + + return 1; +} +#endif + + +/* * invalid: For catching bugs. */ X(invalid) @@ -1071,6 +1149,56 @@ /* + * ext: Extract bitfield. + * + * arg[0] = pointer to rt + * arg[1] = pointer to rs + * arg[2] = (msbd << 5) + lsb + */ +X(ext) +{ + fatal("ext: todo\n"); + exit(1); +} + + +/* + * dsbh: Doubleword swap bytes within half-word + * dshd: Doubleword swap half-words within double-word + * wsbh: Word swap bytes within half-word + * seb: Sign-extend byte + * seh: Sign-extend half-word + * + * arg[0] = pointer to rt + * arg[1] = pointer to rd + */ +X(dsbh) +{ + uint64_t x = reg(ic->arg[0]); + x = ((x & 0x00ff00ff00ff00ffULL) << 8) + | ((x & 0xff00ff00ff00ff00ULL) >> 8); + reg(ic->arg[1]) = x; +} +X(dshd) +{ + uint64_t x = reg(ic->arg[0]); + x = ((x & 0x000000000000ffffULL) << 48) + | ((x & 0x00000000ffff0000ULL) << 16) + | ((x & 0x0000ffff00000000ULL) >> 16) + | ((x & 0xffff000000000000ULL) >> 48); + reg(ic->arg[1]) = x; +} +X(wsbh) +{ + uint32_t x = reg(ic->arg[0]); + x = ((x & 0x00ff00ff) << 8) | ((x & 0xff00ff00) >> 8); + reg(ic->arg[1]) = (int32_t) x; +} +X(seb) { reg(ic->arg[1]) = (int8_t)reg(ic->arg[0]); } +X(seh) { reg(ic->arg[1]) = (int16_t)reg(ic->arg[0]); } + + +/* * 
2-register + immediate: * * arg[0] = pointer to rs @@ -1096,8 +1224,8 @@ res = 0, rem = a; else res = a / b, rem = a - b*res; - reg(&cpu->cd.mips.lo) = (int32_t)res; - reg(&cpu->cd.mips.hi) = (int32_t)rem; + cpu->cd.mips.lo = (int32_t)res; + cpu->cd.mips.hi = (int32_t)rem; } X(divu) { @@ -1107,8 +1235,8 @@ res = 0, rem = a; else res = a / b, rem = a - b*res; - reg(&cpu->cd.mips.lo) = (int32_t)res; - reg(&cpu->cd.mips.hi) = (int32_t)rem; + cpu->cd.mips.lo = (int32_t)res; + cpu->cd.mips.hi = (int32_t)rem; } X(ddiv) { @@ -1119,8 +1247,8 @@ else res = a / b; rem = a - b*res; - reg(&cpu->cd.mips.lo) = res; - reg(&cpu->cd.mips.hi) = rem; + cpu->cd.mips.lo = res; + cpu->cd.mips.hi = rem; } X(ddivu) { @@ -1131,15 +1259,15 @@ else res = a / b; rem = a - b*res; - reg(&cpu->cd.mips.lo) = res; - reg(&cpu->cd.mips.hi) = rem; + cpu->cd.mips.lo = res; + cpu->cd.mips.hi = rem; } X(mult) { int32_t a = reg(ic->arg[0]), b = reg(ic->arg[1]); int64_t res = (int64_t)a * (int64_t)b; - reg(&cpu->cd.mips.lo) = (int32_t)res; - reg(&cpu->cd.mips.hi) = (int32_t)(res >> 32); + cpu->cd.mips.lo = (int32_t)res; + cpu->cd.mips.hi = (int32_t)(res >> 32); } X(mult_r5900) { @@ -1147,16 +1275,16 @@ hi, lo, and a third register */ int32_t a = reg(ic->arg[0]), b = reg(ic->arg[1]); int64_t res = (int64_t)a * (int64_t)b; - reg(&cpu->cd.mips.lo) = (int32_t)res; - reg(&cpu->cd.mips.hi) = (int32_t)(res >> 32); + cpu->cd.mips.lo = (int32_t)res; + cpu->cd.mips.hi = (int32_t)(res >> 32); reg(ic->arg[2]) = (int32_t)res; } X(multu) { uint32_t a = reg(ic->arg[0]), b = reg(ic->arg[1]); uint64_t res = (uint64_t)a * (uint64_t)b; - reg(&cpu->cd.mips.lo) = (int32_t)res; - reg(&cpu->cd.mips.hi) = (int32_t)(res >> 32); + cpu->cd.mips.lo = (int32_t)res; + cpu->cd.mips.hi = (int32_t)(res >> 32); } X(multu_r5900) { @@ -1164,8 +1292,8 @@ hi, lo, and a third register */ uint32_t a = reg(ic->arg[0]), b = reg(ic->arg[1]); uint64_t res = (uint64_t)a * (uint64_t)b; - reg(&cpu->cd.mips.lo) = (int32_t)res; - reg(&cpu->cd.mips.hi) = 
(int32_t)(res >> 32); + cpu->cd.mips.lo = (int32_t)res; + cpu->cd.mips.hi = (int32_t)(res >> 32); reg(ic->arg[2]) = (int32_t)res; } X(dmult) @@ -1194,8 +1322,8 @@ hi ^= (int64_t) -1; lo ^= (int64_t) -1; } - reg(&cpu->cd.mips.lo) = lo; - reg(&cpu->cd.mips.hi) = hi; + cpu->cd.mips.lo = lo; + cpu->cd.mips.hi = hi; } X(dmultu) { @@ -1211,8 +1339,8 @@ } c = (c << 1) | (b >> 63); b <<= 1; } - reg(&cpu->cd.mips.lo) = lo; - reg(&cpu->cd.mips.hi) = hi; + cpu->cd.mips.lo = lo; + cpu->cd.mips.hi = hi; } X(tge) { @@ -1621,6 +1749,7 @@ /* * set: Set a register to an immediate (signed) 32-bit value. + * (This is the actual implementation of the lui instruction.) * * arg[0] = pointer to the register * arg[1] = (int32_t) immediate value @@ -1659,6 +1788,18 @@ coproc_register_read(cpu, cpu->cd.mips.coproc[0], rd, &tmp, select); reg(ic->arg[0]) = (int32_t)tmp; } +X(mfc0_select0) +{ + /* Fast int32_t read, with no side effects: */ + int rd = ic->arg[1] & 31; +#if 0 + uint64_t tmp; + cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1)<<MIPS_INSTR_ALIGNMENT_SHIFT); + cpu->pc |= ic->arg[2]; + /* TODO: cause exception if necessary */ +#endif + reg(ic->arg[0]) = (int32_t)cpu->cd.mips.coproc[0]->reg[rd]; +} X(mtc0) { int rd = ic->arg[1] & 31, select = ic->arg[1] >> 5; @@ -1679,8 +1820,10 @@ uint32_t cause = cpu->cd.mips.coproc[0]->reg[COP0_CAUSE]; /* NOTE: STATUS_IE happens to match the enable bit also on R2000/R3000, so this is ok. */ - if (status & (STATUS_EXL | STATUS_ERL)) - status &= ~STATUS_IE; + if (cpu->cd.mips.cpu_type.exc_model != EXC3K) { + if (status & (STATUS_EXL | STATUS_ERL)) + status &= ~STATUS_IE; + } /* Ugly R5900 special case: (TODO: move this?) 
*/ if (cpu->cd.mips.cpu_type.rev == MIPS_R5900 && !(status & R5900_STATUS_EIE)) @@ -1700,6 +1843,18 @@ coproc_register_read(cpu, cpu->cd.mips.coproc[0], rd, (uint64_t *)ic->arg[0], select); } +X(dmfc0_select0) +{ + /* Fast int64_t read, with no side effects: */ + int rd = ic->arg[1] & 31; +#if 0 + uint64_t tmp; + cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1)<<MIPS_INSTR_ALIGNMENT_SHIFT); + cpu->pc |= ic->arg[2]; + /* TODO: cause exception if necessary */ +#endif + reg(ic->arg[0]) = cpu->cd.mips.coproc[0]->reg[rd]; +} X(dmtc0) { int rd = ic->arg[1] & 31, select = ic->arg[1] >> 5; @@ -1721,18 +1876,9 @@ X(cop1_bc) { MODE_int_t old_pc = cpu->pc; - const int cpnr = 1; - int x, low_pc, cc = ic->arg[0]; + int x, cc = ic->arg[0]; - low_pc = ((size_t)ic - (size_t)cpu->cd.mips.cur_ic_page) - / sizeof(struct mips_instr_call); - cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1)<< MIPS_INSTR_ALIGNMENT_SHIFT); - cpu->pc += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT); - if (!(cpu->cd.mips.coproc[0]-> - reg[COP0_STATUS] & ((1 << cpnr) << STATUS_CU_SHIFT)) ) { - mips_cpu_exception(cpu, EXCEPTION_CPU, 0, 0, cpnr, 0, 0, 0); - return; - } + COPROC_AVAILABILITY_CHECK(1); /* Get the correct condition code bit: */ if (cc == 0) @@ -1772,17 +1918,7 @@ */ X(cop1_slow) { - const int cpnr = 1; - int low_pc = ((size_t)ic - (size_t)cpu->cd.mips.cur_ic_page) - / sizeof(struct mips_instr_call); - cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1)<< MIPS_INSTR_ALIGNMENT_SHIFT); - cpu->pc += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT); - - if (!(cpu->cd.mips.coproc[0]-> - reg[COP0_STATUS] & ((1 << cpnr) << STATUS_CU_SHIFT)) ) { - mips_cpu_exception(cpu, EXCEPTION_CPU, 0, 0, cpnr, 0, 0, 0); - return; - } + COPROC_AVAILABILITY_CHECK(1); coproc_function(cpu, cpu->cd.mips.coproc[1], 1, ic->arg[0], 0, 1); } @@ -1807,6 +1943,15 @@ cpu->pc += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT); mips_cpu_exception(cpu, EXCEPTION_BP, 0, 0, 0, 0, 0, 0); } +X(reboot) +{ + if (!cop0_availability_check(cpu, ic)) + return; + + cpu->running = 0; + debugger_n_steps_left_before_interaction = 
0; + cpu->cd.mips.next_ic = &nothing_call; +} /* @@ -1865,6 +2010,9 @@ */ X(tlbw) { + if (!cop0_availability_check(cpu, ic)) + return; + cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1)<<MIPS_INSTR_ALIGNMENT_SHIFT); cpu->pc |= ic->arg[2]; coproc_tlbwri(cpu, ic->arg[0]); @@ -1879,12 +2027,18 @@ */ X(tlbp) { + if (!cop0_availability_check(cpu, ic)) + return; + cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1)<<MIPS_INSTR_ALIGNMENT_SHIFT); cpu->pc |= ic->arg[2]; coproc_tlbpr(cpu, 0); } X(tlbr) { + if (!cop0_availability_check(cpu, ic)) + return; + cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1)<<MIPS_INSTR_ALIGNMENT_SHIFT); cpu->pc |= ic->arg[2]; coproc_tlbpr(cpu, 1); @@ -1896,6 +2050,9 @@ */ X(rfe) { + if (!cop0_availability_check(cpu, ic)) + return; + /* Just rotate the interrupt/user bits: */ cpu->cd.mips.coproc[0]->reg[COP0_STATUS] = (cpu->cd.mips.coproc[0]->reg[COP0_STATUS] & ~0x3f) | @@ -1914,6 +2071,9 @@ */ X(eret) { + if (!cop0_availability_check(cpu, ic)) + return; + if (cpu->cd.mips.coproc[0]->reg[COP0_STATUS] & STATUS_ERL) { cpu->pc = cpu->cd.mips.coproc[0]->reg[COP0_ERROREPC]; cpu->cd.mips.coproc[0]->reg[COP0_STATUS] &= ~STATUS_ERL; @@ -1934,6 +2094,9 @@ */ X(deret) { + if (!cop0_availability_check(cpu, ic)) + return; + /* * According to the MIPS64 manual, deret loads PC from the DEPC cop0 * register, and jumps there immediately. No delay slot. @@ -1952,6 +2115,66 @@ /* + * idle: Called from the implementation of wait, or netbsd_pmax_idle. + */ +X(idle) +{ + /* + * If there is an interrupt, then just return. Otherwise + * re-run the wait instruction (after a delay). + */ + uint32_t status = cpu->cd.mips.coproc[0]->reg[COP0_STATUS]; + uint32_t cause = cpu->cd.mips.coproc[0]->reg[COP0_CAUSE]; + + if (cpu->cd.mips.cpu_type.exc_model != EXC3K) { + if (status & (STATUS_EXL | STATUS_ERL)) + status &= ~STATUS_IE; + } + + /* Ugly R5900 special case: (TODO: move this?) 
*/ + if (cpu->cd.mips.cpu_type.rev == MIPS_R5900 && + !(status & R5900_STATUS_EIE)) + status &= ~STATUS_IE; + if (status & STATUS_IE && (status & cause & STATUS_IM_MASK)) + return; + + cpu->cd.mips.next_ic = ic; + cpu->is_halted = 1; + cpu->has_been_idling = 1; + + /* + * There was no interrupt. Go to sleep. + * + * TODO: + * + * Think about how to actually implement this usleep stuff, + * in an SMP and/or timing accurate environment. + */ + + if (cpu->machine->ncpus == 1) { + static int x = 0; + if ((++x) == 600) { + usleep(10); + x = 0; + } + cpu->n_translated_instrs += N_SAFE_DYNTRANS_LIMIT / 6; + } +} + + +/* + * wait: Wait for external interrupt. + */ +X(wait) +{ + if (!cop0_availability_check(cpu, ic)) + return; + + instr(idle)(cpu, ic); +} + + +/* * rdhwr: Read hardware register into gpr (MIPS32/64 rev 2). * * arg[0] = ptr to rt (destination register) @@ -2188,21 +2411,7 @@ */ X(lwc1) { - const int cpnr = 1; - - /* Synch. PC and call the generic load/store function: */ - int low_pc = ((size_t)ic - (size_t)cpu->cd.mips.cur_ic_page) - / sizeof(struct mips_instr_call); - cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1) - << MIPS_INSTR_ALIGNMENT_SHIFT); - cpu->pc += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT); - - /* ... but first, let's see if the coprocessor is available: */ - if (!(cpu->cd.mips.coproc[0]-> - reg[COP0_STATUS] & ((1 << cpnr) << STATUS_CU_SHIFT)) ) { - mips_cpu_exception(cpu, EXCEPTION_CPU, 0, 0, cpnr, 0, 0, 0); - return; - } + COPROC_AVAILABILITY_CHECK(1); #ifdef MODE32 mips32_loadstore @@ -2214,21 +2423,7 @@ } X(swc1) { - const int cpnr = 1; - - /* Synch. PC and call the generic load/store function: */ - int low_pc = ((size_t)ic - (size_t)cpu->cd.mips.cur_ic_page) - / sizeof(struct mips_instr_call); - cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1) - << MIPS_INSTR_ALIGNMENT_SHIFT); - cpu->pc += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT); - - /* ... 
but first, let's see if the coprocessor is available: */ - if (!(cpu->cd.mips.coproc[0]-> - reg[COP0_STATUS] & ((1 << cpnr) << STATUS_CU_SHIFT)) ) { - mips_cpu_exception(cpu, EXCEPTION_CPU, 0, 0, cpnr, 0, 0, 0); - return; - } + COPROC_AVAILABILITY_CHECK(1); #ifdef MODE32 mips32_loadstore @@ -2240,24 +2435,11 @@ } X(ldc1) { - const int cpnr = 1; int use_fp_pairs = !(cpu->cd.mips.coproc[0]->reg[COP0_STATUS] & STATUS_FR); uint64_t fpr, *backup_ptr; - /* Synch. PC and call the generic load/store function: */ - int low_pc = ((size_t)ic - (size_t)cpu->cd.mips.cur_ic_page) - / sizeof(struct mips_instr_call); - cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1) - << MIPS_INSTR_ALIGNMENT_SHIFT); - cpu->pc += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT); - - /* ... but first, let's see if the coprocessor is available: */ - if (!(cpu->cd.mips.coproc[0]-> - reg[COP0_STATUS] & ((1 << cpnr) << STATUS_CU_SHIFT)) ) { - mips_cpu_exception(cpu, EXCEPTION_CPU, 0, 0, cpnr, 0, 0, 0); - return; - } + COPROC_AVAILABILITY_CHECK(1); backup_ptr = (uint64_t *) ic->arg[0]; ic->arg[0] = (size_t) &fpr; @@ -2281,24 +2463,11 @@ } X(sdc1) { - const int cpnr = 1; int use_fp_pairs = !(cpu->cd.mips.coproc[0]->reg[COP0_STATUS] & STATUS_FR); uint64_t fpr, *backup_ptr; - /* Synch. PC and call the generic load/store function: */ - int low_pc = ((size_t)ic - (size_t)cpu->cd.mips.cur_ic_page) - / sizeof(struct mips_instr_call); - cpu->pc &= ~((MIPS_IC_ENTRIES_PER_PAGE-1) - << MIPS_INSTR_ALIGNMENT_SHIFT); - cpu->pc += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT); - - /* ... 
but first, let's see if the coprocessor is available: */ - if (!(cpu->cd.mips.coproc[0]-> - reg[COP0_STATUS] & ((1 << cpnr) << STATUS_CU_SHIFT)) ) { - mips_cpu_exception(cpu, EXCEPTION_CPU, 0, 0, cpnr, 0, 0, 0); - return; - } + COPROC_AVAILABILITY_CHECK(1); backup_ptr = (uint64_t *) ic->arg[0]; ic->arg[0] = (size_t) &fpr; @@ -2349,10 +2518,16 @@ */ X(di_r5900) { + if (!cop0_availability_check(cpu, ic)) + return; + cpu->cd.mips.coproc[0]->reg[COP0_STATUS] &= ~R5900_STATUS_EIE; } X(ei_r5900) { + if (!cop0_availability_check(cpu, ic)) + return; + cpu->cd.mips.coproc[0]->reg[COP0_STATUS] |= R5900_STATUS_EIE; } @@ -2381,7 +2556,7 @@ if (cpu->delay_slot || page == NULL || (rX & 3) != 0 || rZ != 0) { instr(addiu)(cpu, ic); return; - } + } if (rYp == (uint64_t *) ic->arg[0]) rYp = (uint64_t *) ic[1].arg[1]; @@ -2410,13 +2585,80 @@ } +#ifdef MODE32 /* - * multi_sw_3: + * multi_sw_2, _3, _4: * * sw r?,ofs(rX) r?=arg[0], rX=arg[1], ofs=arg[2] - * sw r?,ofs(rX) r?=arg[0], rX=arg[1], ofs=arg[2] - * sw r?,ofs(rX) r?=arg[0], rX=arg[1], ofs=arg[2] */ +X(multi_sw_2_le) +{ + uint32_t *page; + MODE_uint_t rX = reg(ic[0].arg[1]), r1, r2; + MODE_uint_t addr0 = rX + (int32_t)ic[0].arg[2]; + MODE_uint_t addr1 = rX + (int32_t)ic[1].arg[2]; + uint32_t index0 = addr0 >> 12, index1 = addr1 >> 12; + + page = (uint32_t *) cpu->cd.mips.host_store[index0]; + + /* Fallback: */ + if (cpu->delay_slot || + page == NULL || (addr0 & 3) != 0 || (addr1 & 3) != 0 || + index0 != index1) { + /* Normal safe sw: */ + mips32_loadstore[8 + 2 * 2](cpu, ic); + return; + } + + addr0 = (addr0 >> 2) & 0x3ff; + addr1 = (addr1 >> 2) & 0x3ff; + + r1 = reg(ic[0].arg[0]); + r2 = reg(ic[1].arg[0]); + + r1 = LE32_TO_HOST(r1); + r2 = LE32_TO_HOST(r2); + + page[addr0] = r1; + page[addr1] = r2; + + cpu->n_translated_instrs ++; + cpu->cd.mips.next_ic ++; +} +X(multi_sw_2_be) +{ + uint32_t *page; + MODE_uint_t rX = reg(ic[0].arg[1]), r1, r2; + MODE_uint_t addr0 = rX + (int32_t)ic[0].arg[2]; + MODE_uint_t addr1 = rX + 
(int32_t)ic[1].arg[2]; + uint32_t index0 = addr0 >> 12, index1 = addr1 >> 12; + + page = (uint32_t *) cpu->cd.mips.host_store[index0]; + + /* Fallback: */ + if (cpu->delay_slot || + page == NULL || (addr0 & 3) != 0 || (addr1 & 3) != 0 || + index0 != index1) { + /* Normal safe sw: */ + mips32_loadstore[16 + 8 + 2 * 2](cpu, ic); + return; + } + + addr0 = (addr0 >> 2) & 0x3ff; + addr1 = (addr1 >> 2) & 0x3ff; + + r1 = reg(ic[0].arg[0]); + r2 = reg(ic[1].arg[0]); + + r1 = BE32_TO_HOST(r1); + r2 = BE32_TO_HOST(r2); + + page[addr0] = r1; + page[addr1] = r2; + + cpu->n_translated_instrs ++; + cpu->cd.mips.next_ic ++; +} X(multi_sw_3_le) { uint32_t *page; @@ -2434,7 +2676,7 @@ page == NULL || (addr0 & 3) != 0 || (addr1 & 3) != 0 || (addr2 & 3) != 0 || index0 != index1 || index0 != index2) { /* Normal safe sw: */ - ic[1].f(cpu, ic); + mips32_loadstore[8 + 2 * 2](cpu, ic); return; } @@ -2458,7 +2700,7 @@ page[addr2] = r3; cpu->n_translated_instrs += 2; - cpu->cd.mips.next_ic = (struct mips_instr_call *) &ic[3]; + cpu->cd.mips.next_ic += 2; } X(multi_sw_3_be) { @@ -2477,7 +2719,7 @@ page == NULL || (addr0 & 3) != 0 || (addr1 & 3) != 0 || (addr2 & 3) != 0 || index0 != index1 || index0 != index2) { /* Normal safe sw: */ - ic[1].f(cpu, ic); + mips32_loadstore[16 + 8 + 2 * 2](cpu, ic); return; } @@ -2501,7 +2743,374 @@ page[addr2] = r3; cpu->n_translated_instrs += 2; - cpu->cd.mips.next_ic = (struct mips_instr_call *) &ic[3]; + cpu->cd.mips.next_ic += 2; +} +X(multi_sw_4_le) +{ + uint32_t *page; + MODE_uint_t rX = reg(ic[0].arg[1]), r1, r2, r3, r4; + MODE_uint_t addr0 = rX + (int32_t)ic[0].arg[2]; + MODE_uint_t addr1 = rX + (int32_t)ic[1].arg[2]; + MODE_uint_t addr2 = rX + (int32_t)ic[2].arg[2]; + MODE_uint_t addr3 = rX + (int32_t)ic[3].arg[2]; + uint32_t index0 = addr0 >> 12, index1 = addr1 >> 12, + index2 = addr2 >> 12, index3 = addr3 >> 12; + + page = (uint32_t *) cpu->cd.mips.host_store[index0]; + + /* Fallback: */ + if (cpu->delay_slot || + page == NULL || (addr0 & 3) != 0 || 
(addr1 & 3) != 0 || + (addr2 & 3) != 0 || (addr3 & 3) != 0 || index0 != index1 || + index0 != index2 || index0 != index3) { + /* Normal safe sw: */ + mips32_loadstore[8 + 2 * 2](cpu, ic); + return; + } + + addr0 = (addr0 >> 2) & 0x3ff; + addr1 = (addr1 >> 2) & 0x3ff; + addr2 = (addr2 >> 2) & 0x3ff; + addr3 = (addr3 >> 2) & 0x3ff; + + r1 = reg(ic[0].arg[0]); + r2 = reg(ic[1].arg[0]); + r3 = reg(ic[2].arg[0]); + r4 = reg(ic[3].arg[0]); + + r1 = LE32_TO_HOST(r1); + r2 = LE32_TO_HOST(r2); + r3 = LE32_TO_HOST(r3); + r4 = LE32_TO_HOST(r4); + + page[addr0] = r1; + page[addr1] = r2; + page[addr2] = r3; + page[addr3] = r4; + + cpu->n_translated_instrs += 3; + cpu->cd.mips.next_ic += 3; +} +X(multi_sw_4_be) +{ + uint32_t *page; + MODE_uint_t rX = reg(ic[0].arg[1]), r1, r2, r3, r4; + MODE_uint_t addr0 = rX + (int32_t)ic[0].arg[2]; + MODE_uint_t addr1 = rX + (int32_t)ic[1].arg[2]; + MODE_uint_t addr2 = rX + (int32_t)ic[2].arg[2]; + MODE_uint_t addr3 = rX + (int32_t)ic[3].arg[2]; + uint32_t index0 = addr0 >> 12, index1 = addr1 >> 12, + index2 = addr2 >> 12, index3 = addr3 >> 12; + + page = (uint32_t *) cpu->cd.mips.host_store[index0]; + + /* Fallback: */ + if (cpu->delay_slot || + page == NULL || (addr0 & 3) != 0 || (addr1 & 3) != 0 || + (addr2 & 3) != 0 || (addr3 & 3) != 0 || index0 != index1 || + index0 != index2 || index0 != index3) { + /* Normal safe sw: */ + mips32_loadstore[16 + 8 + 2 * 2](cpu, ic); + return; + } + + addr0 = (addr0 >> 2) & 0x3ff; + addr1 = (addr1 >> 2) & 0x3ff; + addr2 = (addr2 >> 2) & 0x3ff; + addr3 = (addr3 >> 2) & 0x3ff; + + r1 = reg(ic[0].arg[0]); + r2 = reg(ic[1].arg[0]); + r3 = reg(ic[2].arg[0]); + r4 = reg(ic[3].arg[0]); + + r1 = BE32_TO_HOST(r1); + r2 = BE32_TO_HOST(r2); + r3 = BE32_TO_HOST(r3); + r4 = BE32_TO_HOST(r4); + + page[addr0] = r1; + page[addr1] = r2; + page[addr2] = r3; + page[addr3] = r4; + + cpu->n_translated_instrs += 3; + cpu->cd.mips.next_ic += 3; +} +#endif + + +#ifdef MODE32 +/* + * multi_lw_2, _3, _4: + * + * lw r?,ofs(rX) 
r?=arg[0], rX=arg[1], ofs=arg[2] + */ +X(multi_lw_2_le) +{ + uint32_t *page; + MODE_uint_t rX = reg(ic[0].arg[1]), r1, r2; + MODE_uint_t addr0 = rX + (int32_t)ic[0].arg[2]; + MODE_uint_t addr1 = rX + (int32_t)ic[1].arg[2]; + uint32_t index0 = addr0 >> 12, index1 = addr1 >> 12; + + page = (uint32_t *) cpu->cd.mips.host_load[index0]; + + /* Fallback: */ + if (cpu->delay_slot || + page == NULL || (addr0 & 3) != 0 || (addr1 & 3) != 0 || + index0 != index1) { + /* Normal safe lw: */ + mips32_loadstore[2 * 2 + 1](cpu, ic); + return; + } + + addr0 = (addr0 >> 2) & 0x3ff; + addr1 = (addr1 >> 2) & 0x3ff; + + r1 = page[addr0]; + r2 = page[addr1]; + + r1 = LE32_TO_HOST(r1); + r2 = LE32_TO_HOST(r2); + + reg(ic[0].arg[0]) = r1; + reg(ic[1].arg[0]) = r2; + + cpu->n_translated_instrs ++; + cpu->cd.mips.next_ic ++; +} +X(multi_lw_2_be) +{ + uint32_t *page; + MODE_uint_t rX = reg(ic[0].arg[1]), r1, r2; + MODE_uint_t addr0 = rX + (int32_t)ic[0].arg[2]; + MODE_uint_t addr1 = rX + (int32_t)ic[1].arg[2]; + uint32_t index0 = addr0 >> 12, index1 = addr1 >> 12; + + page = (uint32_t *) cpu->cd.mips.host_load[index0]; + + /* Fallback: */ + if (cpu->delay_slot || + page == NULL || (addr0 & 3) != 0 || (addr1 & 3) != 0 || + index0 != index1) { + /* Normal safe lw: */ + mips32_loadstore[16 + 2 * 2 + 1](cpu, ic); + return; + } + + addr0 = (addr0 >> 2) & 0x3ff; + addr1 = (addr1 >> 2) & 0x3ff; + + r1 = page[addr0]; + r2 = page[addr1]; + + r1 = BE32_TO_HOST(r1); + r2 = BE32_TO_HOST(r2); + + reg(ic[0].arg[0]) = r1; + reg(ic[1].arg[0]) = r2; + + cpu->n_translated_instrs ++; + cpu->cd.mips.next_ic ++; +} +X(multi_lw_3_le) +{ + uint32_t *page; + MODE_uint_t rX = reg(ic[0].arg[1]), r1, r2, r3; + MODE_uint_t addr0 = rX + (int32_t)ic[0].arg[2]; + MODE_uint_t addr1 = rX + (int32_t)ic[1].arg[2]; + MODE_uint_t addr2 = rX + (int32_t)ic[2].arg[2]; + uint32_t index0 = addr0 >> 12, index1 = addr1 >> 12, + index2 = addr2 >> 12; + + page = (uint32_t *) cpu->cd.mips.host_load[index0]; + + /* Fallback: */ + if 
(cpu->delay_slot || + page == NULL || (addr0 & 3) != 0 || (addr1 & 3) != 0 || + (addr2 & 3) != 0 || index0 != index1 || index0 != index2) { + /* Normal safe lw: */ + mips32_loadstore[2 * 2 + 1](cpu, ic); + return; + } + + addr0 = (addr0 >> 2) & 0x3ff; + addr1 = (addr1 >> 2) & 0x3ff; + addr2 = (addr2 >> 2) & 0x3ff; + + /* printf("addr0=%x 1=%x 2=%x\n", + (int)addr0, (int)addr1, (int)addr2); */ + + r1 = page[addr0]; + r2 = page[addr1]; + r3 = page[addr2]; + + r1 = LE32_TO_HOST(r1); + r2 = LE32_TO_HOST(r2); + r3 = LE32_TO_HOST(r3); + + reg(ic[0].arg[0]) = r1; + reg(ic[1].arg[0]) = r2; + reg(ic[2].arg[0]) = r3; + + cpu->n_translated_instrs += 2; + cpu->cd.mips.next_ic += 2; +} +X(multi_lw_3_be) +{ + uint32_t *page; + MODE_uint_t rX = reg(ic[0].arg[1]), r1, r2, r3; + MODE_uint_t addr0 = rX + (int32_t)ic[0].arg[2]; + MODE_uint_t addr1 = rX + (int32_t)ic[1].arg[2]; + MODE_uint_t addr2 = rX + (int32_t)ic[2].arg[2]; + uint32_t index0 = addr0 >> 12, index1 = addr1 >> 12, + index2 = addr2 >> 12; + + page = (uint32_t *) cpu->cd.mips.host_load[index0]; + + /* Fallback: */ + if (cpu->delay_slot || + page == NULL || (addr0 & 3) != 0 || (addr1 & 3) != 0 || + (addr2 & 3) != 0 || index0 != index1 || index0 != index2) { + /* Normal safe lw: */ + mips32_loadstore[16 + 2 * 2 + 1](cpu, ic); + return; + } + + addr0 = (addr0 >> 2) & 0x3ff; + addr1 = (addr1 >> 2) & 0x3ff; + addr2 = (addr2 >> 2) & 0x3ff; + + /* printf("addr0=%x 1=%x 2=%x\n", + (int)addr0, (int)addr1, (int)addr2); */ + + r1 = page[addr0]; + r2 = page[addr1]; + r3 = page[addr2]; + + r1 = BE32_TO_HOST(r1); + r2 = BE32_TO_HOST(r2); + r3 = BE32_TO_HOST(r3); + + reg(ic[0].arg[0]) = r1; + reg(ic[1].arg[0]) = r2; + reg(ic[2].arg[0]) = r3; + + cpu->n_translated_instrs += 2; + cpu->cd.mips.next_ic += 2; +} +X(multi_lw_4_le) +{ + uint32_t *page; + MODE_uint_t rX = reg(ic[0].arg[1]), r1, r2, r3, r4; + MODE_uint_t addr0 = rX + (int32_t)ic[0].arg[2]; + MODE_uint_t addr1 = rX + (int32_t)ic[1].arg[2]; + MODE_uint_t addr2 = rX + 
(int32_t)ic[2].arg[2]; + MODE_uint_t addr3 = rX + (int32_t)ic[3].arg[2]; + uint32_t index0 = addr0 >> 12, index1 = addr1 >> 12, + index2 = addr2 >> 12, index3 = addr3 >> 12; + + page = (uint32_t *) cpu->cd.mips.host_load[index0]; + + /* Fallback: */ + if (cpu->delay_slot || + page == NULL || (addr0 & 3) != 0 || (addr1 & 3) != 0 || + (addr2 & 3) != 0 || (addr3 & 3) != 0 || + index0 != index1 || index0 != index2 || index0 != index3) { + /* Normal safe lw: */ + mips32_loadstore[2 * 2 + 1](cpu, ic); + return; + } + + addr0 = (addr0 >> 2) & 0x3ff; + addr1 = (addr1 >> 2) & 0x3ff; + addr2 = (addr2 >> 2) & 0x3ff; + addr3 = (addr3 >> 2) & 0x3ff; + + r1 = page[addr0]; + r2 = page[addr1]; + r3 = page[addr2]; + r4 = page[addr3]; + + r1 = LE32_TO_HOST(r1); + r2 = LE32_TO_HOST(r2); + r3 = LE32_TO_HOST(r3); + r4 = LE32_TO_HOST(r4); + + reg(ic[0].arg[0]) = r1; + reg(ic[1].arg[0]) = r2; + reg(ic[2].arg[0]) = r3; + reg(ic[3].arg[0]) = r4; + + cpu->n_translated_instrs += 3; + cpu->cd.mips.next_ic += 3; +} +X(multi_lw_4_be) +{ + uint32_t *page; + MODE_uint_t rX = reg(ic[0].arg[1]), r1, r2, r3, r4; + MODE_uint_t addr0 = rX + (int32_t)ic[0].arg[2]; + MODE_uint_t addr1 = rX + (int32_t)ic[1].arg[2]; + MODE_uint_t addr2 = rX + (int32_t)ic[2].arg[2]; + MODE_uint_t addr3 = rX + (int32_t)ic[3].arg[2]; + uint32_t index0 = addr0 >> 12, index1 = addr1 >> 12, + index2 = addr2 >> 12, index3 = addr3 >> 12; + + page = (uint32_t *) cpu->cd.mips.host_load[index0]; + + /* Fallback: */ + if (cpu->delay_slot || + page == NULL || (addr0 & 3) != 0 || (addr1 & 3) != 0 || + (addr2 & 3) != 0 || (addr3 & 3) != 0 || + index0 != index1 || index0 != index2 || index0 != index3) { + /* Normal safe lw: */ + mips32_loadstore[16 + 2 * 2 + 1](cpu, ic); + return; + } + + addr0 = (addr0 >> 2) & 0x3ff; + addr1 = (addr1 >> 2) & 0x3ff; + addr2 = (addr2 >> 2) & 0x3ff; + addr3 = (addr3 >> 2) & 0x3ff; + + r1 = page[addr0]; + r2 = page[addr1]; + r3 = page[addr2]; + r4 = page[addr3]; + + r1 = BE32_TO_HOST(r1); + r2 = 
BE32_TO_HOST(r2); + r3 = BE32_TO_HOST(r3); + r4 = BE32_TO_HOST(r4); + + reg(ic[0].arg[0]) = r1; + reg(ic[1].arg[0]) = r2; + reg(ic[2].arg[0]) = r3; + reg(ic[3].arg[0]) = r4; + + cpu->n_translated_instrs += 3; + cpu->cd.mips.next_ic += 3; +} +#endif + + +/* + * multi_addu_3: + */ +X(multi_addu_3) +{ + /* Fallback: */ + if (cpu->delay_slot) { + instr(addu)(cpu, ic); + return; + } + + reg(ic[0].arg[2]) = (int32_t)(reg(ic[0].arg[0]) + reg(ic[0].arg[1])); + reg(ic[1].arg[2]) = (int32_t)(reg(ic[1].arg[0]) + reg(ic[1].arg[1])); + reg(ic[2].arg[2]) = (int32_t)(reg(ic[2].arg[0]) + reg(ic[2].arg[1])); + cpu->n_translated_instrs += 2; + cpu->cd.mips.next_ic += 2; } @@ -2539,6 +3148,73 @@ #ifdef MODE32 /* + * netbsd_pmax_idle(): + * + * s: lui rX, hi + * lw rY, lo(rX) + * nop + * beq zr, rY, s + * nop + */ +X(netbsd_pmax_idle) +{ + uint32_t addr, pageindex, i; + int32_t *page; + + reg(ic[0].arg[0]) = (int32_t)ic[0].arg[1]; + + addr = reg(ic[0].arg[0]) + (int32_t)ic[1].arg[2]; + pageindex = addr >> 12; + i = (addr & 0xfff) >> 2; + page = (int32_t *) cpu->cd.mips.host_load[pageindex]; + + /* Fallback: */ + if (cpu->delay_slot || page == NULL || page[i] != 0) + return; + + instr(idle)(cpu, ic); +} + + +/* + * linux_pmax_idle(): + * + * s: lui rX, hi + * lw rX, lo(rX) + * nop + * bne zr, rX, ... 
+ * nop + * lw rX, ofs(gp) + * nop + * beq zr, rX, s + * nop + */ +X(linux_pmax_idle) +{ + uint32_t addr, addr2, pageindex, pageindex2, i, i2; + int32_t *page, *page2; + + reg(ic[0].arg[0]) = (int32_t)ic[0].arg[1]; + + addr = reg(ic[0].arg[0]) + (int32_t)ic[1].arg[2]; + pageindex = addr >> 12; + i = (addr & 0xfff) >> 2; + page = (int32_t *) cpu->cd.mips.host_load[pageindex]; + + addr2 = reg(ic[5].arg[1]) + (int32_t)ic[5].arg[2]; + pageindex2 = addr2 >> 12; + i2 = (addr2 & 0xfff) >> 2; + page2 = (int32_t *) cpu->cd.mips.host_load[pageindex2]; + + /* Fallback: */ + if (cpu->delay_slot || page == NULL || page[i] != 0 || page2[i2] != 0) + return; + + instr(idle)(cpu, ic); +} + + +/* * netbsd_strlen(): * * lb rV,0(rX) @@ -2592,14 +3268,104 @@ /* - * lui_32bit: - * - * Combination of lui and addiu. - * Note: All 32 bits of arg[2] of the lui instr_call are used. + * addiu_bne_samepage_addiu: */ -X(lui_32bit) +X(addiu_bne_samepage_addiu) { - reg(ic[0].arg[0]) = (int32_t) ic[0].arg[2]; + MODE_uint_t rs, rt; + + if (cpu->delay_slot) { + instr(addiu)(cpu, ic); + return; + } + + cpu->n_translated_instrs += 2; + reg(ic[0].arg[1]) = (int32_t) + ((int32_t)reg(ic[0].arg[0]) + (int32_t)ic[0].arg[2]); + rs = reg(ic[1].arg[0]); + rt = reg(ic[1].arg[1]); + reg(ic[2].arg[1]) = (int32_t) + ((int32_t)reg(ic[2].arg[0]) + (int32_t)ic[2].arg[2]); + if (rs != rt) + cpu->cd.mips.next_ic = (struct mips_instr_call *) ic[1].arg[2]; + else + cpu->cd.mips.next_ic += 2; +} + + +/* + * xor_andi_sll: + */ +X(xor_andi_sll) +{ + /* Fallback: */ + if (cpu->delay_slot) { + instr(xor)(cpu, ic); + return; + } + + reg(ic[0].arg[2]) = reg(ic[0].arg[0]) ^ reg(ic[0].arg[1]); + reg(ic[1].arg[1]) = reg(ic[1].arg[0]) & (uint32_t)ic[1].arg[2]; + reg(ic[2].arg[2]) = (int32_t)(reg(ic[2].arg[0])<<(int32_t)ic[2].arg[1]); + + cpu->n_translated_instrs += 2; + cpu->cd.mips.next_ic += 2; +} + + +/* + * andi_sll: + */ +X(andi_sll) +{ + /* Fallback: */ + if (cpu->delay_slot) { + instr(andi)(cpu, ic); + return; + } + + 
reg(ic[0].arg[1]) = reg(ic[0].arg[0]) & (uint32_t)ic[0].arg[2]; + reg(ic[1].arg[2]) = (int32_t)(reg(ic[1].arg[0])<<(int32_t)ic[1].arg[1]); + + cpu->n_translated_instrs ++; + cpu->cd.mips.next_ic ++; +} + + +/* + * lui_ori: + */ +X(lui_ori) +{ + /* Fallback: */ + if (cpu->delay_slot) { + instr(set)(cpu, ic); + return; + } + + reg(ic[0].arg[0]) = (int32_t)ic[0].arg[1]; + reg(ic[1].arg[1]) = reg(ic[1].arg[0]) | (uint32_t)ic[1].arg[2]; + + cpu->n_translated_instrs ++; + cpu->cd.mips.next_ic ++; +} + + +/* + * lui_addiu: + */ +X(lui_addiu) +{ + /* Fallback: */ + if (cpu->delay_slot) { + instr(set)(cpu, ic); + return; + } + + reg(ic[0].arg[0]) = (int32_t)ic[0].arg[1]; + reg(ic[1].arg[1]) = (int32_t) + ((int32_t)reg(ic[1].arg[0]) + (int32_t)ic[1].arg[2]); + cpu->n_translated_instrs ++; cpu->cd.mips.next_ic ++; } @@ -2739,11 +3505,12 @@ ic[0].arg[0] != ic[0].arg[1] && ic[0].arg[1] == ic[-2].arg[0] && (int32_t)ic[0].arg[2] == -4) { ic[-2].f = instr(sw_loop); - combined; } } +/* Only for 32-bit virtual address translation so far. */ +#ifdef MODE32 /* * Combine: Multiple SW in a row using the same base register * @@ -2758,28 +3525,91 @@ int n_back = (low_addr >> MIPS_INSTR_ALIGNMENT_SHIFT) & (MIPS_IC_ENTRIES_PER_PAGE - 1); - /* Only for 32-bit virtual address translation so far. 
*/ - if (!cpu->is_32bit) + if (n_back < 3) return; - if (n_back < 4) - return; + /* Convert a multi_sw_3 to a multi_sw_4: */ + if ((ic[-3].f == instr(multi_sw_3_be) || + ic[-3].f == instr(multi_sw_3_le)) && + ic[-3].arg[1] == ic[0].arg[1]) { + if (cpu->byte_order == EMUL_LITTLE_ENDIAN) + ic[-3].f = instr(multi_sw_4_le); + else + ic[-3].f = instr(multi_sw_4_be); + } - /* Avoid "overlapping" instruction combinations: */ - if (ic[-4].f == instr(multi_sw_3_be)||ic[-3].f == instr(multi_sw_3_be)|| - ic[-4].f == instr(multi_sw_3_le)||ic[-3].f == instr(multi_sw_3_le)) + /* Convert a multi_sw_2 to a multi_sw_3: */ + if ((ic[-2].f == instr(multi_sw_2_be) || + ic[-2].f == instr(multi_sw_2_le)) && + ic[-2].arg[1] == ic[0].arg[1]) { + if (cpu->byte_order == EMUL_LITTLE_ENDIAN) + ic[-2].f = instr(multi_sw_3_le); + else + ic[-2].f = instr(multi_sw_3_be); + } + + if (ic[-1].f == ic[0].f && ic[-1].arg[1] == ic[0].arg[1]) { + if (cpu->byte_order == EMUL_LITTLE_ENDIAN) + ic[-1].f = instr(multi_sw_2_le); + else + ic[-1].f = instr(multi_sw_2_be); + } +} +#endif + + +/* Only for 32-bit virtual address translation so far. */ +#ifdef MODE32 +/* + * Combine: Multiple LW in a row using the same base register + * + * lw r?,???(rX) + * lw r?,???(rX) + * lw r?,???(rX) + * ... 
+ */ +void COMBINE(multi_lw)(struct cpu *cpu, struct mips_instr_call *ic, + int low_addr) +{ + int n_back = (low_addr >> MIPS_INSTR_ALIGNMENT_SHIFT) + & (MIPS_IC_ENTRIES_PER_PAGE - 1); + + if (n_back < 3) return; - if (ic[-2].f == ic[0].f && ic[-1].f == ic[0].f && + /* Convert a multi_lw_3 to a multi_lw_4: */ + if ((ic[-3].f == instr(multi_lw_3_be) || + ic[-3].f == instr(multi_lw_3_le)) && + ic[-3].arg[1] == ic[0].arg[1] && + ic[-1].arg[0] != ic[0].arg[1]) { + if (cpu->byte_order == EMUL_LITTLE_ENDIAN) + ic[-3].f = instr(multi_lw_4_le); + else + ic[-3].f = instr(multi_lw_4_be); + } + + /* Convert a multi_lw_2 to a multi_lw_3: */ + if ((ic[-2].f == instr(multi_lw_2_be) || + ic[-2].f == instr(multi_lw_2_le)) && ic[-2].arg[1] == ic[0].arg[1] && - ic[-1].arg[1] == ic[0].arg[1]) { + ic[-1].arg[0] != ic[0].arg[1]) { if (cpu->byte_order == EMUL_LITTLE_ENDIAN) - ic[-2].f = instr(multi_sw_3_le); + ic[-2].f = instr(multi_lw_3_le); else - ic[-2].f = instr(multi_sw_3_be); - combined; + ic[-2].f = instr(multi_lw_3_be); + } + + /* Note: Loads to the base register are not allowed in slot -1. */ + if (ic[-1].f == ic[0].f && + ic[-1].arg[1] == ic[0].arg[1] && + ic[-1].arg[0] != ic[0].arg[1]) { + if (cpu->byte_order == EMUL_LITTLE_ENDIAN) + ic[-1].f = instr(multi_lw_2_le); + else + ic[-1].f = instr(multi_lw_2_be); } } +#endif /* @@ -2815,7 +3645,6 @@ ic[-3].arg[1] == ic[-5].arg[0] && ic[-2].f == instr(nop) && ic[-1].f == instr(nop)) { ic[-8].f = instr(netbsd_r3k_picache_do_inv); - combined; } } @@ -2825,15 +3654,49 @@ * * NetBSD's strlen core. * [Conditional] branch, followed by nop. + * NetBSD/pmax' idle loop (and possibly others as well). + * Linux/pmax' idle loop. 
*/ void COMBINE(nop)(struct cpu *cpu, struct mips_instr_call *ic, int low_addr) { int n_back = (low_addr >> MIPS_INSTR_ALIGNMENT_SHIFT) & (MIPS_IC_ENTRIES_PER_PAGE - 1); + if (n_back < 8) + return; + #ifdef MODE32 - if (n_back < 3) + if (ic[-8].f == instr(set) && + ic[-7].f == mips32_loadstore[4 + 1] && + ic[-7].arg[0] == ic[-1].arg[0] && + ic[-7].arg[0] == ic[-3].arg[0] && + ic[-7].arg[0] == ic[-5].arg[0] && + ic[-7].arg[0] == ic[-7].arg[1] && + ic[-7].arg[0] == ic[-8].arg[0] && + ic[-6].f == instr(nop) && + ic[-5].arg[1] == (size_t) &cpu->cd.mips.gpr[MIPS_GPR_ZERO] && + ic[-5].f == instr(bne_samepage_nop) && + ic[-4].f == instr(nop) && + ic[-3].f == mips32_loadstore[4 + 1] && + ic[-2].f == instr(nop) && + ic[-1].arg[1] == (size_t) &cpu->cd.mips.gpr[MIPS_GPR_ZERO] && + ic[-1].arg[2] == (size_t) &ic[-8] && + ic[-1].f == instr(beq_samepage)) { + ic[-8].f = instr(linux_pmax_idle); + return; + } + + if (ic[-4].f == instr(set) && + ic[-3].f == mips32_loadstore[4 + 1] && + ic[-3].arg[0] == ic[-1].arg[0] && + ic[-3].arg[1] == ic[-4].arg[0] && + ic[-2].f == instr(nop) && + ic[-1].arg[1] == (size_t) &cpu->cd.mips.gpr[MIPS_GPR_ZERO] && + ic[-1].arg[2] == (size_t) &ic[-4] && + ic[-1].f == instr(beq_samepage)) { + ic[-4].f = instr(netbsd_pmax_idle); return; + } if ((ic[-3].f == mips32_loadstore[1] || ic[-3].f == mips32_loadstore[16 + 1]) && @@ -2844,23 +3707,17 @@ ic[-1].arg[1] == (size_t) &cpu->cd.mips.gpr[MIPS_GPR_ZERO] && ic[-1].f == instr(bne_samepage)) { ic[-3].f = instr(netbsd_strlen); - combined; return; } #endif - if (n_back < 1) - return; - if (ic[-1].f == instr(bne_samepage)) { ic[-1].f = instr(bne_samepage_nop); - combined; return; } if (ic[-1].f == instr(beq_samepage)) { ic[-1].f = instr(beq_samepage_nop); - combined; return; } @@ -2871,46 +3728,111 @@ /* * Combine: * + * xor + andi + sll + * andi + sll + */ +void COMBINE(sll)(struct cpu *cpu, struct mips_instr_call *ic, int low_addr) +{ + int n_back = (low_addr >> MIPS_INSTR_ALIGNMENT_SHIFT) + & 
(MIPS_IC_ENTRIES_PER_PAGE - 1); + + if (n_back < 2) + return; + + if (ic[-2].f == instr(xor) && ic[-1].f == instr(andi)) { + ic[-2].f = instr(xor_andi_sll); + return; + } + + if (ic[-1].f == instr(andi)) { + ic[-1].f = instr(andi_sll); + return; + } +} + + +/* + * lui + ori + */ +void COMBINE(ori)(struct cpu *cpu, struct mips_instr_call *ic, int low_addr) +{ + int n_back = (low_addr >> MIPS_INSTR_ALIGNMENT_SHIFT) + & (MIPS_IC_ENTRIES_PER_PAGE - 1); + + if (n_back < 1) + return; + + if (ic[-1].f == instr(set)) { + ic[-1].f = instr(lui_ori); + return; + } +} + + +/* + * addu + addu + addu + */ +void COMBINE(addu)(struct cpu *cpu, struct mips_instr_call *ic, int low_addr) +{ + int n_back = (low_addr >> MIPS_INSTR_ALIGNMENT_SHIFT) + & (MIPS_IC_ENTRIES_PER_PAGE - 1); + + if (n_back < 4) + return; + + /* Avoid "overlapping" instruction combinations: */ + if (ic[-4].f == instr(multi_addu_3) || + ic[-3].f == instr(multi_addu_3)) + return; + + if (ic[-2].f == instr(addu) && ic[-1].f == instr(addu)) { + ic[-2].f = instr(multi_addu_3); + return; + } +} + + +/* + * Combine: + * * [Conditional] branch, followed by addiu. - * lui + addiu. 
*/ void COMBINE(addiu)(struct cpu *cpu, struct mips_instr_call *ic, int low_addr) { int n_back = (low_addr >> MIPS_INSTR_ALIGNMENT_SHIFT) & (MIPS_IC_ENTRIES_PER_PAGE - 1); - if (n_back < 1) + if (n_back < 2) return; - if (ic[-1].f == instr(set) && ic[-1].arg[0] == ic[0].arg[0] && - ic[0].arg[0] == ic[0].arg[1]) { - ic[-1].f = instr(lui_32bit); - ic[-1].arg[2] = (int32_t) (ic[-1].arg[1] + ic[0].arg[2]); - combined; + if (ic[-2].f == instr(addiu) && + ic[-1].f == instr(bne_samepage)) { + ic[-2].f = instr(addiu_bne_samepage_addiu); + return; + } + + if (ic[-1].f == instr(set)) { + ic[-1].f = instr(lui_addiu); return; } if (ic[-1].f == instr(b_samepage)) { ic[-1].f = instr(b_samepage_addiu); - combined; return; } if (ic[-1].f == instr(beq_samepage)) { ic[-1].f = instr(beq_samepage_addiu); - combined; return; } if (ic[-1].f == instr(bne_samepage)) { ic[-1].f = instr(bne_samepage_addiu); - combined; return; } if (ic[-1].f == instr(jr_ra)) { ic[-1].f = instr(jr_ra_addiu); - combined; return; } @@ -2932,7 +3854,6 @@ if (ic[-1].f == instr(b_samepage)) { ic[-1].f = instr(b_samepage_daddiu); - combined; } /* TODO: other branches that are followed by daddiu should be here */ @@ -2956,7 +3877,7 @@ uint32_t iword, imm; unsigned char *page; unsigned char ib[4]; - int main_opcode, rt, rs, rd, sa, s6, x64 = 0; + int main_opcode, rt, rs, rd, sa, s6, x64 = 0, s10; int in_crosspage_delayslot = 0; void (*samepage_function)(struct cpu *, struct mips_instr_call *); int store, signedness, size; @@ -3037,6 +3958,7 @@ sa = (iword >> 6) & 31; imm = (int16_t)iword; s6 = iword & 63; + s10 = (rs << 5) | sa; switch (main_opcode) { @@ -3089,6 +4011,25 @@ ic->arg[2] = (size_t)&cpu->cd.mips.gpr[rd]; if (rd == MIPS_GPR_ZERO) ic->f = instr(nop); + if (ic->f == instr(sll)) + cpu->cd.mips.combination_check = COMBINE(sll); + if (ic->f == instr(nop)) + cpu->cd.mips.combination_check = COMBINE(nop); + + /* Special checks for MIPS32/64 revision 2 opcodes, + such as rotation instructions: */ + if (sa >= 0 
&& rs != 0x00) { + switch (rs) { + /* TODO: [d]ror, etc. */ + default:goto bad; + } + } + if (sa < 0 && (s10 & 0x1f) != 0) { + switch (s10 & 0x1f) { + /* TODO: [d]rorv, etc. */ + default:goto bad; + } + } break; case SPECIAL_ADD: @@ -3220,6 +4161,9 @@ default:if (rd == MIPS_GPR_ZERO) ic->f = instr(nop); } + + if (ic->f == instr(addu)) + cpu->cd.mips.combination_check = COMBINE(addu); break; case SPECIAL_JR: @@ -3263,7 +4207,12 @@ break; case SPECIAL_BREAK: - ic->f = instr(break); + if (((iword >> 6) & 0xfffff) == 0x30378) { + /* "Magic trap" for REBOOT: */ + ic->f = instr(reboot); + } else { + ic->f = instr(break); + } break; case SPECIAL_SYNC: @@ -3390,6 +4339,8 @@ if (rt == MIPS_GPR_ZERO) ic->f = instr(nop); + if (ic->f == instr(ori)) + cpu->cd.mips.combination_check = COMBINE(ori); if (ic->f == instr(addiu)) cpu->cd.mips.combination_check = COMBINE(addiu); if (ic->f == instr(daddiu)) @@ -3455,12 +4406,44 @@ case COP0_DERET: ic->f = instr(deret); break; - case COP0_IDLE: + case COP0_WAIT: + ic->f = instr(wait); + if (cpu->cd.mips.cpu_type.rev != MIPS_RM5200 && + cpu->cd.mips.cpu_type.isa_level < 32) { + static int warned = 0; + ic->f = instr(reserved); + if (!warned) { + fatal("{ WARNING: Attempt to " + "execute the WAIT instruct" + "ion, but the emulated CPU " + "is neither RM52xx, nor " + "MIPS32/64! }\n"); + warned = 1; + } + } + break; case COP0_STANDBY: - case COP0_SUSPEND: + /* NOTE: Reusing the 'wait' instruction: */ + ic->f = instr(wait); + if (cpu->cd.mips.cpu_type.rev != MIPS_R4100) { + static int warned = 0; + ic->f = instr(reserved); + if (!warned) { + fatal("{ WARNING: Attempt to " + "execute a R41xx instruct" + "ion, but the emulated CPU " + "doesn't support it! }\n"); + warned = 1; + } + } + break; case COP0_HIBERNATE: /* TODO */ - ic->f = instr(nop); + goto bad; + case COP0_SUSPEND: + /* Used by NetBSD on HPCmips (VR41xx) to + halt the machine. 
*/ + ic->f = instr(reboot); break; case COP0_EI: if (cpu->cd.mips.cpu_type.rev == MIPS_R5900) { @@ -3497,6 +4480,12 @@ ic->arg[1] = rd + ((iword & 7) << 5); ic->arg[2] = addr & 0xffc; ic->f = rs == COPz_MFCz? instr(mfc0) : instr(dmfc0); + if (rs == COPz_MFCz && (iword & 7) == 0 && + rd != COP0_COUNT) + ic->f = instr(mfc0_select0); + if (rs == COPz_DMFCz && (iword & 7) == 0 && + rd != COP0_COUNT) + ic->f = instr(dmfc0_select0); if (rt == MIPS_GPR_ZERO) ic->f = instr(nop); break; @@ -3830,11 +4819,14 @@ if (!store && rt == MIPS_GPR_ZERO) ic->arg[0] = (size_t)&cpu->cd.mips.scratch; - /* Check for multiple stores in a row using the same + /* Check for multiple loads or stores in a row using the same base register: */ - if (main_opcode == HI6_SW && rs == MIPS_GPR_SP) +#ifdef MODE32 + if (main_opcode == HI6_LW) + cpu->cd.mips.combination_check = COMBINE(multi_lw); + if (main_opcode == HI6_SW) cpu->cd.mips.combination_check = COMBINE(multi_sw); - +#endif break; case HI6_LL: @@ -3967,6 +4959,50 @@ switch (s6) { + case SPECIAL3_EXT: + /* TODO: Cleanup and extend to DEXT... 
etc */ + { + int msbd = rd, lsb = (iword >> 6) & 0x1f; + ic->arg[0] = (size_t)&cpu->cd.mips.gpr[rt]; + ic->arg[1] = (size_t)&cpu->cd.mips.gpr[rs]; + ic->arg[2] = (msbd << 5) + lsb; + ic->f = instr(ext); + if (rt == MIPS_GPR_ZERO) + ic->f = instr(nop); + } + break; + + case SPECIAL3_BSHFL: + ic->arg[0] = (size_t)&cpu->cd.mips.gpr[rt]; + ic->arg[1] = (size_t)&cpu->cd.mips.gpr[rd]; + switch (s10) { + case BSHFL_WSBH: + ic->f = instr(wsbh); + break; + case BSHFL_SEB: + ic->f = instr(seb); + break; + case BSHFL_SEH: + ic->f = instr(seh); + break; + default:goto bad; + } + break; + + case SPECIAL3_DBSHFL: + ic->arg[0] = (size_t)&cpu->cd.mips.gpr[rt]; + ic->arg[1] = (size_t)&cpu->cd.mips.gpr[rd]; + switch (s10) { + case BSHFL_DSBH: + ic->f = instr(dsbh); + break; + case BSHFL_DSHD: + ic->f = instr(dshd); + break; + default:goto bad; + } + break; + case SPECIAL3_RDHWR: ic->arg[0] = (size_t)&cpu->cd.mips.gpr[rt]; @@ -4007,9 +5043,6 @@ } #endif - if (ic->f == instr(nop) && cpu->cd.mips.combination_check == NULL) - cpu->cd.mips.combination_check = COMBINE(nop); - #define DYNTRANS_TO_BE_TRANSLATED_TAIL #include "cpu_dyntrans.c"