--- trunk/src/cpus/cpu_dyntrans.c 2007/10/08 16:19:28 21 +++ trunk/src/cpus/cpu_dyntrans.c 2007/10/08 16:19:37 22 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 Anders Gavare. All rights reserved. + * Copyright (C) 2005-2006 Anders Gavare. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -25,13 +25,34 @@ * SUCH DAMAGE. * * - * $Id: cpu_dyntrans.c,v 1.41 2005/11/23 22:03:31 debug Exp $ + * $Id: cpu_dyntrans.c,v 1.54 2006/02/09 22:55:20 debug Exp $ * * Common dyntrans routines. Included from cpu_*.c. */ #ifdef DYNTRANS_CPU_RUN_INSTR +#if 1 /* IC statistics: */ +static void gather_statistics(struct cpu *cpu) +{ + struct DYNTRANS_IC *ic = cpu->cd.DYNTRANS_ARCH.next_ic; + static long long n = 0; + static FILE *f = NULL; + + n++; + if (n < 100000000) + return; + + if (f == NULL) { + f = fopen("instruction_call_statistics.raw", "w"); + if (f == NULL) { + fatal("Unable to open statistics file for output.\n"); + exit(1); + } + } + fwrite(&ic->f, 1, sizeof(void *), f); +} +#else /* PC statistics: */ static void gather_statistics(struct cpu *cpu) { uint64_t a; @@ -89,13 +110,13 @@ } } } - +#endif /* PC statistics */ #define S gather_statistics(cpu) #ifdef DYNTRANS_VARIABLE_INSTRUCTION_LENGTH #define I ic = cpu->cd.DYNTRANS_ARCH.next_ic; \ - cpu->cd.DYNTRANS_ARCH.next_ic += ic->len; \ + cpu->cd.DYNTRANS_ARCH.next_ic += ic->arg[0]; \ ic->f(cpu, ic); #else #define I ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; ic->f(cpu, ic); @@ -178,9 +199,16 @@ sizeof(instr), MEM_READ, CACHE_INSTRUCTION)) { fatal("XXX_cpu_run_instr(): could not read " "the instruction\n"); - } else + } else { cpu_disassemble_instr(cpu->machine, cpu, instr, 1, 0, 0); +#ifdef DYNTRANS_MIPS +/* TODO: generalize, not just MIPS */ + /* Show the instruction in the delay slot, + if any: */ + fatal("TODO: check for delay slot!\n"); +#endif + } } /* When single-stepping, multiple instruction calls cannot @@ -263,6 +291,13 @@ DYNTRANS_INSTR_ALIGNMENT_SHIFT); cpu->pc += (DYNTRANS_IC_ENTRIES_PER_PAGE << DYNTRANS_INSTR_ALIGNMENT_SHIFT); + } else if (low_pc == DYNTRANS_IC_ENTRIES_PER_PAGE + 1) { + /* Switch to next page and skip an instruction which was + already executed (in a delay slot): */ + cpu->pc &= ~((DYNTRANS_IC_ENTRIES_PER_PAGE-1) << + DYNTRANS_INSTR_ALIGNMENT_SHIFT); + cpu->pc += ((DYNTRANS_IC_ENTRIES_PER_PAGE + 1) << + DYNTRANS_INSTR_ALIGNMENT_SHIFT); } #ifdef DYNTRANS_PPC @@ -270,9 +305,9 @@ { uint32_t old = cpu->cd.ppc.spr[SPR_DEC]; cpu->cd.ppc.spr[SPR_DEC] = (uint32_t) (old - n_instrs); - if ((old >> 31) == 0 && (cpu->cd.ppc.spr[SPR_DEC] >> 31) == 1) + if ((old >> 31) == 0 && (cpu->cd.ppc.spr[SPR_DEC] >> 31) == 1 + && !(cpu->cd.ppc.cpu_type.flags & PPC_NO_DEC)) cpu->cd.ppc.dec_intr_pending = 1; - old = cpu->cd.ppc.spr[SPR_TBL]; cpu->cd.ppc.spr[SPR_TBL] += n_instrs; if ((old >> 31) == 1 && (cpu->cd.ppc.spr[SPR_TBL] >> 31) == 0) @@ -361,9 +396,6 @@ #ifdef DYNTRANS_MIPS gpr[MIPS_GPR_A0 #endif -#ifdef DYNTRANS_NEWMIPS - r[0 /* TODO */ -#endif #ifdef DYNTRANS_PPC gpr[3 #endif @@ -371,7 +403,7 @@ r[2 #endif #ifdef DYNTRANS_SPARC - r_i[0 + r[24 #endif + x]; #endif @@ -403,6 +435,7 @@ #ifdef DYNTRANS_TC_ALLOCATE_DEFAULT_PAGE + /* forward declaration of to_be_translated and end_of_page: */ static void instr(to_be_translated)(struct cpu *, struct DYNTRANS_IC *); static void instr(end_of_page)(struct cpu *,struct DYNTRANS_IC *); @@ -410,6 +443,14 @@ static void instr32(to_be_translated)(struct cpu *, struct DYNTRANS_IC *); static void instr32(end_of_page)(struct cpu *,struct DYNTRANS_IC *); #endif + +#ifdef DYNTRANS_DELAYSLOT +static void instr(end_of_page2)(struct cpu *,struct DYNTRANS_IC *); +#ifdef DYNTRANS_DUALMODE_32 +static void instr32(end_of_page2)(struct cpu *,struct DYNTRANS_IC *); +#endif +#endif + /* * XXX_tc_allocate_default_page(): * @@ -436,12 +477,20 @@ #endif instr(to_be_translated); - ppp->ics[DYNTRANS_IC_ENTRIES_PER_PAGE].f = + ppp->ics[DYNTRANS_IC_ENTRIES_PER_PAGE + 0].f = #ifdef DYNTRANS_DUALMODE_32 cpu->is_32bit? instr32(end_of_page) : #endif instr(end_of_page); +#ifdef DYNTRANS_DELAYSLOT + ppp->ics[DYNTRANS_IC_ENTRIES_PER_PAGE + 1].f = +#ifdef DYNTRANS_DUALMODE_32 + cpu->is_32bit? instr32(end_of_page2) : +#endif + instr(end_of_page2); +#endif + cpu->translation_cache_cur_ofs += sizeof(struct DYNTRANS_TC_PHYSPAGE); cpu->translation_cache_cur_ofs --; @@ -489,14 +538,10 @@ } else vph_p = cpu->cd.alpha.vph_table0[a]; #else -#ifdef DYNTRANS_IA64 - fatal("IA64 todo\n"); -#else - fatal("Neither alpha, ia64, nor 32-bit? 3\n"); + fatal("Neither alpha nor 32-bit? 3\n"); exit(1); #endif #endif -#endif /* Virtual to physical address translation: */ ok = 0; @@ -512,14 +557,10 @@ ok = 1; } #else -#ifdef DYNTRANS_IA64 - fatal("IA64 todo\n"); -#else - fatal("Neither alpha, ia64, nor 32-bit? 4\n"); + fatal("Neither alpha nor 32-bit? 4\n"); exit(1); #endif #endif -#endif if (!ok) { uint64_t paddr; @@ -660,12 +701,11 @@ #else uint64_t #endif - cached_pc; + cached_pc = cpu->pc; struct DYNTRANS_TC_PHYSPAGE *ppp; #ifdef MODE32 int index; - cached_pc = cpu->pc; index = DYNTRANS_ADDR_TO_PAGENR(cached_pc); ppp = cpu->cd.DYNTRANS_ARCH.phys_page[index]; if (ppp != NULL) @@ -675,7 +715,6 @@ uint32_t a, b; int kernel = 0; struct alpha_vph_page *vph_p; - cached_pc = cpu->pc; a = (cached_pc >> ALPHA_LEVEL0_SHIFT) & (ALPHA_LEVEL0 - 1); b = (cached_pc >> ALPHA_LEVEL1_SHIFT) & (ALPHA_LEVEL1 - 1); if ((cached_pc >> ALPHA_TOPSHIFT) == ALPHA_TOP_KERNEL) { @@ -689,17 +728,13 @@ goto have_it; } #else + fatal("X1: cached_pc=%016llx\n", (long long)cached_pc); /* Temporary, to avoid a compiler warning: */ - cached_pc = 0; ppp = NULL; -#ifdef DYNTRANS_IA64 - fatal("IA64 todo\n"); -#else - fatal("Neither alpha, ia64, nor 32-bit? 1\n"); + fatal("Neither alpha nor 32-bit? 1\n"); exit(1); #endif #endif -#endif DYNTRANS_PC_TO_POINTERS_GENERIC(cpu); return; @@ -800,11 +835,7 @@ cpu->cd.alpha.vph_default_page; } #else /* !DYNTRANS_ALPHA */ -#ifdef DYNTRANS_IA64 - fatal("IA64: blah blah TODO\n"); -#else - fatal("Not yet for non-1-level, non-Alpha, non-ia64\n"); -#endif /* !DYNTRANS_IA64 */ + fatal("Not yet for non-1-level, non-Alpha\n"); #endif /* !DYNTRANS_ALPHA */ #endif } @@ -821,6 +852,10 @@ * flags should be one of * INVALIDATE_PADDR INVALIDATE_VADDR or INVALIDATE_ALL * + * In addition, for INVALIDATE_ALL, INVALIDATE_VADDR_UPPER4 may be set and + * bit 31..28 of addr are used to select the virtual addresses to invalidate. + * (This is useful for PowerPC emulation, when segment registers are updated.) + * * In the case when all translations are invalidated, paddr doesn't need * to be supplied. * @@ -828,7 +863,7 @@ * the quick translation array, not from the linear * vph_tlb_entry[] array. Hopefully this is enough anyway. */ -void DYNTRANS_INVALIDATE_TC(struct cpu *cpu, uint64_t paddr, int flags) +void DYNTRANS_INVALIDATE_TC(struct cpu *cpu, uint64_t addr, int flags) { int r; #ifdef MODE32 @@ -836,17 +871,34 @@ #else uint64_t #endif - addr_page = paddr & ~(DYNTRANS_PAGESIZE - 1); + addr_page = addr & ~(DYNTRANS_PAGESIZE - 1); /* fatal("invalidate(): "); */ - /* Quick case for virtual addresses: see note above. */ + /* Quick case for _one_ virtual addresses: see note above. */ if (flags & INVALIDATE_VADDR) { /* fatal("vaddr 0x%08x\n", (int)addr_page); */ DYNTRANS_INVALIDATE_TLB_ENTRY(cpu, addr_page, flags); return; } + /* Invalidate everything: */ +#ifdef DYNTRANS_PPC + if (flags & INVALIDATE_ALL && flags & INVALIDATE_VADDR_UPPER4) { + /* fatal("all, upper4 (PowerPC segment)\n"); */ + for (r=0; rcd.DYNTRANS_ARCH.vph_tlb_entry[r].valid && + (cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r].vaddr_page + & 0xf0000000) == addr_page) { + DYNTRANS_INVALIDATE_TLB_ENTRY(cpu, cpu->cd. + DYNTRANS_ARCH.vph_tlb_entry[r].vaddr_page, + 0); + cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r].valid=0; + } + } + return; + } +#endif if (flags & INVALIDATE_ALL) { /* fatal("all\n"); */ for (r=0; rcd.DYNTRANS_ARCH.vph_tlb_entry[r].valid && ( - (cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r].paddr_page == - addr_page && flags & INVALIDATE_PADDR) || - flags & INVALIDATE_ALL) ) { + if (cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r].valid && addr_page + == cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r].paddr_page) { DYNTRANS_INVALIDATE_TLB_ENTRY(cpu, cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r].vaddr_page, flags); @@ -1022,12 +1077,7 @@ vph_p = cpu->cd.alpha.vph_table0[a]; vph_p->phys_page[b] = NULL; #else /* !DYNTRANS_ALPHA */ -#ifdef DYNTRANS_IA64 - fatal("IA64: blah yo yo TODO\n"); -#else - fatal("Not yet for non-1-level, non-Alpha, " - "non-ia64\n"); -#endif /* !DYNTRANS_IA64 */ + fatal("Not yet for non-1-level, non-Alpha\n"); #endif /* !DYNTRANS_ALPHA */ #endif } @@ -1068,14 +1118,10 @@ " p=0x%x\n", (int)vaddr_page, host_page, writeflag, (int)paddr_page); */ #else /* !MODE32 */ -#ifdef DYNTRANS_IA64 - fatal("IA64 update todo\n"); -#else - fatal("Neither 32-bit, IA64, nor Alpha? 2\n"); + fatal("Neither 32-bit nor Alpha? 2\n"); exit(1); #endif #endif -#endif if (writeflag & MEMORY_USER_ACCESS) { writeflag &= ~MEMORY_USER_ACCESS; @@ -1300,6 +1346,12 @@ fatal("BREAKPOINT: pc = 0x%llx\n(The " "instruction has not yet executed.)\n", (long long)cpu->pc); +#ifdef DYNTRANS_DELAYSLOT + if (cpu->cd.DYNTRANS_ARCH.delay_slot != + NOT_DELAYED) + fatal("ERROR! Breakpoint in a delay" + " slot! Not yet supported.\n"); +#endif single_step_breakpoint = 1; single_step = 1; goto stop_running_translated; @@ -1321,7 +1373,7 @@ */ #ifdef MODE32 if (!(cpu->cd.DYNTRANS_ARCH.cur_physpage->flags & TRANSLATIONS)) { - uint32_t index = DYNTRANS_ADDR_TO_PAGENR(addr); + uint32_t index = DYNTRANS_ADDR_TO_PAGENR((uint32_t)addr); cpu->cd.DYNTRANS_ARCH.phystranslation[index >> 5] |= (1 << (index & 31)); } @@ -1329,6 +1381,122 @@ cpu->cd.DYNTRANS_ARCH.cur_physpage->flags |= TRANSLATIONS; +#ifdef DYNTRANS_BACKEND + /* + * "Empty"/simple native dyntrans backend stuff: + * + * 1) If no translation is currently being done, but the translated + * instruction was simple enough, then let's start making a new + * native translation block. + * + * 2) If a native translation block is currently being constructed, + * but this instruction wasn't simple enough, then end the block + * (without including this instruction). + * + * 3) If a native translation block is currently being constructed, + * and this is a simple instruction, then add it. + */ + if (simple && cpu->translation_context.p == NULL && + dyntrans_backend_enable) { + size_t s = 0; + + if (cpu->translation_context.translation_buffer == NULL) { + cpu->translation_context.translation_buffer = + zeroed_alloc(DTB_TRANSLATION_SIZE_MAX + + DTB_TRANSLATION_SIZE_MARGIN); + } + + cpu->translation_context.p = + cpu->translation_context.translation_buffer; + + cpu->translation_context.ic_page = + cpu->cd.DYNTRANS_ARCH.cur_ic_page; + cpu->translation_context.start_instr_call_index = + ((size_t)ic - (size_t)cpu->cd.DYNTRANS_ARCH.cur_ic_page) + / (sizeof(*ic)); + + dtb_function_prologue(&cpu->translation_context, &s); + cpu->translation_context.p += s; + cpu->translation_context.n_simple = 0; + } + + /* If this is not a continuation of a simple translation, then + stop now! */ + if (cpu->translation_context.ic_page != cpu->cd.DYNTRANS_ARCH. + cur_ic_page || ic != &cpu->cd.DYNTRANS_ARCH.cur_ic_page[ + cpu->translation_context.start_instr_call_index + + cpu->translation_context.n_simple]) + simple = 0; + + if (cpu->translation_context.p != NULL && !simple) { + size_t s = 0, total; + + if (cpu->translation_context.n_simple > 1) { + dtb_generate_ptr_inc(cpu, &cpu->translation_context, + &s, &cpu->cd.DYNTRANS_ARCH.next_ic, + (cpu->translation_context.n_simple - 1) * + sizeof(*(cpu->cd.DYNTRANS_ARCH.next_ic))); + cpu->translation_context.p += s; + } + + dtb_function_epilogue(&cpu->translation_context, &s); + cpu->translation_context.p += s; + + cpu_dtb_do_fixups(cpu); +#if 0 +{ +int i; +unsigned char *addr = cpu->translation_context.translation_buffer; +printf("index = %i\n", cpu->translation_context.start_instr_call_index); +quiet_mode = 0; +for (i=0; i<4*32; i+=4) + alpha_cpu_disassemble_instr(cpu, (unsigned char *)addr + i, + 0, i, 0); +} +#endif + total = (size_t)cpu->translation_context.p - + (size_t)cpu->translation_context.translation_buffer; + + /* Copy the translated block to the translation cache: */ + /* Align first: */ + cpu->translation_cache_cur_ofs --; + cpu->translation_cache_cur_ofs |= 31; + cpu->translation_cache_cur_ofs ++; + + memcpy(cpu->translation_cache + cpu->translation_cache_cur_ofs, + cpu->translation_context.translation_buffer, total); + + /* Set the ic pointer: */ + ((struct DYNTRANS_IC *)cpu->translation_context.ic_page) + [cpu->translation_context.start_instr_call_index].f = + (void *) + (cpu->translation_cache + cpu->translation_cache_cur_ofs); + + /* Align cur_ofs afterwards as well, just to be safe. */ + cpu->translation_cache_cur_ofs += total; + cpu->translation_cache_cur_ofs --; + cpu->translation_cache_cur_ofs |= 31; + cpu->translation_cache_cur_ofs ++; + + /* Set the "combined instruction" flag for this page: */ + cpu->cd.DYNTRANS_ARCH.cur_physpage = (void *) + cpu->cd.DYNTRANS_ARCH.cur_ic_page; + cpu->cd.DYNTRANS_ARCH.cur_physpage->flags |= COMBINATIONS; + + dtb_host_cacheinvalidate(0,0); /* p , size ... ); */ + + cpu->translation_context.p = NULL; + } + if (cpu->translation_context.p != NULL) { + size_t s = 0; + dtb_generate_fcall(cpu, &cpu->translation_context, + &s, (size_t)ic->f, (size_t)ic); + cpu->translation_context.p += s; + cpu->translation_context.n_simple ++; + } +#endif /* DYNTRANS_BACKEND */ + + /* * Now it is time to check for combinations of instructions that can * be converted into a single function call.