--- trunk/src/cpus/cpu_dyntrans.c 2007/10/08 16:19:05 17 +++ trunk/src/cpus/cpu_dyntrans.c 2007/10/08 16:19:11 18 @@ -25,13 +25,81 @@ * SUCH DAMAGE. * * - * $Id: cpu_dyntrans.c,v 1.19 2005/10/07 22:10:51 debug Exp $ + * $Id: cpu_dyntrans.c,v 1.27 2005/10/27 14:01:13 debug Exp $ * * Common dyntrans routines. Included from cpu_*.c. */ #ifdef DYNTRANS_CPU_RUN_INSTR +static void gather_statistics(struct cpu *cpu) +{ + uint64_t a; + int low_pc = ((size_t)cpu->cd.DYNTRANS_ARCH.next_ic - (size_t) + cpu->cd.DYNTRANS_ARCH.cur_ic_page) / sizeof(struct DYNTRANS_IC); + if (low_pc < 0 || low_pc >= DYNTRANS_IC_ENTRIES_PER_PAGE) + return; + +#if 1 + /* Use the physical address: */ + cpu->cd.DYNTRANS_ARCH.cur_physpage = (void *) + cpu->cd.DYNTRANS_ARCH.cur_ic_page; + a = cpu->cd.DYNTRANS_ARCH.cur_physpage->physaddr; +#else + /* Use the PC (virtual address): */ + a = cpu->pc; +#endif + + a &= ~((DYNTRANS_IC_ENTRIES_PER_PAGE-1) << + DYNTRANS_INSTR_ALIGNMENT_SHIFT); + a += low_pc << DYNTRANS_INSTR_ALIGNMENT_SHIFT; + + /* + * TODO: Everything below this line should be cleaned up :-) + */ +a &= 0x03ffffff; +{ + static long long *array = NULL; + static char *array_16kpage_in_use = NULL; + static int n = 0; + a >>= DYNTRANS_INSTR_ALIGNMENT_SHIFT; + if (array == NULL) + array = zeroed_alloc(sizeof(long long) * 16384*1024); + if (array_16kpage_in_use == NULL) + array_16kpage_in_use = zeroed_alloc(sizeof(char) * 1024); + a &= (16384*1024-1); + array[a] ++; + array_16kpage_in_use[a / 16384] = 1; + n++; + if ((n & 0x3fffffff) == 0) { + FILE *f = fopen("statistics.out", "w"); + int i, j; + printf("Saving statistics... "); fflush(stdout); + for (i=0; i<1024; i++) + if (array_16kpage_in_use[i]) { + for (j=0; j<16384; j++) + if (array[i*16384 + j] > 0) + fprintf(f, "%lli\t0x%016llx\n", + (long long)array[i*16384+j], + (long long)((i*16384+j) << + DYNTRANS_INSTR_ALIGNMENT_SHIFT)); + } + fclose(f); + printf("n=0x%08x\n", n); + } +} +} + + +#define S gather_statistics(cpu) + +#ifdef DYNTRANS_VARIABLE_INSTRUCTION_LENGTH +#define I ic = cpu->cd.DYNTRANS_ARCH.next_ic; ic->f(cpu, ic); +#else +#define I ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; ic->f(cpu, ic); +#endif + + /* * XXX_cpu_run_instr(): * @@ -72,6 +140,9 @@ cpu->n_translated_instrs = 0; cpu->running_translated = 1; + cpu->cd.DYNTRANS_ARCH.cur_physpage = (void *) + cpu->cd.DYNTRANS_ARCH.cur_ic_page; + if (single_step || cpu->machine->instruction_trace) { /* * Single-step: @@ -122,26 +193,34 @@ ~(COMBINATIONS | TRANSLATIONS); } + if (show_opcode_statistics) + S; + /* Execute just one instruction: */ ic->f(cpu, ic); n_instrs = 1; - } else { - /* Execute multiple instructions: */ + } else if (show_opcode_statistics) { + /* Gather statistics while executing multiple instructions: */ n_instrs = 0; for (;;) { struct DYNTRANS_IC *ic; -#ifdef DYNTRANS_VARIABLE_INSTRUCTION_LENGTH -#define I ic = cpu->cd.DYNTRANS_ARCH.next_ic; ic->f(cpu, ic); -#else -#define I ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; ic->f(cpu, ic); -#endif + S; I; S; I; S; I; S; I; S; I; S; I; + S; I; S; I; S; I; S; I; S; I; S; I; + S; I; S; I; S; I; S; I; S; I; S; I; + S; I; S; I; S; I; S; I; S; I; S; I; - I; I; I; I; I; I; I; I; I; I; - I; I; I; I; I; I; I; I; I; I; - I; I; I; I; I; I; I; I; I; I; - I; I; I; I; I; I; I; I; I; I; - I; I; I; I; I; I; I; I; I; I; + n_instrs += 24; + + if (!cpu->running_translated || + n_instrs + cpu->n_translated_instrs >= 16384) + break; + } + } else { + /* Execute multiple instructions: */ + n_instrs = 0; + for (;;) { + struct DYNTRANS_IC *ic; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; @@ -150,9 +229,8 @@ I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; - I; I; I; I; I; I; I; I; I; I; - n_instrs += 120; + n_instrs += 60; if (!cpu->running_translated || n_instrs + cpu->n_translated_instrs >= 16384) @@ -353,6 +431,10 @@ instr(end_of_page); cpu->translation_cache_cur_ofs += sizeof(struct DYNTRANS_TC_PHYSPAGE); + + cpu->translation_cache_cur_ofs --; + cpu->translation_cache_cur_ofs |= 63; + cpu->translation_cache_cur_ofs ++; } #endif /* DYNTRANS_TC_ALLOCATE_DEFAULT_PAGE */ @@ -380,7 +462,7 @@ #ifdef MODE32 int index; cached_pc = cpu->pc; - index = cached_pc >> 12; + index = DYNTRANS_ADDR_TO_PAGENR(cached_pc); #else #ifdef DYNTRANS_ALPHA uint32_t a, b; @@ -458,11 +540,30 @@ } } cached_pc = cpu->pc; +#ifdef MODE32 + index = DYNTRANS_ADDR_TO_PAGENR(cached_pc); +#endif physaddr = paddr; } - if (cpu->translation_cache_cur_ofs >= DYNTRANS_CACHE_SIZE) +#ifdef MODE32 + if (cpu->cd.DYNTRANS_ARCH.host_load[index] == NULL) { + unsigned char *host_page = memory_paddr_to_hostaddr(cpu->mem, + physaddr, MEM_READ); + if (host_page != NULL) { + int q = DYNTRANS_PAGESIZE - 1; + host_page += (physaddr & + ((1 << BITS_PER_MEMBLOCK) - 1) & ~q); + cpu->update_translation_table(cpu, cached_pc & ~q, + host_page, TLB_CODE, physaddr & ~q); + } + } +#endif + + if (cpu->translation_cache_cur_ofs >= DYNTRANS_CACHE_SIZE) { + fatal("[ dyntrans: resetting the translation cache ]\n"); cpu_create_or_reset_tc(cpu); + } pagenr = DYNTRANS_ADDR_TO_PAGENR(physaddr); table_index = PAGENR_TO_TABLE_INDEX(pagenr); @@ -509,11 +610,12 @@ vph_p->phys_page[b] = ppp; #endif - cpu->invalidate_translation_caches_paddr(cpu, physaddr, - JUST_MARK_AS_NON_WRITABLE); + cpu->invalidate_translation_caches(cpu, physaddr, + JUST_MARK_AS_NON_WRITABLE | INVALIDATE_PADDR); - cpu->cd.DYNTRANS_ARCH.cur_physpage = ppp; +/* cpu->cd.DYNTRANS_ARCH.cur_physpage = ppp; */ cpu->cd.DYNTRANS_ARCH.cur_ic_page = &ppp->ics[0]; + cpu->cd.DYNTRANS_ARCH.next_ic = cpu->cd.DYNTRANS_ARCH.cur_ic_page + DYNTRANS_PC_TO_IC_ENTRY(cached_pc); @@ -549,7 +651,7 @@ #ifdef MODE32 int index; cached_pc = cpu->pc; - index = cached_pc >> 12; + index = DYNTRANS_ADDR_TO_PAGENR(cached_pc); ppp = cpu->cd.DYNTRANS_ARCH.phys_page[index]; if (ppp != NULL) goto have_it; @@ -587,7 +689,7 @@ /* Quick return path: */ have_it: - cpu->cd.DYNTRANS_ARCH.cur_physpage = ppp; +/* cpu->cd.DYNTRANS_ARCH.cur_physpage = ppp; */ cpu->cd.DYNTRANS_ARCH.cur_ic_page = &ppp->ics[0]; cpu->cd.DYNTRANS_ARCH.next_ic = cpu->cd.DYNTRANS_ARCH.cur_ic_page + DYNTRANS_PC_TO_IC_ENTRY(cached_pc); @@ -610,7 +712,7 @@ * is just downgraded to non-writable (ie the host store page is set to * NULL). Otherwise, the entire translation is removed. */ -void DYNTRANS_INVALIDATE_TLB_ENTRY(struct cpu *cpu, +static void DYNTRANS_INVALIDATE_TLB_ENTRY(struct cpu *cpu, #ifdef MODE32 uint32_t #else @@ -619,7 +721,11 @@ vaddr_page, int flags) { #ifdef MODE32 - uint32_t index = vaddr_page >> 12; + uint32_t index = DYNTRANS_ADDR_TO_PAGENR(vaddr_page); + +#ifdef DYNTRANS_ARM + cpu->cd.DYNTRANS_ARCH.is_userpage[index >> 3] &= ~(1 << (index & 7)); +#endif if (flags & JUST_MARK_AS_NON_WRITABLE) { /* printf("JUST MARKING NON-W: vaddr 0x%08x\n", @@ -630,6 +736,7 @@ cpu->cd.DYNTRANS_ARCH.host_store[index] = NULL; cpu->cd.DYNTRANS_ARCH.phys_addr[index] = 0; cpu->cd.DYNTRANS_ARCH.phys_page[index] = NULL; + cpu->cd.DYNTRANS_ARCH.vaddr_to_tlbindex[index] = 0; } #else /* 2-level: */ @@ -686,9 +793,9 @@ #endif -#ifdef DYNTRANS_INVALIDATE_TC_PADDR +#ifdef DYNTRANS_INVALIDATE_TC /* - * XXX_invalidate_translation_caches_paddr(): + * XXX_invalidate_translation_caches(): * * Invalidate all entries matching a specific physical address, a specific * virtual address, or ALL entries. @@ -699,10 +806,11 @@ * In the case when all translations are invalidated, paddr doesn't need * to be supplied. * - * NOTE/TODO: Poorly choosen name for this function, as it can - * invalidate based on virtual address as well. + * NOTE/TODO: When invalidating a virtual address, it is only cleared from + * the quick translation array, not from the linear + * vph_tlb_entry[] array. Hopefully this is enough anyway. */ -void DYNTRANS_INVALIDATE_TC_PADDR(struct cpu *cpu, uint64_t paddr, int flags) +void DYNTRANS_INVALIDATE_TC(struct cpu *cpu, uint64_t paddr, int flags) { int r; #ifdef MODE32 @@ -712,12 +820,16 @@ #endif addr_page = paddr & ~(DYNTRANS_PAGESIZE - 1); + /* Quick case for virtual addresses: see note above. */ + if (flags & INVALIDATE_VADDR) { + DYNTRANS_INVALIDATE_TLB_ENTRY(cpu, addr_page, flags); + return; + } + for (r=0; rcd.DYNTRANS_ARCH.vph_tlb_entry[r].valid && ( (cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r].paddr_page == addr_page && flags & INVALIDATE_PADDR) || - (cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r].vaddr_page == - addr_page && flags & INVALIDATE_VADDR) || flags & INVALIDATE_ALL) ) { DYNTRANS_INVALIDATE_TLB_ENTRY(cpu, cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r].vaddr_page, @@ -731,7 +843,7 @@ } } } -#endif /* DYNTRANS_INVALIDATE_TC_PADDR */ +#endif /* DYNTRANS_INVALIDATE_TC */ @@ -745,7 +857,7 @@ void DYNTRANS_INVALIDATE_TC_CODE(struct cpu *cpu, uint64_t addr, int flags) { int r; -#ifdef MODE_32 +#ifdef MODE32 uint32_t #else uint64_t @@ -760,18 +872,31 @@ if (flags & INVALIDATE_PADDR) { int pagenr, table_index; uint32_t physpage_ofs, *physpage_entryp; - struct DYNTRANS_TC_PHYSPAGE *ppp; + struct DYNTRANS_TC_PHYSPAGE *ppp, *prev_ppp; pagenr = DYNTRANS_ADDR_TO_PAGENR(addr); + +#ifdef MODE32 + /* If this page isn't marked as having any translations, + then return immediately. */ + if (!(cpu->cd.DYNTRANS_ARCH.phystranslation[pagenr >> 5] + & 1 << (pagenr & 31))) + return; + /* Remove the mark: */ + cpu->cd.DYNTRANS_ARCH.phystranslation[pagenr >> 5] &= + ~ (1 << (pagenr & 31)); +#endif + table_index = PAGENR_TO_TABLE_INDEX(pagenr); physpage_entryp = &(((uint32_t *)cpu-> translation_cache)[table_index]); physpage_ofs = *physpage_entryp; - ppp = NULL; + prev_ppp = ppp = NULL; /* Traverse the physical page chain: */ while (physpage_ofs != 0) { + prev_ppp = ppp; ppp = (struct DYNTRANS_TC_PHYSPAGE *) (cpu->translation_cache + physpage_ofs); /* If we found the page in the cache, @@ -782,8 +907,31 @@ physpage_ofs = ppp->next_ofs; } - /* If the page was found, then we should invalidate all - code translations: */ + if (physpage_ofs == 0) + ppp = NULL; + +#if 1 + /* + * "Bypass" the page, removing it from the code cache. + * + * NOTE/TODO: This gives _TERRIBLE_ performance with self- + * modifying code, or when a single page is used for both + * code and (writable) data. + */ + if (ppp != NULL) { + if (prev_ppp != NULL) + prev_ppp->next_ofs = ppp->next_ofs; + else + *physpage_entryp = ppp->next_ofs; + } +#else + /* + * Instead of removing the page from the code cache, each + * entry can be set to "to_be_translated". This is slow in + * the general case, but in the case of self-modifying code, + * it might be faster since we don't risk wasting cache + * memory as quickly (which would force unnecessary Restarts). + */ if (ppp != NULL) { /* TODO: Is this faster than copying an entire template page? */ @@ -795,10 +943,12 @@ #endif instr(to_be_translated); } +#endif } - /* Invalidate entries in the VPH table: */ - for (r=0; rcd.DYNTRANS_ARCH.vph_tlb_entry[r].valid) { vaddr_page = cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r] .vaddr_page & ~(DYNTRANS_PAGESIZE-1); @@ -809,8 +959,13 @@ (flags & INVALIDATE_PADDR && paddr_page == addr) || (flags & INVALIDATE_VADDR && vaddr_page == addr)) { #ifdef MODE32 - uint32_t index = vaddr_page >> 12; + uint32_t index = + DYNTRANS_ADDR_TO_PAGENR(vaddr_page); cpu->cd.DYNTRANS_ARCH.phys_page[index] = NULL; + /* Remove the mark: */ + index = DYNTRANS_ADDR_TO_PAGENR(paddr_page); + cpu->cd.DYNTRANS_ARCH.phystranslation[ + index >> 5] &= ~ (1 << (index & 31)); #else /* 2-level: */ #ifdef DYNTRANS_ALPHA @@ -857,7 +1012,7 @@ unsigned char *host_page, int writeflag, uint64_t paddr_page) { int64_t lowest, highest = -1; - int found, r, lowest_index; + int found, r, lowest_index, start, end, useraccess = 0; #ifdef DYNTRANS_ALPHA uint32_t a, b; @@ -884,10 +1039,38 @@ #endif #endif + if (writeflag & MEMORY_USER_ACCESS) { + writeflag &= ~MEMORY_USER_ACCESS; + useraccess = 1; + } + + start = 0; end = DYNTRANS_MAX_VPH_TLB_ENTRIES / 2; +#if 1 + /* Half of the TLB used for data, half for code: */ + if (writeflag & TLB_CODE) { + writeflag &= ~TLB_CODE; + start = end; end = DYNTRANS_MAX_VPH_TLB_ENTRIES; + } +#else + /* Data and code entries are mixed. */ + end = DYNTRANS_MAX_VPH_TLB_ENTRIES; +#endif + /* Scan the current TLB entries: */ - found = -1; lowest_index = 0; + found = -1; lowest_index = start; lowest = cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[0].timestamp; - for (r=0; rcd.DYNTRANS_ARCH.vaddr_to_tlbindex[ + DYNTRANS_ADDR_TO_PAGENR(vaddr_page)] - 1; + if (found < 0) + lowest_index = (random() % (end-start)) + start; + if (0) +#endif + + for (r=start; rcd.DYNTRANS_ARCH.vph_tlb_entry[r].timestamp < lowest) { lowest = cpu->cd.DYNTRANS_ARCH. vph_tlb_entry[r].timestamp; @@ -959,12 +1142,18 @@ vph_p->phys_page[b] = NULL; #else #ifdef MODE32 - index = vaddr_page >> 12; + index = DYNTRANS_ADDR_TO_PAGENR(vaddr_page); cpu->cd.DYNTRANS_ARCH.host_load[index] = host_page; cpu->cd.DYNTRANS_ARCH.host_store[index] = writeflag? host_page : NULL; cpu->cd.DYNTRANS_ARCH.phys_addr[index] = paddr_page; cpu->cd.DYNTRANS_ARCH.phys_page[index] = NULL; + cpu->cd.DYNTRANS_ARCH.vaddr_to_tlbindex[index] = r + 1; +#ifdef DYNTRANS_ARM + if (useraccess) + cpu->cd.DYNTRANS_ARCH.is_userpage[index >> 3] + |= 1 << (index & 7); +#endif #endif /* 32 */ #endif /* !ALPHA */ } else { @@ -974,8 +1163,8 @@ * Writeflag = 1: Make sure the page is writable. * Writeflag = -1: Downgrade to readonly. */ - cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[found].timestamp = - highest + 1; + r = found; + cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r].timestamp = highest + 1; if (writeflag == 1) cpu->cd.DYNTRANS_ARCH.vph_tlb_entry[r].writeflag = 1; if (writeflag == -1) @@ -1002,8 +1191,14 @@ } #else #ifdef MODE32 - index = vaddr_page >> 12; + index = DYNTRANS_ADDR_TO_PAGENR(vaddr_page); cpu->cd.DYNTRANS_ARCH.phys_page[index] = NULL; +#ifdef DYNTRANS_ARM + cpu->cd.DYNTRANS_ARCH.is_userpage[index >> 3]&=~(1<<(index&7)); + if (useraccess) + cpu->cd.DYNTRANS_ARCH.is_userpage[index >> 3] + |= 1 << (index & 7); +#endif if (cpu->cd.DYNTRANS_ARCH.phys_addr[index] == paddr_page) { if (writeflag == 1) cpu->cd.DYNTRANS_ARCH.host_store[index] = @@ -1067,8 +1262,20 @@ #ifdef DYNTRANS_TO_BE_TRANSLATED_TAIL /* * If we end up here, then an instruction was translated. + * Mark the page as containing a translation. + * + * (Special case for 32-bit mode: set the corresponding bit in the + * phystranslation[] array.) */ - translated; +#ifdef MODE32 + if (!(cpu->cd.DYNTRANS_ARCH.cur_physpage->flags & TRANSLATIONS)) { + uint32_t index = DYNTRANS_ADDR_TO_PAGENR(addr); + cpu->cd.DYNTRANS_ARCH.phystranslation[index >> 5] |= + (1 << (index & 31)); + } +#endif + cpu->cd.DYNTRANS_ARCH.cur_physpage->flags |= TRANSLATIONS; + /* * Now it is time to check for combinations of instructions that can @@ -1077,8 +1284,13 @@ * Note: Single-stepping or instruction tracing doesn't work with * instruction combination. */ - if (!single_step && !cpu->machine->instruction_trace) - COMBINE_INSTRUCTIONS(cpu, ic, addr); + if (!single_step && !cpu->machine->instruction_trace) { + if (cpu->combination_check != NULL && + cpu->machine->speed_tricks) + cpu->combination_check(cpu, ic, + addr & (DYNTRANS_PAGESIZE - 1)); + cpu->combination_check = NULL; + } /* ... and finally execute the translated instruction: */ if (single_step_breakpoint) {