--- trunk/src/cpus/cpu_dyntrans.c 2007/10/08 16:22:20 41 +++ trunk/src/cpus/cpu_dyntrans.c 2007/10/08 16:22:32 42 @@ -25,7 +25,7 @@ * SUCH DAMAGE. * * - * $Id: cpu_dyntrans.c,v 1.147 2007/04/19 15:18:16 debug Exp $ + * $Id: cpu_dyntrans.c,v 1.165 2007/06/15 21:43:53 debug Exp $ * * Common dyntrans routines. Included from cpu_*.c. */ @@ -51,6 +51,10 @@ return; } + /* low_pc must be within the page! */ + if (low_pc < 0 || low_pc > DYNTRANS_IC_ENTRIES_PER_PAGE) + return; + buf[0] = '\0'; while ((ch = cpu->machine->statistics_fields[i]) != '\0') { @@ -64,10 +68,6 @@ break; case 'p': /* Physical program counter address: */ - /* (low_pc must be within the page!) */ - if (low_pc < 0 || - low_pc >= DYNTRANS_IC_ENTRIES_PER_PAGE) - strlcat(buf, "-", sizeof(buf)); cpu->cd.DYNTRANS_ARCH.cur_physpage = (void *) cpu->cd.DYNTRANS_ARCH.cur_ic_page; a = cpu->cd.DYNTRANS_ARCH.cur_physpage->physaddr; @@ -76,24 +76,20 @@ a += low_pc << DYNTRANS_INSTR_ALIGNMENT_SHIFT; if (cpu->is_32bit) snprintf(buf + strlen(buf), sizeof(buf), - "0x%016"PRIx32, (uint32_t)a); + "0x%08"PRIx32, (uint32_t)a); else snprintf(buf + strlen(buf), sizeof(buf), "0x%016"PRIx64, (uint64_t)a); break; case 'v': /* Virtual program counter address: */ - /* (low_pc inside the page, or in a delay slot) */ - if (low_pc < 0 || - low_pc >= DYNTRANS_IC_ENTRIES_PER_PAGE + 2) - strlcat(buf, "-", sizeof(buf)); a = cpu->pc; a &= ~((DYNTRANS_IC_ENTRIES_PER_PAGE-1) << DYNTRANS_INSTR_ALIGNMENT_SHIFT); a += low_pc << DYNTRANS_INSTR_ALIGNMENT_SHIFT; if (cpu->is_32bit) snprintf(buf + strlen(buf), sizeof(buf), - "0x%016"PRIx32, (uint32_t)a); + "0x%08"PRIx32, (uint32_t)a); else snprintf(buf + strlen(buf), sizeof(buf), "0x%016"PRIx64, (uint64_t)a); @@ -109,17 +105,15 @@ #define S gather_statistics(cpu) -#ifdef DYNTRANS_VARIABLE_INSTRUCTION_LENGTH -#define I ic = cpu->cd.DYNTRANS_ARCH.next_ic; \ - cpu->cd.DYNTRANS_ARCH.next_ic += ic->arg[0]; \ - ic->f(cpu, ic); -#else +#if 1 /* The normal instruction execution core: */ #define I ic = 
cpu->cd.DYNTRANS_ARCH.next_ic ++; ic->f(cpu, ic); +#else + /* For heavy debugging: */ -/* #define I ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; \ +#define I ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; \ { \ int low_pc = ((size_t)cpu->cd.DYNTRANS_ARCH.next_ic - \ (size_t)cpu->cd.DYNTRANS_ARCH.cur_ic_page) / \ @@ -128,7 +122,9 @@ cpu->cd.DYNTRANS_ARCH.cur_ic_page, \ ic, low_pc << DYNTRANS_INSTR_ALIGNMENT_SHIFT); \ } \ - ic->f(cpu, ic); */ + ic->f(cpu, ic); + +#endif /* static long long nr_of_I_calls = 0; */ @@ -161,7 +157,6 @@ } \ ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; ic->f(cpu, ic); } */ -#endif #endif /* STATIC STUFF */ @@ -306,27 +301,6 @@ I; n_instrs = 1; - } else if (cpu->machine->cycle_accurate) { - /* Executing multiple instructions, and call devices' - tick functions: */ - n_instrs = 0; - for (;;) { - struct DYNTRANS_IC *ic; -/* TODO: continue here */ -int64_t cycles = cpu->cd.avr.extra_cycles; - I; - n_instrs += 1; -cycles = cpu->cd.avr.extra_cycles - cycles + 1; -/* The instruction took 'cycles' cycles. */ -/* printf("A\n"); */ -while (cycles-- > 0) - cpu->machine->tick_func[1](cpu, cpu->machine->tick_extra[1]); -/* printf("B\n"); */ - - if (n_instrs + cpu->n_translated_instrs >= - N_SAFE_DYNTRANS_LIMIT) - break; - } } else if (cpu->machine->statistics_enabled) { /* Gather statistics while executing multiple instructions: */ n_instrs = 0; @@ -345,8 +319,14 @@ break; } } else { - /* Execute multiple instructions: */ + /* + * Execute multiple instructions: + * + * (This is the core dyntrans loop.) 
+ */ n_instrs = 0; + cpu->sampling = 1; + for (;;) { struct DYNTRANS_IC *ic; @@ -370,6 +350,8 @@ if (cpu->n_translated_instrs >= N_SAFE_DYNTRANS_LIMIT) break; } + + cpu->sampling = 0; } n_instrs += cpu->n_translated_instrs; @@ -439,6 +421,18 @@ } #endif + cpu->ninstrs += n_instrs; + + /* + * Check if there are enough samples to decide whether or not to + * perform native code generation: + */ + if (cpu->sampling_curindex == N_PADDR_SAMPLES) { + /* TODO: Check against known blocks, etc. */ + + cpu->sampling_curindex = 0; + } + /* Return the nr of instructions executed: */ return n_instrs; } @@ -456,6 +450,7 @@ */ void DYNTRANS_FUNCTION_TRACE(struct cpu *cpu, uint64_t f, int n_args) { + int show_symbolic_function_name = 1; char strbuf[50]; char *symbol; uint64_t ot; @@ -476,6 +471,12 @@ n_args_to_print = n_args; } +#ifdef DYNTRANS_M88K + /* Special hack for M88K userspace: */ + if (!(cpu->cd.m88k.cr[M88K_CR_PSR] & M88K_PSR_MODE)) + show_symbolic_function_name = 0; +#endif + /* * TODO: The type of each argument should be taken from the symbol * table, in some way. @@ -525,7 +526,8 @@ else if (memory_points_to_string(cpu, cpu->mem, d, 1)) fatal("\"%s\"", memory_conv_to_string(cpu, cpu->mem, d, strbuf, sizeof(strbuf))); - else if (symbol != NULL && ot == 0) + else if (symbol != NULL && ot == 0 && + show_symbolic_function_name) fatal("&%s", symbol); else { if (cpu->is_32bit) @@ -545,6 +547,57 @@ +#ifdef DYNTRANS_TIMER_SAMPLE_TICK +/* + * XXX_timer_sample_tick(): + * + * Gathers statistics about which translation blocks are being executed. + * This can then be used to calculate if it is worth the effort to perform + * native code generation (which is assumed to have a large overhead, but + * will result in faster code). + */ +void DYNTRANS_TIMER_SAMPLE_TICK(struct timer *timer, void *extra) +{ + struct cpu *cpu = extra; + struct DYNTRANS_IC *next_ic; + size_t low_pc; + uint64_t paddr; + + /* + * Don't sample if: + * + * 1) Sampling is not enabled. 
It should only be enabled during + * the core dyntrans loop. + * 2) Enough samples have already been gathered. + */ + + if (!cpu->sampling || cpu->sampling_curindex == N_PADDR_SAMPLES) + return; + + /* Get the physical address of the program counter: */ + + next_ic = cpu->cd.DYNTRANS_ARCH.next_ic; + low_pc = ((size_t)next_ic - (size_t)cpu->cd.DYNTRANS_ARCH.cur_ic_page) + / sizeof(struct DYNTRANS_IC); + + /* Not possible to represent as a physical address? Then abort. */ + if (low_pc > DYNTRANS_IC_ENTRIES_PER_PAGE) + return; + + cpu->cd.DYNTRANS_ARCH.cur_physpage = (void *) + cpu->cd.DYNTRANS_ARCH.cur_ic_page; + paddr = cpu->cd.DYNTRANS_ARCH.cur_physpage->physaddr; + paddr &= ~((DYNTRANS_IC_ENTRIES_PER_PAGE-1) << + DYNTRANS_INSTR_ALIGNMENT_SHIFT); + paddr += low_pc << DYNTRANS_INSTR_ALIGNMENT_SHIFT; + + /* ... and finally add the sample to the sampling array: */ + cpu->sampling_paddr[cpu->sampling_curindex ++] = paddr; +} +#endif /* DYNTRANS_TIMER_SAMPLE_TICK */ + + + #ifdef DYNTRANS_TC_ALLOCATE_DEFAULT_PAGE /* * XXX_tc_allocate_default_page(): @@ -896,13 +949,9 @@ int x1, x2; #endif int i; - struct DYNTRANS_TC_PHYSPAGE *ppp = malloc(sizeof( - struct DYNTRANS_TC_PHYSPAGE)); + struct DYNTRANS_TC_PHYSPAGE *ppp; - if (ppp == NULL) { - fprintf(stderr, "out of memory\n"); - exit(1); - } + CHECK_ALLOCATION(ppp = malloc(sizeof(struct DYNTRANS_TC_PHYSPAGE))); ppp->next_ofs = 0; ppp->translations = 0; @@ -914,9 +963,6 @@ cpu->is_32bit? 
instr32(to_be_translated) : #endif instr(to_be_translated); -#ifdef DYNTRANS_VARIABLE_INSTRUCTION_LENGTH - ppp->ics[i].arg[0] = 0; -#endif } /* End-of-page: */ @@ -926,10 +972,6 @@ #endif instr(end_of_page); -#ifdef DYNTRANS_VARIABLE_INSTRUCTION_LENGTH - ppp->ics[DYNTRANS_IC_ENTRIES_PER_PAGE + 0].arg[0] = 0; -#endif - /* End-of-page-2, for delay-slot architectures: */ #ifdef DYNTRANS_DELAYSLOT ppp->ics[DYNTRANS_IC_ENTRIES_PER_PAGE + 1].f = @@ -1381,7 +1423,14 @@ #ifdef MODE32 uint32_t index; vaddr_page &= 0xffffffffULL; - paddr_page &= 0xffffffffULL; + + if (paddr_page > 0xffffffffULL) { + fatal("update_translation_table(): v=0x%016"PRIx64", h=%p w=%i" + " p=0x%016"PRIx64"\n", vaddr_page, host_page, writeflag, + paddr_page); + exit(1); + } + /* fatal("update_translation_table(): v=0x%x, h=%p w=%i" " p=0x%x\n", (int)vaddr_page, host_page, writeflag, (int)paddr_page); */ @@ -1406,6 +1455,12 @@ useraccess = 1; } +#ifdef DYNTRANS_M88K + /* TODO */ + if (useraccess) + return; +#endif + /* Scan the current TLB entries: */ #ifdef MODE32 @@ -1479,8 +1534,9 @@ cpu->cd.DYNTRANS_ARCH.next_free_l2 = l2->next; } else { int i; - l2 = cpu->cd.DYNTRANS_ARCH.l1_64[x1] = - malloc(sizeof(struct DYNTRANS_L2_64_TABLE)); + CHECK_ALLOCATION(l2 = + cpu->cd.DYNTRANS_ARCH.l1_64[x1] = malloc( + sizeof(struct DYNTRANS_L2_64_TABLE))); l2->refcount = 0; for (i=0; i<(1 << DYNTRANS_L2N); i++) l2->l3[i] = cpu->cd.DYNTRANS_ARCH. @@ -1640,21 +1696,27 @@ /* * Check for breakpoints. 
*/ - if (!single_step_breakpoint) { + if (!single_step_breakpoint && !cpu->translation_readahead) { MODE_uint_t curpc = cpu->pc; int i; - for (i=0; i<cpu->machine->n_breakpoints; i++) + for (i=0; i<cpu->machine->breakpoints.n; i++) if (curpc == (MODE_uint_t) - cpu->machine->breakpoint_addr[i]) { + cpu->machine->breakpoints.addr[i]) { if (!cpu->machine->instruction_trace) { int old_quiet_mode = quiet_mode; quiet_mode = 0; DISASSEMBLE(cpu, ib, 1, 0); quiet_mode = old_quiet_mode; } +#ifdef MODE32 + fatal("BREAKPOINT: pc = 0x%"PRIx32"\n(The " + "instruction has not yet executed.)\n", + (uint32_t)cpu->pc); +#else fatal("BREAKPOINT: pc = 0x%"PRIx64"\n(The " "instruction has not yet executed.)\n", (uint64_t)cpu->pc); +#endif #ifdef DYNTRANS_DELAYSLOT if (cpu->delay_slot != NOT_DELAYED) fatal("ERROR! Breakpoint in a delay" @@ -1695,7 +1757,7 @@ * be converted into a single function call. * * Note: Single-stepping or instruction tracing doesn't work with - * instruction combination. For architectures with delay slots, + * instruction combinations. For architectures with delay slots, * we also ignore combinations if the delay slot is across a page * boundary. */ @@ -1726,44 +1788,79 @@ } /* ... and finally execute the translated instruction: */ + + /* (Except when doing read-ahead!) */ + if (cpu->translation_readahead) + return; + + /* + * Special case when single-stepping: Execute the translated + * instruction, but then replace it with a "to be translated" + * directly afterwards. + */ if ((single_step_breakpoint && cpu->delay_slot == NOT_DELAYED) #ifdef DYNTRANS_DELAYSLOT || in_crosspage_delayslot #endif ) { - /* - * Special case when single-stepping: Execute the translated - * instruction, but then replace it with a "to be translated" - * directly afterwards. - */ single_step_breakpoint = 0; -#ifdef DYNTRANS_VARIABLE_INSTRUCTION_LENGTH - cpu->cd.DYNTRANS_ARCH.next_ic = ic + ic->arg[0]; -#endif ic->f(cpu, ic); ic->f = #ifdef DYNTRANS_DUALMODE_32 cpu->is_32bit? 
instr32(to_be_translated) : #endif instr(to_be_translated); -#ifdef DYNTRANS_VARIABLE_INSTRUCTION_LENGTH - ic->arg[0] = 0; + + return; + } + + /* Translation read-ahead: */ + if (!single_step && !cpu->machine->instruction_trace) { + /* Do readahead: */ + int i = 1; + uint64_t pagenr = DYNTRANS_ADDR_TO_PAGENR(cpu->pc); + uint64_t baseaddr = cpu->pc; + + cpu->translation_readahead = MAX_DYNTRANS_READAHEAD; + + while (DYNTRANS_ADDR_TO_PAGENR(baseaddr + + (i << DYNTRANS_INSTR_ALIGNMENT_SHIFT)) == pagenr && + cpu->translation_readahead > 0) { + void (*old_f)(struct cpu *, + struct DYNTRANS_IC *) = ic[i].f; + + /* Already translated? Then abort: */ + if (old_f != ( +#ifdef DYNTRANS_DUALMODE_32 + cpu->is_32bit? instr32(to_be_translated) : #endif - } else { -#ifdef DYNTRANS_VARIABLE_INSTRUCTION_LENGTH - cpu->cd.DYNTRANS_ARCH.next_ic = ic + ic->arg[0]; + instr(to_be_translated))) + break; - /* Additional check, for variable length ISAs: */ - if (ic->arg[0] == 0) { - fatal("INTERNAL ERROR: instr len = 0!\n"); - goto bad; + /* Translate the instruction: */ + ic[i].f(cpu, ic+i); + + /* Translation failed? Then abort. */ + if (ic[i].f == old_f) + break; + + cpu->translation_readahead --; + ++i; } -#endif - /* Finally finally :-), execute the instruction: */ - ic->f(cpu, ic); + cpu->translation_readahead = 0; } + + /* + * Finally finally :-), execute the instruction. + * + * Note: The instruction might have changed during read-ahead, if + * instruction combinations are used. + */ + + ic->f(cpu, ic); + return; @@ -1771,6 +1868,16 @@ * Nothing was translated. (Unimplemented or illegal instruction.) */ + /* Clear the translation, in case it was "half-way" done: */ + ic->f = +#ifdef DYNTRANS_DUALMODE_32 + cpu->is_32bit? 
instr32(to_be_translated) : +#endif + instr(to_be_translated); + + if (cpu->translation_readahead) + return; + quiet_mode = 0; fatal("to_be_translated(): TODO: unimplemented instruction"); @@ -1795,6 +1902,13 @@ ic = cpu->cd.DYNTRANS_ARCH.next_ic = &nothing_call; cpu->cd.DYNTRANS_ARCH.next_ic ++; +#ifdef DYNTRANS_DELAYSLOT + /* Special hack: If the bad instruction was in a delay slot, + make sure that execution does not continue anyway: */ + if (cpu->delay_slot) + cpu->delay_slot |= EXCEPTION_IN_DELAY_SLOT; +#endif + /* Execute the "nothing" instruction: */ ic->f(cpu, ic);