--- trunk/src/file.c 2007/10/08 16:18:11 6 +++ trunk/src/file.c 2007/10/08 16:18:51 14 @@ -25,16 +25,17 @@ * SUCH DAMAGE. * * - * $Id: file.c,v 1.96 2005/06/02 12:31:39 debug Exp $ + * $Id: file.c,v 1.116 2005/09/30 14:07:45 debug Exp $ * * This file contains functions which load executable images into (emulated) - * memory. File formats recognized so far: + * memory. File formats recognized so far are: * - * ELF 32-bit and 64-bit ELFs * a.out old format used by OpenBSD 2.x pmax kernels + * Mach-O MacOS X format, etc. * ecoff old format used by Ultrix, Windows NT, etc * srec Motorola SREC format * raw raw binaries, "address:[skiplen:[entrypoint:]]filename" + * ELF 32-bit and 64-bit ELFs * * If a file is not of one of the above mentioned formats, it is assumed * to be symbol data generated by 'nm' or 'nm -S'. @@ -125,6 +126,7 @@ #define AOUT_FLAG_DECOSF1 1 #define AOUT_FLAG_FROM_BEGINNING 2 +#define AOUT_FLAG_VADDR_ZERO_HACK 4 /* * file_load_aout(): * @@ -145,9 +147,12 @@ uint32_t entry, datasize, textsize; int32_t symbsize = 0; uint32_t vaddr, total_len; - unsigned char buf[4096]; + unsigned char buf[65536]; unsigned char *syms; + if (m->cpus[0]->byte_order == EMUL_BIG_ENDIAN) + encoding = ELFDATA2MSB; + f = fopen(filename, "r"); if (f == NULL) { perror(filename); @@ -165,7 +170,7 @@ symbsize = 0; fseek(f, 0, SEEK_END); /* This is of course wrong, but should work anyway: */ - textsize = ftell(f) - 512; + textsize = ftello(f) - 512; datasize = 0; fseek(f, 512, SEEK_SET); } else { @@ -177,8 +182,11 @@ } unencode(entry, &aout_header.a_entry, uint32_t); - vaddr = entry; debug("a.out, entry point 0x%08lx\n", (long)entry); + vaddr = entry; + + if (flags & AOUT_FLAG_VADDR_ZERO_HACK) + vaddr = 0; unencode(textsize, &aout_header.a_text, uint32_t); unencode(datasize, &aout_header.a_data, uint32_t); @@ -199,12 +207,22 @@ len = fread(buf, 1, len, f); /* printf("fread len=%i vaddr=%x buf[0..]=%02x %02x %02x\n", - len, (int)vaddr, buf[0], buf[1], buf[2]); */ + (int)len, (int)vaddr, buf[0], buf[1], buf[2]); */ - if (len > 0) - m->cpus[0]->memory_rw(m->cpus[0], mem, vaddr, - &buf[0], len, MEM_WRITE, NO_EXCEPTIONS); - else { + if (len > 0) { + int len2 = 0; + uint64_t vaddr1 = vaddr & + ((1 << BITS_PER_MEMBLOCK) - 1); + uint64_t vaddr2 = (vaddr + + len) & ((1 << BITS_PER_MEMBLOCK) - 1); + if (vaddr2 < vaddr1) { + len2 = len - vaddr2; + m->cpus[0]->memory_rw(m->cpus[0], mem, vaddr, + &buf[0], len2, MEM_WRITE, NO_EXCEPTIONS); + } + m->cpus[0]->memory_rw(m->cpus[0], mem, vaddr + len2, + &buf[len2], len-len2, MEM_WRITE, NO_EXCEPTIONS); + } else { if (flags & AOUT_FLAG_DECOSF1) break; else { @@ -226,7 +244,7 @@ char *string_symbols; off_t oldpos; - debug("symbols: %i bytes @ 0x%x\n", symbsize, (int)ftell(f)); + debug("symbols: %i bytes @ 0x%x\n", symbsize, (int)ftello(f)); syms = malloc(symbsize); if (syms == NULL) { fprintf(stderr, "out of memory\n"); @@ -239,11 +257,11 @@ exit(1); } - oldpos = ftell(f); + oldpos = ftello(f); fseek(f, 0, SEEK_END); - strings_len = ftell(f) - oldpos; + strings_len = ftello(f) - oldpos; fseek(f, oldpos, SEEK_SET); - debug("strings: %i bytes @ 0x%x\n", strings_len, (int)ftell(f)); + debug("strings: %i bytes @ 0x%x\n", strings_len,(int)ftello(f)); string_symbols = malloc(strings_len); if (string_symbols == NULL) { fprintf(stderr, "out of memory\n"); @@ -265,7 +283,7 @@ if (type != 0 && addr != 0) add_symbol_name(&m->symbol_context, - addr, 0, string_symbols + str_index, 0); + addr, 0, string_symbols + str_index, 0, -1); i++; } @@ -287,6 +305,283 @@ /* + * file_load_macho(): + * + * Loads a Mach-O binary image into the emulated memory. The entry point + * is stored in the specified CPU's registers. + * + * TODO: + * + * o) Almost everything. + * + * o) I haven't had time to look into whether Apple's open source + * license is BSD-compatible or not. Perhaps it would be possible + * to use a header file containing symbolic names, and not use + * hardcoded values. + */ +static void file_load_macho(struct machine *m, struct memory *mem, + char *filename, uint64_t *entrypointp, int arch, int *byte_orderp, + int is_64bit, int is_reversed) +{ + FILE *f; + uint64_t entry = 0; + int entry_set = 0; + int encoding = ELFDATA2MSB; + unsigned char buf[65536]; + char *symbols, *strings; + uint32_t cputype, cpusubtype, filetype, ncmds, sizeofcmds, flags; + uint64_t vmaddr, vmsize, fileoff, filesize; + int cmd_type, cmd_len, pos, i, flavor; + int32_t symoff, nsyms, stroff, strsize; + size_t len; + + if (m->cpus[0]->byte_order == EMUL_BIG_ENDIAN) + encoding = ELFDATA2MSB; + + f = fopen(filename, "r"); + if (f == NULL) { + perror(filename); + exit(1); + } + + if (is_64bit) { + fatal("TODO: 64-bit Mach-O. Not supported yet.\n"); + exit(1); + } + if (is_reversed) { + fatal("TODO: Reversed-endianness. Not supported yet.\n"); + exit(1); + } + + len = fread(buf, 1, sizeof(buf), f); + if (len < 100) { + fatal("Bad Mach-O file?\n"); + exit(1); + } + + unencode(cputype, &buf[4], uint32_t); + unencode(cpusubtype, &buf[8], uint32_t); + unencode(filetype, &buf[12], uint32_t); + unencode(ncmds, &buf[16], uint32_t); + unencode(sizeofcmds, &buf[20], uint32_t); + unencode(flags, &buf[24], uint32_t); + + /* debug("cputype=0x%x cpusubtype=0x%x filetype=0x%x\n", + cputype, cpusubtype, filetype); + debug("ncmds=%i sizeofcmds=0x%08x flags=0x%08x\n", + ncmds, sizeofcmds, flags); */ + + /* + * Compare to "normal" values. + * NOTE/TODO: These were for a Darwin (Macintosh PPC) kernel. + */ + if (cputype != 0x12) { + fatal("Error: Unimplemented cputype 0x%x\n", cputype); + exit(1); + } + if (cpusubtype != 0) { + fatal("Error: Unimplemented cpusubtype 0x%x\n", cpusubtype); + exit(1); + } + /* Filetype 2 means an executable image. */ + if (filetype != 2) { + fatal("Error: Unimplemented filetype 0x%x\n", filetype); + exit(1); + } + if (!(flags & 1)) { + fatal("Error: File has 'undefined references'. Cannot" + " be executed.\n", flags); + exit(1); + } + + /* I've only encountered flags == 1 so far. */ + if (flags != 1) { + fatal("Error: Unimplemented flags 0x%x\n", flags); + exit(1); + } + + /* + * Read all load commands: + */ + pos = is_64bit? 32 : 28; + cmd_type = 0; + do { + /* Read command type and length: */ + unencode(cmd_type, &buf[pos], uint32_t); + unencode(cmd_len, &buf[pos+4], uint32_t); + +#if 0 + debug("cmd %i, len=%i\n", cmd_type, cmd_len); + for (i=8; i= ' ' && ch < 127) + debug("%c", ch); + else + debug("."); + } +#endif + switch (cmd_type) { + case 1: /* LC_SEGMENT */ + debug("seg "); + for (i=0; i<16; i++) { + if (buf[pos + 8 + i] == 0) + break; + debug("%c", buf[pos + 8 + i]); + } + unencode(vmaddr, &buf[pos+8+16+0], uint32_t); + unencode(vmsize, &buf[pos+8+16+4], uint32_t); + unencode(fileoff, &buf[pos+8+16+8], uint32_t); + unencode(filesize, &buf[pos+8+16+12], uint32_t); + debug(": vmaddr=0x%x size=0x%x fileoff=0x%x", + (int)vmaddr, (int)vmsize, (int)fileoff); + + if (filesize == 0) { + debug("\n"); + break; + } + + fseek(f, fileoff, SEEK_SET); + + /* Load data from the file: */ + while (filesize != 0) { + unsigned char buf[32768]; + ssize_t len = filesize > sizeof(buf) ? + sizeof(buf) : filesize; + len = fread(buf, 1, len, f); + + /* printf("fread len=%i vmaddr=%x buf[0..]=" + "%02x %02x %02x\n", (int)len, (int)vmaddr, + buf[0], buf[1], buf[2]); */ + + if (len > 0) { + int len2 = 0; + uint64_t vaddr1 = vmaddr & + ((1 << BITS_PER_MEMBLOCK) - 1); + uint64_t vaddr2 = (vmaddr + + len) & ((1 << BITS_PER_MEMBLOCK)-1); + if (vaddr2 < vaddr1) { + len2 = len - vaddr2; + m->cpus[0]->memory_rw(m->cpus[ + 0], mem, vmaddr, &buf[0], + len2, MEM_WRITE, + NO_EXCEPTIONS); + } + m->cpus[0]->memory_rw(m->cpus[0], mem, + vmaddr + len2, &buf[len2], len-len2, + MEM_WRITE, NO_EXCEPTIONS); + } else { + fprintf(stderr, "error reading\n"); + exit(1); + } + + vmaddr += len; + filesize -= len; + } + + debug("\n"); + break; + + case 2: /* LC_SYMTAB */ + unencode(symoff, &buf[pos+8], uint32_t); + unencode(nsyms, &buf[pos+12], uint32_t); + unencode(stroff, &buf[pos+16], uint32_t); + unencode(strsize, &buf[pos+20], uint32_t); + debug("symtable: %i symbols @ 0x%x (strings at " + "0x%x)\n", nsyms, symoff, stroff); + + symbols = malloc(12 * nsyms); + if (symbols == NULL) { + fprintf(stderr, "out of memory\n"); + exit(1); + } + fseek(f, symoff, SEEK_SET); + fread(symbols, 1, 12 * nsyms, f); + + strings = malloc(strsize); + if (strings == NULL) { + fprintf(stderr, "out of memory\n"); + exit(1); + } + fseek(f, stroff, SEEK_SET); + fread(strings, 1, strsize, f); + + for (i=0; isymbol_context, + n_value, 0, strings + n_strx, 0, -1); + } + + free(symbols); + free(strings); + break; + + case 5: debug("unix thread context: "); + /* See http://cvs.sf.net/viewcvs.py/hte/ + HT%20Editor/machostruc.h or similar for details + on the thread struct. */ + unencode(flavor, &buf[pos+8], uint32_t); + if (flavor != 1) { + fatal("unimplemented flavor %i\n", flavor); + exit(1); + } + + if (arch != ARCH_PPC) { + fatal("non-PPC arch? TODO\n"); + exit(1); + } + + unencode(entry, &buf[pos+16], uint32_t); + entry_set = 1; + debug("pc=0x%x\n", (int)entry); + + for (i=1; i<40; i++) { + uint32_t x; + unencode(x, &buf[pos+16+i*4], uint32_t); + if (x != 0) { + fatal("Entry nr %i in the Mach-O" + " thread struct is non-zero" + " (0x%x). This is not supported" + " yet. TODO\n", i, x); + exit(1); + } + } + break; + + default:fatal("WARNING! Unimplemented load command %i!\n", + cmd_type); + } + + pos += cmd_len; + } while (pos < sizeofcmds && cmd_type != 0); + + fclose(f); + + if (!entry_set) { + fatal("No entry point? Aborting.\n"); + exit(1); + } + + *entrypointp = entry; + + if (encoding == ELFDATA2LSB) + *byte_orderp = EMUL_LITTLE_ENDIAN; + else + *byte_orderp = EMUL_BIG_ENDIAN; + + n_executables_loaded ++; +} + + +/* * file_load_ecoff(): * * Loads an ecoff binary image into the emulated memory. The entry point @@ -474,7 +769,7 @@ if (s_scnptr != 0 && s_size != 0 && s_vaddr != 0 && !(s_flags & 0x02)) { /* Remember the current file offset: */ - oldpos = ftell(f); + oldpos = ftello(f); /* Load the section into emulated memory: */ fseek(f, s_scnptr, SEEK_SET); @@ -575,7 +870,7 @@ memcpy(name, sym->name, 8); name[8] = '\0'; add_symbol_name(&m->symbol_context, - v, 0, name, 0); + v, 0, name, 0, -1); n_real_symbols ++; } else if (t == 0x20 && !sym->name[0]) { off_t ofs; @@ -587,7 +882,7 @@ /* debug(" [altname=0x%x '%s']", altname, name); */ add_symbol_name(&m->symbol_context, - v, 0, name, 0); + v, 0, name, 0, -1); n_real_symbols ++; } @@ -658,7 +953,7 @@ add_symbol_name(&m->symbol_context, extsyms[sym_nr].es_value, 0, - symbol_data + extsyms[sym_nr].es_strindex, 0); + symbol_data + extsyms[sym_nr].es_strindex, 0, -1); } free(extsyms); @@ -900,7 +1195,7 @@ } debug("RAW: 0x%llx bytes @ 0x%08llx", - (long long) (ftell(f) - skip), (long long)loadaddr); + (long long) (ftello(f) - skip), (long long)loadaddr); if (skip != 0) debug(" (0x%llx bytes of header skipped)", (long long)skip); debug("\n"); @@ -1060,12 +1355,42 @@ ok = 1; } break; + case ARCH_ARM: + switch (emachine) { + case EM_ARM: + ok = 1; + } + break; + case ARCH_AVR: + switch (emachine) { + case EM_AVR: + ok = 1; + } + break; case ARCH_HPPA: switch (emachine) { case EM_PARISC: ok = 1; } break; + case ARCH_I960: + switch (emachine) { + case EM_960: + ok = 1; + } + break; + case ARCH_IA64: + switch (emachine) { + case EM_IA_64: + ok = 1; + } + break; + case ARCH_M68K: + switch (emachine) { + case EM_68K: + ok = 1; + } + break; case ARCH_MIPS: switch (emachine) { case EM_MIPS: @@ -1080,6 +1405,12 @@ ok = 1; } break; + case ARCH_SH: + switch (emachine) { + case EM_SH: + ok = 1; + } + break; case ARCH_SPARC: switch (emachine) { case EM_SPARC: @@ -1100,12 +1431,6 @@ break; } break; - case ARCH_ARM: - switch (emachine) { - case EM_ARM: - ok = 1; - } - break; default: fatal("file.c: INTERNAL ERROR: Unimplemented arch!\n"); } @@ -1137,8 +1462,7 @@ * TODO: Find out what e_flag actually contains. * TODO 2: This only sets mips16 for cpu 0. Yuck. Fix this! */ - - if (((eflags >> 24) & 0xff) == 0x24) { + if (arch == ARCH_MIPS && ((eflags >> 24) & 0xff) == 0x24) { debug("MIPS16 encoding (e_flags = 0x%08x)\n", eflags); #ifdef ENABLE_MIPS16 m->cpus[0]->cd.mips.mips16 = 1; @@ -1147,7 +1471,7 @@ "(or use the --mips16 configure option)\n"); exit(1); #endif - } else if (eentry & 0x3) { + } else if (arch == ARCH_MIPS && (eentry & 0x3)) { debug("MIPS16 encoding (eentry not 32-bit aligned)\n"); #ifdef ENABLE_MIPS16 m->cpus[0]->cd.mips.mips16 = 1; @@ -1158,6 +1482,15 @@ #endif } + /* + * SH64: 32-bit instruction encoding? TODO + */ + if (arch == ARCH_SH && (eentry & 1)) { + debug("SH64: 32-bit instruction encoding\n"); + m->cpus[0]->cd.sh.compact = 0; + m->cpus[0]->cd.sh.bits = 64; + } + /* Read the program headers: */ for (i=0; isymbol_context, - addr, size, symbol_strings + st_name, 0); + addr, size, symbol_strings + st_name, + 0, -1); } if (strcmp(symbol_strings + st_name, "_gp") == 0) { @@ -1580,7 +1918,7 @@ } fseek(f, 0, SEEK_END); - size = ftell(f); + size = ftello(f); fseek(f, 0, SEEK_SET); memset(buf, 0, sizeof(buf)); @@ -1609,6 +1947,19 @@ entrypointp, arch, byte_orderp); goto ret; } + if (buf[0]==0x00 && buf[1]==0x87 && buf[2]==0x01 && buf[3]==0x08) { + /* M68K a.out */ + file_load_aout(machine, mem, filename, + AOUT_FLAG_VADDR_ZERO_HACK /* for OpenBSD/mac68k */, + entrypointp, arch, byte_orderp); + goto ret; + } + if (buf[0]==0x00 && buf[1]==0x8f && buf[2]==0x01 && buf[3]==0x0b) { + /* ARM a.out */ + file_load_aout(machine, mem, filename, AOUT_FLAG_FROM_BEGINNING, + entrypointp, arch, byte_orderp); + goto ret; + } if (buf[0]==0x00 && buf[1]==0x86 && buf[2]==0x01 && buf[3]==0x0b) { /* i386 a.out (old OpenBSD and NetBSD etc) */ file_load_aout(machine, mem, filename, AOUT_FLAG_FROM_BEGINNING, @@ -1622,6 +1973,22 @@ goto ret; } + /* + * Is it a Mach-O file? + */ + if (buf[0] == 0xfe && buf[1] == 0xed && buf[2] == 0xfa && + (buf[3] == 0xce || buf[3] == 0xcf)) { + file_load_macho(machine, mem, filename, entrypointp, + arch, byte_orderp, buf[3] == 0xcf, 0); + goto ret; + } + if ((buf[0] == 0xce || buf[0] == 0xcf) && buf[1] == 0xfa && + buf[2] == 0xed && buf[3] == 0xfe) { + file_load_macho(machine, mem, filename, entrypointp, + arch, byte_orderp, buf[0] == 0xcf, 1); + goto ret; + } + /* * Is it an ecoff? *