--- trunk/src/file.c 2007/10/08 16:18:11 6 +++ trunk/src/file.c 2007/10/08 16:19:37 22 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2003-2005 Anders Gavare. All rights reserved. + * Copyright (C) 2003-2006 Anders Gavare. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -25,16 +25,17 @@ * SUCH DAMAGE. * * - * $Id: file.c,v 1.96 2005/06/02 12:31:39 debug Exp $ + * $Id: file.c,v 1.127 2006/01/22 23:20:33 debug Exp $ * * This file contains functions which load executable images into (emulated) - * memory. File formats recognized so far: + * memory. File formats recognized so far are: * - * ELF 32-bit and 64-bit ELFs * a.out old format used by OpenBSD 2.x pmax kernels + * Mach-O MacOS X format, etc. * ecoff old format used by Ultrix, Windows NT, etc * srec Motorola SREC format * raw raw binaries, "address:[skiplen:[entrypoint:]]filename" + * ELF 32-bit and 64-bit ELFs * * If a file is not of one of the above mentioned formats, it is assumed * to be symbol data generated by 'nm' or 'nm -S'. @@ -55,6 +56,10 @@ #include "symbol.h" +extern int quiet_mode; +extern int verbose; + + /* ELF machine types as strings: (same as exec_elf.h) */ #define N_ELF_MACHINE_TYPES 64 static char *elf_machine_type[N_ELF_MACHINE_TYPES] = { @@ -125,6 +130,8 @@ #define AOUT_FLAG_DECOSF1 1 #define AOUT_FLAG_FROM_BEGINNING 2 +#define AOUT_FLAG_VADDR_ZERO_HACK 4 +#define AOUT_FLAG_NO_SIZES 8 /* * file_load_aout(): * @@ -145,9 +152,12 @@ uint32_t entry, datasize, textsize; int32_t symbsize = 0; uint32_t vaddr, total_len; - unsigned char buf[4096]; + unsigned char buf[65536]; unsigned char *syms; + if (m->cpus[0]->byte_order == EMUL_BIG_ENDIAN) + encoding = ELFDATA2MSB; + f = fopen(filename, "r"); if (f == NULL) { perror(filename); @@ -157,17 +167,23 @@ if (flags & AOUT_FLAG_DECOSF1) { fread(&buf, 1, 32, f); vaddr = buf[16] + (buf[17] << 8) + - (buf[18] << 16) + (buf[19] << 24); + (buf[18] << 16) + ((uint64_t)buf[19] << 24); entry = buf[20] + (buf[21] << 8) + - (buf[22] << 16) + (buf[23] << 24); + (buf[22] << 16) + ((uint64_t)buf[23] << 24); debug("OSF1 a.out, load address 0x%08lx, " "entry point 0x%08x\n", (long)vaddr, (long)entry); symbsize = 0; fseek(f, 0, SEEK_END); /* This is of course wrong, but should work anyway: */ - textsize = ftell(f) - 512; + textsize = ftello(f) - 512; datasize = 0; fseek(f, 512, SEEK_SET); + } else if (flags & AOUT_FLAG_NO_SIZES) { + fseek(f, 0, SEEK_END); + textsize = ftello(f) - 32; + datasize = 0; + vaddr = entry = 0; + fseek(f, 32, SEEK_SET); } else { len = fread(&aout_header, 1, sizeof(aout_header), f); if (len != sizeof(aout_header)) { @@ -177,8 +193,11 @@ } unencode(entry, &aout_header.a_entry, uint32_t); - vaddr = entry; debug("a.out, entry point 0x%08lx\n", (long)entry); + vaddr = entry; + + if (flags & AOUT_FLAG_VADDR_ZERO_HACK) + vaddr = 0; unencode(textsize, &aout_header.a_text, uint32_t); unencode(datasize, &aout_header.a_data, uint32_t); @@ -199,12 +218,22 @@ len = fread(buf, 1, len, f); /* printf("fread len=%i vaddr=%x buf[0..]=%02x %02x %02x\n", - len, (int)vaddr, buf[0], buf[1], buf[2]); */ + (int)len, (int)vaddr, buf[0], buf[1], buf[2]); */ - if (len > 0) - m->cpus[0]->memory_rw(m->cpus[0], mem, vaddr, - &buf[0], len, MEM_WRITE, NO_EXCEPTIONS); - else { + if (len > 0) { + int len2 = 0; + uint64_t vaddr1 = vaddr & + ((1 << BITS_PER_MEMBLOCK) - 1); + uint64_t vaddr2 = (vaddr + + len) & ((1 << BITS_PER_MEMBLOCK) - 1); + if (vaddr2 < vaddr1) { + len2 = len - vaddr2; + m->cpus[0]->memory_rw(m->cpus[0], mem, vaddr, + &buf[0], len2, MEM_WRITE, NO_EXCEPTIONS); + } + m->cpus[0]->memory_rw(m->cpus[0], mem, vaddr + len2, + &buf[len2], len-len2, MEM_WRITE, NO_EXCEPTIONS); + } else { if (flags & AOUT_FLAG_DECOSF1) break; else { @@ -226,7 +255,7 @@ char *string_symbols; off_t oldpos; - debug("symbols: %i bytes @ 0x%x\n", symbsize, (int)ftell(f)); + debug("symbols: %i bytes @ 0x%x\n", symbsize, (int)ftello(f)); syms = malloc(symbsize); if (syms == NULL) { fprintf(stderr, "out of memory\n"); @@ -239,11 +268,11 @@ exit(1); } - oldpos = ftell(f); + oldpos = ftello(f); fseek(f, 0, SEEK_END); - strings_len = ftell(f) - oldpos; + strings_len = ftello(f) - oldpos; fseek(f, oldpos, SEEK_SET); - debug("strings: %i bytes @ 0x%x\n", strings_len, (int)ftell(f)); + debug("strings: %i bytes @ 0x%x\n", strings_len,(int)ftello(f)); string_symbols = malloc(strings_len); if (string_symbols == NULL) { fprintf(stderr, "out of memory\n"); @@ -265,7 +294,7 @@ if (type != 0 && addr != 0) add_symbol_name(&m->symbol_context, - addr, 0, string_symbols + str_index, 0); + addr, 0, string_symbols + str_index, 0, -1); i++; } @@ -287,6 +316,283 @@ /* + * file_load_macho(): + * + * Loads a Mach-O binary image into the emulated memory. The entry point + * is stored in the specified CPU's registers. + * + * TODO: + * + * o) Almost everything. + * + * o) I haven't had time to look into whether Apple's open source + * license is BSD-compatible or not. Perhaps it would be possible + * to use a header file containing symbolic names, and not use + * hardcoded values. + */ +static void file_load_macho(struct machine *m, struct memory *mem, + char *filename, uint64_t *entrypointp, int arch, int *byte_orderp, + int is_64bit, int is_reversed) +{ + FILE *f; + uint64_t entry = 0; + int entry_set = 0; + int encoding = ELFDATA2MSB; + unsigned char buf[65536]; + char *symbols, *strings; + uint32_t cputype, cpusubtype, filetype, ncmds, sizeofcmds, flags; + uint64_t vmaddr, vmsize, fileoff, filesize; + int cmd_type, cmd_len, i, flavor; + int32_t symoff, nsyms, stroff, strsize; + size_t len, pos; + + if (m->cpus[0]->byte_order == EMUL_BIG_ENDIAN) + encoding = ELFDATA2MSB; + + f = fopen(filename, "r"); + if (f == NULL) { + perror(filename); + exit(1); + } + + if (is_64bit) { + fatal("TODO: 64-bit Mach-O. Not supported yet.\n"); + exit(1); + } + if (is_reversed) { + fatal("TODO: Reversed-endianness. Not supported yet.\n"); + exit(1); + } + + len = fread(buf, 1, sizeof(buf), f); + if (len < 100) { + fatal("Bad Mach-O file?\n"); + exit(1); + } + + unencode(cputype, &buf[4], uint32_t); + unencode(cpusubtype, &buf[8], uint32_t); + unencode(filetype, &buf[12], uint32_t); + unencode(ncmds, &buf[16], uint32_t); + unencode(sizeofcmds, &buf[20], uint32_t); + unencode(flags, &buf[24], uint32_t); + + /* debug("cputype=0x%x cpusubtype=0x%x filetype=0x%x\n", + cputype, cpusubtype, filetype); + debug("ncmds=%i sizeofcmds=0x%08x flags=0x%08x\n", + ncmds, sizeofcmds, flags); */ + + /* + * Compare to "normal" values. + * NOTE/TODO: These were for a Darwin (Macintosh PPC) kernel. + */ + if (cputype != 0x12) { + fatal("Error: Unimplemented cputype 0x%x\n", cputype); + exit(1); + } + if (cpusubtype != 0) { + fatal("Error: Unimplemented cpusubtype 0x%x\n", cpusubtype); + exit(1); + } + /* Filetype 2 means an executable image. */ + if (filetype != 2) { + fatal("Error: Unimplemented filetype 0x%x\n", filetype); + exit(1); + } + if (!(flags & 1)) { + fatal("Error: File has 'undefined references'. Cannot" + " be executed.\n", flags); + exit(1); + } + + /* I've only encountered flags == 1 so far. */ + if (flags != 1) { + fatal("Error: Unimplemented flags 0x%x\n", flags); + exit(1); + } + + /* + * Read all load commands: + */ + pos = is_64bit? 32 : 28; + cmd_type = 0; + do { + /* Read command type and length: */ + unencode(cmd_type, &buf[pos], uint32_t); + unencode(cmd_len, &buf[pos+4], uint32_t); + +#if 0 + debug("cmd %i, len=%i\n", cmd_type, cmd_len); + for (i=8; i= ' ' && ch < 127) + debug("%c", ch); + else + debug("."); + } +#endif + switch (cmd_type) { + case 1: /* LC_SEGMENT */ + debug("seg "); + for (i=0; i<16; i++) { + if (buf[pos + 8 + i] == 0) + break; + debug("%c", buf[pos + 8 + i]); + } + unencode(vmaddr, &buf[pos+8+16+0], uint32_t); + unencode(vmsize, &buf[pos+8+16+4], uint32_t); + unencode(fileoff, &buf[pos+8+16+8], uint32_t); + unencode(filesize, &buf[pos+8+16+12], uint32_t); + debug(": vmaddr=0x%x size=0x%x fileoff=0x%x", + (int)vmaddr, (int)vmsize, (int)fileoff); + + if (filesize == 0) { + debug("\n"); + break; + } + + fseek(f, fileoff, SEEK_SET); + + /* Load data from the file: */ + while (filesize != 0) { + unsigned char buf[32768]; + ssize_t len = filesize > sizeof(buf) ? + sizeof(buf) : filesize; + len = fread(buf, 1, len, f); + + /* printf("fread len=%i vmaddr=%x buf[0..]=" + "%02x %02x %02x\n", (int)len, (int)vmaddr, + buf[0], buf[1], buf[2]); */ + + if (len > 0) { + int len2 = 0; + uint64_t vaddr1 = vmaddr & + ((1 << BITS_PER_MEMBLOCK) - 1); + uint64_t vaddr2 = (vmaddr + + len) & ((1 << BITS_PER_MEMBLOCK)-1); + if (vaddr2 < vaddr1) { + len2 = len - vaddr2; + m->cpus[0]->memory_rw(m->cpus[ + 0], mem, vmaddr, &buf[0], + len2, MEM_WRITE, + NO_EXCEPTIONS); + } + m->cpus[0]->memory_rw(m->cpus[0], mem, + vmaddr + len2, &buf[len2], len-len2, + MEM_WRITE, NO_EXCEPTIONS); + } else { + fprintf(stderr, "error reading\n"); + exit(1); + } + + vmaddr += len; + filesize -= len; + } + + debug("\n"); + break; + + case 2: /* LC_SYMTAB */ + unencode(symoff, &buf[pos+8], uint32_t); + unencode(nsyms, &buf[pos+12], uint32_t); + unencode(stroff, &buf[pos+16], uint32_t); + unencode(strsize, &buf[pos+20], uint32_t); + debug("symtable: %i symbols @ 0x%x (strings at " + "0x%x)\n", nsyms, symoff, stroff); + + symbols = malloc(12 * nsyms); + if (symbols == NULL) { + fprintf(stderr, "out of memory\n"); + exit(1); + } + fseek(f, symoff, SEEK_SET); + fread(symbols, 1, 12 * nsyms, f); + + strings = malloc(strsize); + if (strings == NULL) { + fprintf(stderr, "out of memory\n"); + exit(1); + } + fseek(f, stroff, SEEK_SET); + fread(strings, 1, strsize, f); + + for (i=0; isymbol_context, + n_value, 0, strings + n_strx, 0, -1); + } + + free(symbols); + free(strings); + break; + + case 5: debug("unix thread context: "); + /* See http://cvs.sf.net/viewcvs.py/hte/ + HT%20Editor/machostruc.h or similar for details + on the thread struct. */ + unencode(flavor, &buf[pos+8], uint32_t); + if (flavor != 1) { + fatal("unimplemented flavor %i\n", flavor); + exit(1); + } + + if (arch != ARCH_PPC) { + fatal("non-PPC arch? TODO\n"); + exit(1); + } + + unencode(entry, &buf[pos+16], uint32_t); + entry_set = 1; + debug("pc=0x%x\n", (int)entry); + + for (i=1; i<40; i++) { + uint32_t x; + unencode(x, &buf[pos+16+i*4], uint32_t); + if (x != 0) { + fatal("Entry nr %i in the Mach-O" + " thread struct is non-zero" + " (0x%x). This is not supported" + " yet. TODO\n", i, x); + exit(1); + } + } + break; + + default:fatal("WARNING! Unimplemented load command %i!\n", + cmd_type); + } + + pos += cmd_len; + } while (pos < sizeofcmds && cmd_type != 0); + + fclose(f); + + if (!entry_set) { + fatal("No entry point? Aborting.\n"); + exit(1); + } + + *entrypointp = entry; + + if (encoding == ELFDATA2LSB) + *byte_orderp = EMUL_LITTLE_ENDIAN; + else + *byte_orderp = EMUL_BIG_ENDIAN; + + n_executables_loaded ++; +} + + +/* * file_load_ecoff(): * * Loads an ecoff binary image into the emulated memory. The entry point @@ -474,7 +780,7 @@ if (s_scnptr != 0 && s_size != 0 && s_vaddr != 0 && !(s_flags & 0x02)) { /* Remember the current file offset: */ - oldpos = ftell(f); + oldpos = ftello(f); /* Load the section into emulated memory: */ fseek(f, s_scnptr, SEEK_SET); @@ -563,10 +869,10 @@ debug("%c", sym->name[i]); */ v = sym->value[0] + (sym->value[1] << 8) + (sym->value[2] << 16) - + (sym->value[3] << 24); + + ((uint64_t)sym->value[3] << 24); altname = sym->name[4] + (sym->name[5] << 8) + (sym->name[6] << 16) - + (sym->name[3] << 24); + + ((uint64_t)sym->name[3] << 24); t = (sym->type[1] << 8) + sym->type[0]; /* TODO: big endian COFF? */ /* debug("' value=0x%x type=0x%04x", v, t); */ @@ -575,7 +881,7 @@ memcpy(name, sym->name, 8); name[8] = '\0'; add_symbol_name(&m->symbol_context, - v, 0, name, 0); + v, 0, name, 0, -1); n_real_symbols ++; } else if (t == 0x20 && !sym->name[0]) { off_t ofs; @@ -587,7 +893,7 @@ /* debug(" [altname=0x%x '%s']", altname, name); */ add_symbol_name(&m->symbol_context, - v, 0, name, 0); + v, 0, name, 0, -1); n_real_symbols ++; } @@ -658,7 +964,7 @@ add_symbol_name(&m->symbol_context, extsyms[sym_nr].es_value, 0, - symbol_data + extsyms[sym_nr].es_strindex, 0); + symbol_data + extsyms[sym_nr].es_strindex, 0, -1); } free(extsyms); @@ -797,8 +1103,8 @@ bytes[2]; break; case 3: data_start = 4; - vaddr = (bytes[0] << 24) + (bytes[1] << 16) + - (bytes[2] << 8) + bytes[3]; + vaddr = ((uint64_t)bytes[0] << 24) + + (bytes[1] << 16) + (bytes[2]<<8) + bytes[3]; } m->cpus[0]->memory_rw(m->cpus[0], mem, vaddr, &bytes[data_start], count - 1 - data_start, @@ -810,8 +1116,8 @@ case 9: /* switch again, to get the entry point: */ switch (buf[1]) { - case 7: entry = (bytes[0] << 24) + (bytes[1] << 16) + - (bytes[2] << 8) + bytes[3]; + case 7: entry = ((uint64_t)bytes[0] << 24) + + (bytes[1] << 16) + (bytes[2]<<8) + bytes[3]; break; case 8: entry = (bytes[0] << 16) + (bytes[1] << 8) + bytes[2]; @@ -900,7 +1206,7 @@ } debug("RAW: 0x%llx bytes @ 0x%08llx", - (long long) (ftell(f) - skip), (long long)loadaddr); + (long long) (ftello(f) - skip), (long long)loadaddr); if (skip != 0) debug(" (0x%llx bytes of header skipped)", (long long)skip); debug("\n"); @@ -1060,12 +1366,42 @@ ok = 1; } break; + case ARCH_ARM: + switch (emachine) { + case EM_ARM: + ok = 1; + } + break; + case ARCH_AVR: + switch (emachine) { + case EM_AVR: + ok = 1; + } + break; case ARCH_HPPA: switch (emachine) { case EM_PARISC: ok = 1; } break; + case ARCH_I960: + switch (emachine) { + case EM_960: + ok = 1; + } + break; + case ARCH_IA64: + switch (emachine) { + case EM_IA_64: + ok = 1; + } + break; + case ARCH_M68K: + switch (emachine) { + case EM_68K: + ok = 1; + } + break; case ARCH_MIPS: switch (emachine) { case EM_MIPS: @@ -1080,6 +1416,12 @@ ok = 1; } break; + case ARCH_SH: + switch (emachine) { + case EM_SH: + ok = 1; + } + break; case ARCH_SPARC: switch (emachine) { case EM_SPARC: @@ -1100,12 +1442,6 @@ break; } break; - case ARCH_ARM: - switch (emachine) { - case EM_ARM: - ok = 1; - } - break; default: fatal("file.c: INTERNAL ERROR: Unimplemented arch!\n"); } @@ -1137,8 +1473,7 @@ * TODO: Find out what e_flag actually contains. * TODO 2: This only sets mips16 for cpu 0. Yuck. Fix this! */ - - if (((eflags >> 24) & 0xff) == 0x24) { + if (arch == ARCH_MIPS && ((eflags >> 24) & 0xff) == 0x24) { debug("MIPS16 encoding (e_flags = 0x%08x)\n", eflags); #ifdef ENABLE_MIPS16 m->cpus[0]->cd.mips.mips16 = 1; @@ -1147,7 +1482,7 @@ "(or use the --mips16 configure option)\n"); exit(1); #endif - } else if (eentry & 0x3) { + } else if (arch == ARCH_MIPS && (eentry & 0x3)) { debug("MIPS16 encoding (eentry not 32-bit aligned)\n"); #ifdef ENABLE_MIPS16 m->cpus[0]->cd.mips.mips16 = 1; @@ -1158,6 +1493,15 @@ #endif } + /* + * SH64: 32-bit instruction encoding? TODO + */ + if (arch == ARCH_SH && (eentry & 1)) { + debug("SH64: 32-bit instruction encoding\n"); + m->cpus[0]->cd.sh.compact = 0; + m->cpus[0]->cd.sh.bits = 64; + } + /* Read the program headers: */ for (i=0; isymbol_context, - addr, size, symbol_strings + st_name, 0); + addr, size, symbol_strings + st_name, + 0, -1); } if (strcmp(symbol_strings + st_name, "_gp") == 0) { @@ -1544,7 +1914,7 @@ char *filename, uint64_t *entrypointp, int arch, uint64_t *gpp, int *byte_orderp, uint64_t *tocp) { - int iadd = 4; + int iadd = DEBUG_INDENTATION, old_quiet_mode; FILE *f; unsigned char buf[12]; unsigned char buf2[2]; @@ -1570,9 +1940,13 @@ if (filename[0] == '@') return; - debug("loading %s:\n", filename); + debug("loading %s%s\n", filename, verbose >= 2? ":" : ""); debug_indentation(iadd); + old_quiet_mode = quiet_mode; + if (verbose < 2) + quiet_mode = 1; + f = fopen(filename, "r"); if (f == NULL) { file_load_raw(machine, mem, filename, entrypointp); @@ -1580,7 +1954,7 @@ } fseek(f, 0, SEEK_END); - size = ftell(f); + size = ftello(f); fseek(f, 0, SEEK_SET); memset(buf, 0, sizeof(buf)); @@ -1609,12 +1983,31 @@ entrypointp, arch, byte_orderp); goto ret; } + if (buf[0]==0x00 && buf[1]==0x87 && buf[2]==0x01 && buf[3]==0x08) { + /* M68K a.out */ + file_load_aout(machine, mem, filename, + AOUT_FLAG_VADDR_ZERO_HACK /* for OpenBSD/mac68k */, + entrypointp, arch, byte_orderp); + goto ret; + } + if (buf[0]==0x00 && buf[1]==0x8f && buf[2]==0x01 && buf[3]==0x0b) { + /* ARM a.out */ + file_load_aout(machine, mem, filename, AOUT_FLAG_FROM_BEGINNING, + entrypointp, arch, byte_orderp); + goto ret; + } if (buf[0]==0x00 && buf[1]==0x86 && buf[2]==0x01 && buf[3]==0x0b) { /* i386 a.out (old OpenBSD and NetBSD etc) */ file_load_aout(machine, mem, filename, AOUT_FLAG_FROM_BEGINNING, entrypointp, arch, byte_orderp); goto ret; } + if (buf[0]==0x01 && buf[1]==0x03 && buf[2]==0x01 && buf[3]==0x07) { + /* SPARC a.out (old 32-bit NetBSD etc) */ + file_load_aout(machine, mem, filename, AOUT_FLAG_NO_SIZES, + entrypointp, arch, byte_orderp); + goto ret; + } if (buf[0]==0x00 && buf[2]==0x00 && buf[8]==0x7a && buf[9]==0x75) { /* DEC OSF1 on MIPS: */ file_load_aout(machine, mem, filename, AOUT_FLAG_DECOSF1, @@ -1623,6 +2016,22 @@ } /* + * Is it a Mach-O file? + */ + if (buf[0] == 0xfe && buf[1] == 0xed && buf[2] == 0xfa && + (buf[3] == 0xce || buf[3] == 0xcf)) { + file_load_macho(machine, mem, filename, entrypointp, + arch, byte_orderp, buf[3] == 0xcf, 0); + goto ret; + } + if ((buf[0] == 0xce || buf[0] == 0xcf) && buf[1] == 0xfa && + buf[2] == 0xed && buf[3] == 0xfe) { + file_load_macho(machine, mem, filename, entrypointp, + arch, byte_orderp, buf[0] == 0xcf, 1); + goto ret; + } + + /* * Is it an ecoff? * * TODO: What's the deal with the magic value's byte order? Sometimes @@ -1701,5 +2110,6 @@ ret: debug_indentation(-iadd); + quiet_mode = old_quiet_mode; }