/[gxemul]/trunk/src/cpus/generate_arm_multi.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/src/cpus/generate_arm_multi.c

Parent Directory | Revision Log


Revision 18 - (hide annotations)
Mon Oct 8 16:19:11 2007 UTC (16 years, 5 months ago) by dpavlin
File MIME type: text/plain
File size: 9909 byte(s)
++ trunk/HISTORY	(local)
$Id: HISTORY,v 1.1004 2005/10/27 14:01:10 debug Exp $
20051011        Passing -A as the default boot arg for CATS (works fine with
                OpenBSD/cats).
20051012	Fixing the VGA cursor offset bug, and speeding up framebuffer
		redraws if character cells contain the same thing as during
		the last redraw.
20051013	Adding a slow strd ARM instruction hack.
20051017	Minor updates: Adding a dummy i80321 Verde controller (for
		XScale emulation), fixing the disassembly of the ARM "ldrd"
		instruction, adding "support" for less-than-4KB pages for ARM
		(by not adding them to translation tables).
20051020	Continuing on some HPCarm stuff. A NetBSD/hpcarm kernel prints
		some boot messages on an emulated Jornada 720.
		Making dev_ram work better with dyntrans (speeds up some things
		quite a bit).
20051021	Automatically generating some of the most common ARM load/store
		multiple instructions.
20051022	Better statistics gathering for the ARM load/store multiple.
		Various other dyntrans and device updates.
20051023	Various minor updates.
20051024	Continuing; minor device and dyntrans fine-tuning. Adding the
		first "reasonable" instruction combination hacks for ARM (the
		cores of NetBSD/cats' memset and memcpy).
20051025	Fixing a dyntrans-related bug in dev_vga. Also changing the
		dyntrans low/high access notification to only be updated on
		writes, not reads. Hopefully it will be enough. (dev_vga in
		charcell mode now seems to work correctly with both reads and
		writes.)
		Experimenting with gathering dyntrans statistics (which parts
		of emulated RAM are actually executed), and adding
		instruction combination hacks for cache cleaning and a part of
		NetBSD's scanc() function.
20051026	Adding a bitmap for ARM emulation which indicates if a page is
		(specifically) user accessible; loads and stores with the t-
		flag set can now use the translation arrays, which results in
		a measurable speedup.
20051027	Dyntrans updates; adding an extra bitmap array for 32-bit
		emulation modes, speeding up the check whether a physical page
		has any code translations or not (O(n) -> O(1)). Doing a
		similar reduction of O(n) to O(1) by avoiding the scan through
		the translation entries on a translation update (32-bit mode
		only).
		Various other minor hacks.
20051029	Quick release, without any testing at all.

==============  RELEASE 0.3.6.2  ==============


1 dpavlin 18 /*
2     * Copyright (C) 2005 Anders Gavare. All rights reserved.
3     *
4     * Redistribution and use in source and binary forms, with or without
5     * modification, are permitted provided that the following conditions are met:
6     *
7     * 1. Redistributions of source code must retain the above copyright
8     * notice, this list of conditions and the following disclaimer.
9     * 2. Redistributions in binary form must reproduce the above copyright
10     * notice, this list of conditions and the following disclaimer in the
11     * documentation and/or other materials provided with the distribution.
12     * 3. The name of the author may not be used to endorse or promote products
13     * derived from this software without specific prior written permission.
14     *
15     * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16     * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18     * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19     * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20     * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21     * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23     * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24     * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25     * SUCH DAMAGE.
26     *
27     *
28     * $Id: generate_arm_multi.c,v 1.6 2005/10/26 14:37:03 debug Exp $
29     *
30     * Generation of commonly used ARM load/store multiple instructions.
31     * The main idea is to first check whether a load/store would be possible
32     * without going outside a page, and if so, use the host_load or _store
33     * arrays for quick access to emulated RAM. Otherwise, fall back to using
34     * the generic bdt_load() or bdt_store().
35     */
36    
37     #include <stdio.h>
38     #include <stdlib.h>
39     #include "misc.h"
40    
41    
/*
 *  generate_opcode():
 *
 *  Writes to stdout the C source of one specialised handler for the given
 *  ARM load/store multiple opcode. The emitted text starts with
 *  "X(multi_0x<opcode>) {" and ends with "Y(multi_0x<opcode>)".
 *
 *  The emitted handler fetches the base register, applies the +/- 4
 *  adjustment when the P bit is set, looks up the host page pointer in
 *  host_load[] or host_store[] (indexed by addr >> 12) and masks the address
 *  with 0xffc. If the page pointer is non-NULL and the address test shows
 *  that all words lie inside that page, the registers are transferred with
 *  direct p[index] accesses; otherwise the handler calls the generic
 *  bdt_load or bdt_store instruction.
 *
 *  The generator prints a message on stderr and exits with status 1 when
 *  the opcode is not an ldm/stm, has an empty register list, has the S bit
 *  set, or uses r15 as the base register.
 *
 *  TODO:
 *
 *	o)  On 64-bit hosts, load/store two registers at a time. This
 *	    feature depends both on the alignment of the base register,
 *	    and the specific set of registers being loaded/stored.
 *
 *	o)  Alignment checks. (Optional?)
 *
 *	o)  For accesses that cross page boundaries, use two pages using
 *	    the fast method instead of calling the generic function?
 */
void generate_opcode(uint32_t opcode)
{
	/*  Bit fields of the block data transfer encoding:  */
	const int pre_index = (opcode >> 24) & 1;
	const int going_up = (opcode >> 23) & 1;
	const int s_bit = (opcode >> 22) & 1;
	const int writeback = (opcode >> 21) & 1;
	const int is_load = (opcode >> 20) & 1;
	const int base_reg = (opcode >> 16) & 15;
	const int pc_in_list = (opcode >> 15) & 1;

	const char *direction = is_load ? "load" : "store";
	const char *step = going_up ? "+=" : "-=";
	uint32_t remaining;
	int count = 0, reg, slot;

	if ((opcode & 0x0e000000) != 0x08000000) {
		fprintf(stderr, "opcode 0x%08x is not an ldm/stm\n", opcode);
		exit(1);
	}

	/*  Number of registers in the list (bits 15..0):  */
	for (remaining = opcode & 0xffff; remaining != 0;
	    remaining &= remaining - 1)
		count ++;

	/*  TODO: Check for register pairs, for 64-bit load/stores  */

	if (count == 0) {
		fprintf(stderr, "opcode 0x%08x has no registers set\n", opcode);
		exit(1);
	}

	if (s_bit) {
		fprintf(stderr, "opcode 0x%08x has s-bit set\n", opcode);
		exit(1);
	}

	if (base_reg == 15) {
		fprintf(stderr, "opcode 0x%08x has r=15\n", opcode);
		exit(1);
	}

	/*  Function header and local variables:  */
	printf("\nX(multi_0x%08x) {\n", opcode);
	printf("\tunsigned char *page;\n");
	printf("\tuint32_t addr = cpu->cd.arm.r[%i];\n", base_reg);

	/*  A store of r15 writes a value derived from the position of
	    the current instruction call, plus 12:  */
	if (pc_in_list && !is_load) {
		printf("\tuint32_t tmp_pc = ((size_t)ic - (size_t)\n\t"
		    " cpu->cd.arm.cur_ic_page) / sizeof(struct "
		    "arm_instr_call);\n");
		printf("\ttmp_pc = ((cpu->cd.arm.r[ARM_PC] & "
		    "~((ARM_IC_ENTRIES_PER_PAGE-1)"
		    "\n\t << ARM_INSTR_ALIGNMENT_SHIFT)))\n"
		    "\t + (tmp_pc << ARM_INSTR_ALIGNMENT_SHIFT) + 12;\n");
	}

	if (pre_index)
		printf("\taddr %s 4;\n", step);

	printf("\tpage = cpu->cd.arm.host_%s[addr >> 12];\n", direction);
	printf("\taddr &= 0xffc;\n");

	/*
	 *  The page-crossing test depends only on the direction: pre- and
	 *  post-indexed forms share it because addr has already been
	 *  adjusted above. A single-register transfer needs no test.
	 */
	printf("\tif (");
	if (count > 1) {
		if (going_up)
			printf("addr <= 0x%x && ", 0x1000 - 4*count);
		else
			printf("addr >= 0x%x && ", 4*(count-1));
	}
	printf("page != NULL) {\n");

	printf("\t\tuint32_t *p = (uint32_t *) (page + addr);\n");

	/*
	 *  Registers are always emitted in ascending order. When
	 *  decrementing, the lowest register gets the most negative index,
	 *  so that the highest register ends up at p[0].
	 */
	slot = going_up ? 0 : 1 - count;
	for (reg = 0; reg < 16; reg++) {
		if (!((opcode >> reg) & 1))
			continue;

		if (!is_load) {
			if (reg == 15)
				printf("\t\tp[%i] = tmp_pc;\n", slot);
			else
				printf("\t\tp[%i] = cpu->cd.arm.r[%i];\n",
				    slot, reg);
		} else if (!(writeback && reg == base_reg)) {
			/*  (The base register is not loaded when
			    writeback is in use.)  */
			printf("\t\tcpu->cd.arm.r[%i] = p[%i];\n", reg, slot);
		}

		slot ++;
	}

	if (writeback)
		printf("\t\tcpu->cd.arm.r[%i] %s %i;\n",
		    base_reg, step, 4*count);

	/*  Loading r15 changes the program counter:  */
	if (pc_in_list && is_load) {
		printf("\t\tcpu->pc = cpu->cd.arm.r[15];\n"
		    "\t\tarm_pc_to_pointers(cpu);\n");
	}

	/*  Slow path: let the generic implementation do the work.  */
	printf("\t} else\n");
	printf("\t\tinstr(bdt_%s)(cpu, ic);\n", direction);

	printf("}\nY(multi_0x%08x)\n", opcode);
}
197    
198    
/*
 *  multi_table_index():
 *
 *  Gathers the eight opcode bits selected by the mask 0x00950154 (bits 23,
 *  20, 18, 16, 8, 6, 4 and 2) into a lookup table index in the range 0..255.
 */
static int multi_table_index(uint32_t opcode)
{
	return (int) (((opcode & 0x00800000) >> 16)
	    | ((opcode & 0x00100000) >> 14)
	    | ((opcode & 0x00040000) >> 13)
	    | ((opcode & 0x00010000) >> 12)
	    | ((opcode & 0x00000100) >> 5)
	    | ((opcode & 0x00000040) >> 4)
	    | ((opcode & 0x00000010) >> 3)
	    | ((opcode & 0x00000004) >> 2));
}


/*
 *  parse_opcode():
 *
 *  Converts one command line argument into a 32-bit opcode. Decimal, octal
 *  and hexadecimal notations are accepted (strtoul() with base 0).
 *
 *  strtoul() is used rather than strtol(), because on hosts where long is
 *  32 bits wide strtol() clamps every opcode above 0x7fffffff to LONG_MAX.
 *
 *  Prints a message on stderr and exits with status 1 if the argument is
 *  empty, has trailing garbage, or does not fit in 32 bits.
 */
static uint32_t parse_opcode(const char *arg)
{
	char *end;
	unsigned long value = strtoul(arg, &end, 0);

	if (end == arg || *end != '\0' || value > 0xffffffffUL) {
		fprintf(stderr, "invalid opcode '%s'\n", arg);
		exit(1);
	}

	return (uint32_t) value;
}


/*
 *  print_function_entries():
 *
 *  Prints the 16 function pointer table entries for one opcode: the 14
 *  condition-suffixed variants, the unconditional one, and arm_instr_nop
 *  in the last slot.
 */
static void print_function_entries(uint32_t opcode)
{
	static const char *const suffix[15] = {
		"__eq", "__ne", "__cs", "__cc", "__mi", "__pl", "__vs",
		"__vc", "__hi", "__ls", "__ge", "__lt", "__gt", "__le", ""
	};
	int c;

	for (c = 0; c < 15; c++)
		printf("\tarm_instr_multi_0x%08x%s,\n",
		    (unsigned int) opcode, suffix[c]);
	printf("\tarm_instr_nop,\n");
}


/*
 *  main():
 *
 *  Normal ARM code seems to only use about a few hundred of the 2^24 possible
 *  load/store multiple instructions. (I'm not counting the s-bit now.)
 *  Instead of having a linear array of 100s of entries, we can select a list
 *  to scan based on a few bits (*), and those lists will be shorter.
 *
 *  (*)  By running experiment_arm_multi.c on statistics gathered from running
 *       NetBSD/cats, it seems that choosing the following 8 bits results in
 *       the shortest linear lists:
 *
 *           xxxx100P USWLnnnn llllllll llllllll
 *                    ^  ^ ^ ^        ^  ^ ^ ^    (0x00950154)
 *
 *  Every command line argument is one opcode. The output, on stdout, is:
 *
 *	1)  one handler function per opcode (see generate_opcode()),
 *	2)  256 zero-terminated uint32_t arrays multi_opcode_<n>, listing
 *	    the opcodes that map to index <n>,
 *	3)  for every index with at least one opcode, a function pointer
 *	    array multi_opcode_f_<n> with 16 entries per opcode,
 *	4)  the top-level arrays multi_opcode[256] and multi_opcode_f[256]
 *	    (the latter with NULL for unused indices).
 *
 *  Returns 0 on success. Exits with status 1 if no opcodes were given, or
 *  if an argument is not a valid 32-bit number.
 */
int main(int argc, char *argv[])
{
	int i, j, n_opcodes;
	int n_used[256];
	uint32_t *opcodes;

	if (argc < 2) {
		fprintf(stderr, "usage: %s opcode [..]\n", argv[0]);
		exit(1);
	}

	/*  Parse (and validate) every argument exactly once:  */
	n_opcodes = argc - 1;
	opcodes = malloc(sizeof *opcodes * (size_t) n_opcodes);
	if (opcodes == NULL) {
		fprintf(stderr, "out of memory\n");
		exit(1);
	}
	for (i=0; i<n_opcodes; i++)
		opcodes[i] = parse_opcode(argv[i + 1]);

	/*  Count how many opcodes end up in each of the 256 lists:  */
	for (j=0; j<256; j++)
		n_used[j] = 0;
	for (i=0; i<n_opcodes; i++)
		n_used[multi_table_index(opcodes[i])] ++;

	printf("\n/*  AUTOMATICALLY GENERATED! Do not edit.  */\n\n");

	/*  Generate the opcode functions:  */
	for (i=0; i<n_opcodes; i++)
		generate_opcode(opcodes[i]);

	/*  Generate 256 small lookup tables (each terminated by a 0):  */
	for (j=0; j<256; j++) {
		printf("\nuint32_t multi_opcode_%i[%i] = {\n", j, n_used[j]+1);
		for (i=0; i<n_opcodes; i++)
			if (multi_table_index(opcodes[i]) == j)
				printf("\t0x%08x,\n",
				    (unsigned int) opcodes[i]);
		printf("0 };\n");
	}

	/*  Generate tables with function pointers, for the lists that
	    are actually in use:  */
	for (j=0; j<256; j++) {
		if (n_used[j] == 0)
			continue;
		printf("void (*multi_opcode_f_%i[%i])(struct cpu *,"
		    " struct arm_instr_call *) = {\n", j, n_used[j]*16);
		for (i=0; i<n_opcodes; i++)
			if (multi_table_index(opcodes[i]) == j)
				print_function_entries(opcodes[i]);
		printf("};\n");
	}

	/*  Top-level arrays, four entries per output line:  */
	printf("\nuint32_t *multi_opcode[256] = {\n");
	for (i=0; i<256; i++) {
		printf(" multi_opcode_%i,", i);
		if ((i % 4) == 3)
			printf("\n");
	}
	printf("};\n");

	printf("\nvoid (**multi_opcode_f[256])(struct cpu *,"
	    " struct arm_instr_call *) = {\n");
	for (i=0; i<256; i++) {
		if (n_used[i] > 0)
			printf(" multi_opcode_f_%i,", i);
		else
			printf(" NULL,");
		if ((i % 4) == 3)
			printf("\n");
	}
	printf("};\n");

	free(opcodes);

	return 0;
}
339    

  ViewVC Help
Powered by ViewVC 1.1.26