/[gxemul]/trunk/src/cpus/generate_arm_multi.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/src/cpus/generate_arm_multi.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 18 - (show annotations)
Mon Oct 8 16:19:11 2007 UTC (16 years, 6 months ago) by dpavlin
File MIME type: text/plain
File size: 9909 byte(s)
++ trunk/HISTORY	(local)
$Id: HISTORY,v 1.1004 2005/10/27 14:01:10 debug Exp $
20051011        Passing -A as the default boot arg for CATS (works fine with
                OpenBSD/cats).
20051012	Fixing the VGA cursor offset bug, and speeding up framebuffer
		redraws if character cells contain the same thing as during
		the last redraw.
20051013	Adding a slow strd ARM instruction hack.
20051017	Minor updates: Adding a dummy i80321 Verde controller (for
		XScale emulation), fixing the disassembly of the ARM "ldrd"
		instruction, adding "support" for less-than-4KB pages for ARM
		(by not adding them to translation tables).
20051020	Continuing on some HPCarm stuff. A NetBSD/hpcarm kernel prints
		some boot messages on an emulated Jornada 720.
		Making dev_ram work better with dyntrans (speeds up some things
		quite a bit).
20051021	Automatically generating some of the most common ARM load/store
		multiple instructions.
20051022	Better statistics gathering for the ARM load/store multiple.
		Various other dyntrans and device updates.
20051023	Various minor updates.
20051024	Continuing; minor device and dyntrans fine-tuning. Adding the
		first "reasonable" instruction combination hacks for ARM (the
		cores of NetBSD/cats' memset and memcpy).
20051025	Fixing a dyntrans-related bug in dev_vga. Also changing the
		dyntrans low/high access notification to only be updated on
		writes, not reads. Hopefully it will be enough. (dev_vga in
		charcell mode now seems to work correctly with both reads and
		writes.)
		Experimenting with gathering dyntrans statistics (which parts
		of emulated RAM are actually executed), and adding
		instruction combination hacks for cache cleaning and a part of
		NetBSD's scanc() function.
20051026	Adding a bitmap for ARM emulation which indicates if a page is
		(specifically) user accessible; loads and stores with the t-
		flag set can now use the translation arrays, which results in
		a measurable speedup.
20051027	Dyntrans updates; adding an extra bitmap array for 32-bit
		emulation modes, speeding up the check whether a physical page
		has any code translations or not (O(n) -> O(1)). Doing a
		similar reduction of O(n) to O(1) by avoiding the scan through
		the translation entries on a translation update (32-bit mode
		only).
		Various other minor hacks.
20051029	Quick release, without any testing at all.

==============  RELEASE 0.3.6.2  ==============


1 /*
2 * Copyright (C) 2005 Anders Gavare. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * 3. The name of the author may not be used to endorse or promote products
13 * derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 *
28 * $Id: generate_arm_multi.c,v 1.6 2005/10/26 14:37:03 debug Exp $
29 *
30 * Generation of commonly used ARM load/store multiple instructions.
31 * The main idea is to first check whether a load/store would be possible
32 * without going outside a page, and if so, use the host_load or _store
33 * arrays for quick access to emulated RAM. Otherwise, fall back to using
34 * the generic bdt_load() or bdt_store().
35 */
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include "misc.h"
40
41
42 /*
43 * generate_opcode():
44 *
45 * Given an ARM load/store multiple opcode, produce equivalent "hardcoded"
46 * C code which emulates the opcode.
47 *
48 * TODO:
49 *
50 * o) On 64-bit hosts, load/store two registers at a time. This
51 * feature depends both on the alignment of the base register,
52 * and the specific set of registers being loaded/stored.
53 *
54 * o) Alignment checks. (Optional?)
55 *
56 * o) For accesses that cross page boundaries, use two pages using
57 * the fast method instead of calling the generic function?
58 */
void generate_opcode(uint32_t opcode)
{
	/*  Fields of the load/store multiple encoding:  */
	const int base_reg  = (opcode >> 16) & 15;
	const int pre       = (opcode & 0x01000000) != 0;
	const int up        = (opcode & 0x00800000) != 0;
	const int s_bit     = (opcode & 0x00400000) != 0;
	const int writeback = (opcode & 0x00200000) != 0;
	const int is_load   = (opcode & 0x00100000) != 0;
	const int has_pc    = (opcode & 0x8000) != 0;
	int n_regs = 0, reg, slot;

	/*  Only ldm/stm opcodes are accepted:  */
	if ((opcode & 0x0e000000) != 0x08000000) {
		fprintf(stderr, "opcode 0x%08x is not an ldm/stm\n", opcode);
		exit(1);
	}

	/*  Number of registers named in the 16-bit register list:  */
	for (reg = 0; reg < 16; reg++)
		n_regs += (opcode >> reg) & 1;

	/*  TODO: Check for register pairs, for 64-bit load/stores  */

	/*  Forms which this generator refuses to emit code for:  */
	if (n_regs == 0) {
		fprintf(stderr, "opcode 0x%08x has no registers set\n", opcode);
		exit(1);
	}

	if (s_bit) {
		fprintf(stderr, "opcode 0x%08x has s-bit set\n", opcode);
		exit(1);
	}

	if (base_reg == 15) {
		fprintf(stderr, "opcode 0x%08x has r=15\n", opcode);
		exit(1);
	}

	/*  Function header, and the start address taken from the base:  */
	printf("\nX(multi_0x%08x) {\n", opcode);

	printf("\tunsigned char *page;\n");
	printf("\tuint32_t addr = cpu->cd.arm.r[%i];\n", base_reg);

	/*
	 *  A store which includes the pc needs the value to store; the
	 *  emitted code derives it from the position of ic within the
	 *  current ic page, plus 12.
	 */
	if (!is_load && has_pc) {
		printf("\tuint32_t tmp_pc = ((size_t)ic - (size_t)\n\t"
		    " cpu->cd.arm.cur_ic_page) / sizeof(struct "
		    "arm_instr_call);\n"
		    "\ttmp_pc = ((cpu->cd.arm.r[ARM_PC] & "
		    "~((ARM_IC_ENTRIES_PER_PAGE-1)"
		    "\n\t << ARM_INSTR_ALIGNMENT_SHIFT)))\n"
		    "\t + (tmp_pc << ARM_INSTR_ALIGNMENT_SHIFT) + 12;\n");
	}

	/*  The "before" variants step the address ahead of the lookup:  */
	if (pre)
		printf("\taddr %s 4;\n", up? "+=" : "-=");

	printf("\tpage = cpu->cd.arm.host_%s[addr >> 12];\n",
	    is_load? "load" : "store");

	printf("\taddr &= 0xffc;\n");

	/*
	 *  Fast path condition: a host page must exist, and when more than
	 *  one register is transferred, all words must lie within that
	 *  page. The limit depends only on the direction, since addr has
	 *  already been adjusted for the "before" variants above.
	 */
	printf("\tif (");
	if (n_regs > 1) {
		if (up)
			printf("addr <= 0x%x && ", 0x1000 - 4*n_regs);
		else
			printf("addr >= 0x%x && ", 4*(n_regs-1));
	}
	printf("page != NULL) {\n");

	printf("\t\tuint32_t *p = (uint32_t *) (page + addr);\n");

	/*
	 *  Registers are always walked from r0 upwards. When incrementing,
	 *  the first word is p[0]; when decrementing, the lowest register
	 *  uses the lowest address, p[1 - n_regs], and the last one p[0].
	 */
	slot = up? 0 : 1 - n_regs;
	for (reg = 0; reg < 16; reg++) {
		if (!((opcode >> reg) & 1))
			continue;

		if (is_load) {
			/*  Skip loading the base register when writeback
			    is used; its word is stepped over though.  */
			if (!(writeback && reg == base_reg))
				printf("\t\tcpu->cd.arm.r[%i] = p[%i];\n",
				    reg, slot);
		} else if (reg == 15)
			printf("\t\tp[%i] = tmp_pc;\n", slot);
		else
			printf("\t\tp[%i] = cpu->cd.arm.r[%i];\n", slot, reg);

		slot ++;
	}

	/*  Writeback moves the base register by the whole transfer size:  */
	if (writeback)
		printf("\t\tcpu->cd.arm.r[%i] %s %i;\n",
		    base_reg, up? "+=" : "-=", 4*n_regs);

	/*  A loaded pc must be propagated to cpu->pc and the pointers:  */
	if (is_load && has_pc) {
		printf("\t\tcpu->pc = cpu->cd.arm.r[15];\n"
		    "\t\tarm_pc_to_pointers(cpu);\n");
	}

	/*  Slow path: let the generic bdt_load/bdt_store do the work.  */
	printf("\t} else\n");
	printf("\t\tinstr(bdt_%s)(cpu, ic);\n", is_load? "load" : "store");

	printf("}\nY(multi_0x%08x)\n", opcode);
}
197
198
199 /*
200 * main():
201 *
202 * Normal ARM code seems to only use a few hundred of the 2^24 possible
203 * load/store multiple instructions. (I'm not counting the s-bit now.)
204 * Instead of having a linear array of 100s of entries, we can select a list
205 * to scan based on a few bits (*), and those lists will be shorter.
206 *
207 * (*) By running experiment_arm_multi.c on statistics gathered from running
208 * NetBSD/cats, it seems that choosing the following 8 bits results in
209 * the shortest linear lists:
210 *
211 * xxxx100P USWLnnnn llllllll llllllll
212 * ^ ^ ^ ^ ^ ^ ^ ^ (0x00950154)
213 */
/*
 *  multi_opcode_bucket():
 *
 *  Gathers the eight selector bits of a load/store multiple opcode (mask
 *  0x00950154) into a value in the range 0..255. The value selects which
 *  of the 256 generated lookup tables the opcode is placed in.
 */
static int multi_opcode_bucket(uint32_t opcode)
{
	return (int) (((opcode & 0x00800000) >> 16)
	    | ((opcode & 0x00100000) >> 14)
	    | ((opcode & 0x00040000) >> 13)
	    | ((opcode & 0x00010000) >> 12)
	    | ((opcode & 0x00000100) >> 5)
	    | ((opcode & 0x00000040) >> 4)
	    | ((opcode & 0x00000010) >> 3)
	    | ((opcode & 0x00000004) >> 2));
}


/*
 *  parse_opcode():
 *
 *  Converts one command line argument into a 32-bit opcode. Decimal, octal
 *  (leading 0) and hexadecimal (leading 0x) notations are accepted.
 *
 *  strtoul() is used rather than strtol(): on hosts where long is 32 bits
 *  wide, strtol() clamps values from 0x80000000 and upwards to LONG_MAX,
 *  which would silently corrupt e.g. all opcodes with condition code 0xe.
 *
 *  An argument which is not a complete number, or which does not fit in
 *  32 bits, causes an error message on stderr and exit(1).
 */
static uint32_t parse_opcode(const char *progname, const char *arg)
{
	char *end;
	unsigned long value = strtoul(arg, &end, 0);

	if (end == arg || *end != '\0' || value > 0xffffffffUL) {
		fprintf(stderr, "%s: invalid opcode '%s'\n", progname, arg);
		exit(1);
	}

	return (uint32_t) value;
}


/*
 *  main():
 *
 *  Takes one or more ldm/stm opcodes on the command line, and writes the
 *  following to stdout:
 *
 *	o)  one function per opcode (see generate_opcode()),
 *	o)  256 zero-terminated opcode lists, multi_opcode_0..255,
 *	o)  for each non-empty list, a table with 16 function pointers per
 *	    opcode: the 14 conditional variants, the unconditional one,
 *	    and arm_instr_nop,
 *	o)  the tables multi_opcode[] and multi_opcode_f[], which index the
 *	    lists and tables above by bucket number.
 *
 *  Returns 0 on success. Exits with status 1 on usage errors, on invalid
 *  arguments, and if memory for the opcode list cannot be allocated.
 */
int main(int argc, char *argv[])
{
	/*  Name suffixes of the generated variants, in table order:  */
	static const char *const variant_suffix[] = {
		"__eq", "__ne", "__cs", "__cc", "__mi", "__pl", "__vs",
		"__vc", "__hi", "__ls", "__ge", "__lt", "__gt", "__le", ""
	};
	const int n_variants = (int) (sizeof(variant_suffix) /
	    sizeof(variant_suffix[0]));
	uint32_t *opcodes;
	int n_opcodes, i, j, k;
	int n_used[256];

	if (argc < 2) {
		fprintf(stderr, "usage: %s opcode [..]\n", argv[0]);
		exit(1);
	}

	/*  Parse every argument exactly once, and count the bucket sizes:  */
	n_opcodes = argc - 1;
	opcodes = malloc((size_t) n_opcodes * sizeof *opcodes);
	if (opcodes == NULL) {
		fprintf(stderr, "%s: out of memory\n", argv[0]);
		exit(1);
	}

	for (j=0; j<256; j++)
		n_used[j] = 0;

	for (i=0; i<n_opcodes; i++) {
		opcodes[i] = parse_opcode(argv[0], argv[i + 1]);
		n_used[multi_opcode_bucket(opcodes[i])] ++;
	}

	printf("\n/* AUTOMATICALLY GENERATED! Do not edit. */\n\n");

	/*  Generate the opcode functions:  */
	for (i=0; i<n_opcodes; i++)
		generate_opcode(opcodes[i]);

	/*  Generate 256 small lookup tables, each terminated by a 0:  */
	for (j=0; j<256; j++) {
		printf("\nuint32_t multi_opcode_%i[%i] = {\n", j,
		    n_used[j] + 1);
		for (i=0; i<n_opcodes; i++)
			if (multi_opcode_bucket(opcodes[i]) == j)
				printf("\t0x%08x,\n",
				    (unsigned int) opcodes[i]);
		printf("0 };\n");
	}

	/*  Generate tables with function pointers (non-empty buckets):  */
	for (j=0; j<256; j++) {
		if (n_used[j] == 0)
			continue;

		printf("void (*multi_opcode_f_%i[%i])(struct cpu *,"
		    " struct arm_instr_call *) = {\n", j, n_used[j] * 16);
		for (i=0; i<n_opcodes; i++) {
			if (multi_opcode_bucket(opcodes[i]) != j)
				continue;

			for (k=0; k<n_variants; k++)
				printf("\tarm_instr_multi_0x%08x%s,\n",
				    (unsigned int) opcodes[i],
				    variant_suffix[k]);

			/*  The 16th entry of each group is a nop:  */
			printf("\tarm_instr_nop,\n");
		}
		printf("};\n");
	}

	/*
	 *  Top-level tables, indexed by bucket number. A line break is
	 *  emitted after every entry whose index is a multiple of four.
	 */
	printf("\nuint32_t *multi_opcode[256] = {\n");
	for (i=0; i<256; i++) {
		printf(" multi_opcode_%i,", i);
		if ((i % 4) == 0)
			printf("\n");
	}
	printf("};\n");

	printf("\nvoid (**multi_opcode_f[256])(struct cpu *,"
	    " struct arm_instr_call *) = {\n");
	for (i=0; i<256; i++) {
		if (n_used[i] > 0)
			printf(" multi_opcode_f_%i,", i);
		else
			printf(" NULL,");
		if ((i % 4) == 0)
			printf("\n");
	}
	printf("};\n");

	free(opcodes);

	return 0;
}
339

  ViewVC Help
Powered by ViewVC 1.1.26