/*
 *  Copyright (C) 2005 Anders Gavare. All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 *  ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 *  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 *  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 *  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 *  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 *  SUCH DAMAGE.
 *
 *
 *  $Id: generate_arm_multi.c,v 1.12 2005/11/30 16:23:08 debug Exp $
 *
 *  Generation of commonly used ARM load/store multiple instructions.
 *
 *  The main idea is to first check whether a load/store would be possible
 *  without going outside a page, and if so, use the host_load or _store
 *  arrays for quick access to emulated RAM. Otherwise, fall back to using
 *  the generic bdt_load() or bdt_store().
 */
37 |
|
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "misc.h"
41 |
|
42 |
|
/*
 *  generate_opcode():
 *
 *  Writes to stdout the C source of one specialized handler function,
 *  arm_instr_multi_0x<opcode>(), for the given ARM load/store multiple
 *  opcode, followed by a Y(multi_0x<opcode>) line.
 *
 *  The emitted handler first looks up the host page for the (possibly
 *  pre-adjusted) base address. If that page pointer is non-NULL and all
 *  transferred words lie within the same 4 KB page, the registers are
 *  copied directly to/from host memory; otherwise the handler calls
 *  instr(bdt_load) or instr(bdt_store).
 *
 *  The generator itself prints a message on stderr and exits with status 1
 *  if the opcode is not an ldm/stm, has an empty register list, has the
 *  s-bit set, or uses r15 as the base register.
 *
 *  TODO:
 *
 *	o)  On 64-bit hosts, load/store two registers at a time. This
 *	    feature depends both on the alignment of the base register,
 *	    and the specific set of registers being loaded/stored.
 *
 *	o)  Alignment checks. (Optional?)
 *
 *	o)  For accesses that cross page boundaries, use two pages using
 *	    the fast method instead of calling the generic function?
 */
void generate_opcode(uint32_t opcode)
{
	int pre_index, upwards, s_bit, writeback, is_load, pc_listed;
	int base_reg, reg, slot;
	int reg_count = 0;
	const char *step_op, *kind;

	if ((opcode & 0x0e000000) != 0x08000000) {
		fprintf(stderr, "opcode 0x%08x is not an ldm/stm\n", opcode);
		exit(1);
	}

	/*  Decode the instruction fields:  */
	base_reg  = (opcode >> 16) & 15;
	pre_index = (opcode & 0x01000000) != 0;
	upwards   = (opcode & 0x00800000) != 0;
	s_bit     = (opcode & 0x00400000) != 0;
	writeback = (opcode & 0x00200000) != 0;
	is_load   = (opcode & 0x00100000) != 0;
	pc_listed = (opcode & 0x8000) != 0;

	for (reg = 0; reg < 16; reg++) {
		reg_count += (opcode >> reg) & 1;
	}

	/*  TODO: Check for register pairs, for 64-bit load/stores  */

	if (reg_count == 0) {
		fprintf(stderr, "opcode 0x%08x has no registers set\n", opcode);
		exit(1);
	}

	if (s_bit) {
		fprintf(stderr, "opcode 0x%08x has s-bit set\n", opcode);
		exit(1);
	}

	if (base_reg == 15) {
		fprintf(stderr, "opcode 0x%08x has r=15\n", opcode);
		exit(1);
	}

	step_op = upwards ? "+=" : "-=";
	kind = is_load ? "load" : "store";

	/*  Function header and local variables of the emitted handler:  */
	printf("\nvoid arm_instr_multi_0x%08x(struct cpu *cpu,"
	    " struct arm_instr_call *ic) {\n", opcode);

	printf("\tunsigned char *page;\n");
	printf("\tuint32_t addr = cpu->cd.arm.r[%i];\n", base_reg);

	/*  Storing r15 needs the value of the emulated pc (plus 12):  */
	if (!is_load && pc_listed) {
		printf("\tuint32_t tmp_pc = ((size_t)ic - (size_t)\n\t"
		    " cpu->cd.arm.cur_ic_page) / sizeof(struct "
		    "arm_instr_call);\n"
		    "\ttmp_pc = ((cpu->pc & ~((ARM_IC_ENTRIES_PER_PAGE-1)"
		    "\n\t << ARM_INSTR_ALIGNMENT_SHIFT)))\n"
		    "\t + (tmp_pc << ARM_INSTR_ALIGNMENT_SHIFT) + 12;\n");
	}

	if (pre_index) {
		printf("\taddr %s 4;\n", step_op);
	}

	printf("\tpage = cpu->cd.arm.host_%s[addr >> 12];\n", kind);

	printf("\taddr &= 0xffc;\n");

	/*
	 *  Fast-path condition. With a single register, the masked address
	 *  is always inside the page, so only the page pointer is tested.
	 *  The bound depends only on the direction; pre/post indexing has
	 *  already been applied to addr above.
	 */
	printf("\tif (");
	if (reg_count > 1) {
		if (upwards) {
			printf("addr <= 0x%x && ", 0x1000 - 4 * reg_count);
		} else {
			printf("addr >= 0x%x && ", 4 * (reg_count - 1));
		}
	}
	printf("page != NULL) {\n");

	printf("\t\tuint32_t *p = (uint32_t *) (page + addr);\n");

	/*
	 *  Registers are always emitted in ascending order. When the
	 *  instruction is decrementing, the lowest register ends up at the
	 *  most negative word offset, so that the last one lands on p[0].
	 */
	slot = upwards ? 0 : 1 - reg_count;
	for (reg = 0; reg < 16; reg++) {
		if (!((opcode >> reg) & 1)) {
			continue;
		}

		if (is_load) {
			if (writeback && reg == base_reg) {
				/*  Skip the load if we're using writeback.  */
			} else if (reg == 15) {
				printf("\t\tcpu->pc = p[%i];\n", slot);
			} else {
				printf("\t\tcpu->cd.arm.r[%i] = p[%i];\n",
				    reg, slot);
			}
		} else {
			if (reg == 15) {
				printf("\t\tp[%i] = tmp_pc;\n", slot);
			} else {
				printf("\t\tp[%i] = cpu->cd.arm.r[%i];\n",
				    slot, reg);
			}
		}

		slot++;
	}

	if (writeback) {
		printf("\t\tcpu->cd.arm.r[%i] %s %i;\n",
		    base_reg, step_op, 4 * reg_count);
	}

	if (is_load && pc_listed) {
		printf("\t\tquick_pc_to_pointers(cpu);\n");
	}

	/*  Slow path: let the generic implementation handle it.  */
	printf("\t} else\n");
	printf("\t\tinstr(bdt_%s)(cpu, ic);\n", kind);

	printf("}\nY(multi_0x%08x)\n", opcode);
}
207 |
|
208 |
|
/*
 *  parse_opcode():
 *
 *  Converts one command line argument to a 32-bit opcode. The base is
 *  auto-detected (strtoul() with base 0), and anything that does not fit
 *  in 32 bits is truncated.
 *
 *  strtoul() is used rather than strtol(), because strtol() clamps values
 *  above LONG_MAX; on hosts where long is 32 bits, that would turn every
 *  opcode with bit 31 set (e.g. 0xe8bd8000) into 0x7fffffff.
 */
static uint32_t parse_opcode(const char *arg)
{
	return (uint32_t) strtoul(arg, NULL, 0);
}


/*
 *  multi_table_index():
 *
 *  Packs the eight opcode bits selected by the mask 0x00950154 into a
 *  value in the range 0..255. This value selects which of the 256 small
 *  lookup tables an opcode belongs to.
 */
static int multi_table_index(uint32_t opcode)
{
	return (int) (((opcode & 0x00800000) >> 16)
	    | ((opcode & 0x00100000) >> 14)
	    | ((opcode & 0x00040000) >> 13)
	    | ((opcode & 0x00010000) >> 12)
	    | ((opcode & 0x00000100) >> 5)
	    | ((opcode & 0x00000040) >> 4)
	    | ((opcode & 0x00000010) >> 3)
	    | ((opcode & 0x00000004) >> 2));
}


/*
 *  main():
 *
 *  Normal ARM code seems to use only a few hundred of the 2^24 possible
 *  load/store multiple instructions. (I'm not counting the s-bit now.)
 *  Instead of having a linear array of 100s of entries, we can select a list
 *  to scan based on a few bits (*), and those lists will be shorter.
 *
 *  (*) By running experiment_arm_multi.c on statistics gathered from running
 *      NetBSD/cats, it seems that choosing the following 8 bits results in
 *      the shortest linear lists:
 *
 *      xxxx100P USWLnnnn llllllll llllllll
 *               ^  ^ ^ ^        ^  ^ ^ ^   (0x00950154)
 *
 *  Each command line argument is one opcode. The output, written to stdout,
 *  consists of:
 *
 *	1.  a fixed prologue (includes and extern declarations),
 *	2.  one handler per opcode, produced by generate_opcode(),
 *	3.  256 zero-terminated opcode lists, multi_opcode_<index>[],
 *	4.  for every non-empty index, a function pointer table
 *	    multi_opcode_f_<index>[] with 16 entries per opcode,
 *	5.  the top-level tables multi_opcode[] and multi_opcode_f[].
 *
 *  Returns 0 on success. Exits with status 1 if no opcode is given (and
 *  generate_opcode() exits if an opcode is unsupported).
 */
int main(int argc, char *argv[])
{
	/*
	 *  Suffixes of the handler variants listed per opcode: the 14
	 *  conditional ones, followed by the unconditional one. A 16th
	 *  entry, arm_instr_nop, is emitted separately below.
	 */
	static const char *const variant[] = {
		"__eq", "__ne", "__cs", "__cc", "__mi", "__pl", "__vs",
		"__vc", "__hi", "__ls", "__ge", "__lt", "__gt", "__le", ""
	};
	const size_t n_variants = sizeof(variant) / sizeof(variant[0]);
	int n_used[256] = { 0 };
	size_t v;
	int i, j;

	if (argc < 2) {
		fprintf(stderr, "usage: %s opcode [..]\n", argv[0]);
		exit(1);
	}

	printf("\n/* AUTOMATICALLY GENERATED! Do not edit. */\n\n"
	    "#include <stdio.h>\n"
	    "#include <stdlib.h>\n"
	    "#include \"cpu.h\"\n"
	    "#include \"misc.h\"\n"
	    "#define DYNTRANS_PC_TO_POINTERS arm_pc_to_pointers\n"
	    "#include \"quick_pc_to_pointers.h\"\n"
	    "#include \"arm_tmphead_1.h\"\n"
	    "\n#define instr(x) arm_instr_ ## x\n");
	printf("extern void arm_pc_to_pointers(struct cpu *);\n");
	printf("extern void arm_instr_nop(struct cpu *, "
	    "struct arm_instr_call *);\n");
	printf("extern void arm_instr_bdt_load(struct cpu *, "
	    "struct arm_instr_call *);\n");
	printf("extern void arm_instr_bdt_store(struct cpu *, "
	    "struct arm_instr_call *);\n");
	printf("\n\n");

	/*  Generate the opcode functions:  */
	for (i = 1; i < argc; i++) {
		generate_opcode(parse_opcode(argv[i]));
	}

	/*  Count how many opcodes fall into each of the 256 tables:  */
	for (i = 1; i < argc; i++) {
		n_used[multi_table_index(parse_opcode(argv[i]))]++;
	}

	/*  Generate 256 small lookup tables (each terminated by 0):  */
	for (j = 0; j < 256; j++) {
		printf("\nuint32_t multi_opcode_%i[%i] = {\n",
		    j, n_used[j] + 1);
		for (i = 1; i < argc; i++) {
			uint32_t op = parse_opcode(argv[i]);

			if (multi_table_index(op) == j) {
				printf("\t0x%08x,\n", (unsigned int) op);
			}
		}
		printf("0 };\n");
	}

	/*  Generate 256 tables with function pointers:  */
	for (j = 0; j < 256; j++) {
		/*  Empty tables are represented by NULL further below.  */
		if (n_used[j] == 0) {
			continue;
		}

		printf("void (*multi_opcode_f_%i[%i])(struct cpu *,"
		    " struct arm_instr_call *) = {\n", j, n_used[j] * 16);
		for (i = 1; i < argc; i++) {
			uint32_t op = parse_opcode(argv[i]);

			if (multi_table_index(op) != j) {
				continue;
			}

			for (v = 0; v < n_variants; v++) {
				printf("\tarm_instr_multi_0x%08x%s,\n",
				    (unsigned int) op, variant[v]);
			}
			printf("\tarm_instr_nop,\n");
		}
		printf("};\n");
	}

	/*  Top-level table of opcode lists:  */
	printf("\nuint32_t *multi_opcode[256] = {\n");
	for (i = 0; i < 256; i++) {
		printf(" multi_opcode_%i,", i);
		if ((i % 4) == 0) {
			printf("\n");
		}
	}
	printf("};\n");

	/*  Top-level table of function pointer tables:  */
	printf("\nvoid (**multi_opcode_f[256])(struct cpu *,"
	    " struct arm_instr_call *) = {\n");
	for (i = 0; i < 256; i++) {
		if (n_used[i] > 0) {
			printf(" multi_opcode_f_%i,", i);
		} else {
			printf(" NULL,");
		}
		if ((i % 4) == 0) {
			printf("\n");
		}
	}
	printf("};\n");

	return 0;
}
365 |
|