1 |
dpavlin |
18 |
/* |
2 |
|
|
* Copyright (C) 2005 Anders Gavare. All rights reserved. |
3 |
|
|
* |
4 |
|
|
* Redistribution and use in source and binary forms, with or without |
5 |
|
|
* modification, are permitted provided that the following conditions are met: |
6 |
|
|
* |
7 |
|
|
* 1. Redistributions of source code must retain the above copyright |
8 |
|
|
* notice, this list of conditions and the following disclaimer. |
9 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
10 |
|
|
* notice, this list of conditions and the following disclaimer in the |
11 |
|
|
* documentation and/or other materials provided with the distribution. |
12 |
|
|
* 3. The name of the author may not be used to endorse or promote products |
13 |
|
|
* derived from this software without specific prior written permission. |
14 |
|
|
* |
15 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
16 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
19 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 |
|
|
* SUCH DAMAGE. |
26 |
|
|
* |
27 |
|
|
* |
28 |
dpavlin |
22 |
* $Id: generate_arm_multi.c,v 1.12 2005/11/30 16:23:08 debug Exp $ |
29 |
dpavlin |
18 |
* |
30 |
|
|
* Generation of commonly used ARM load/store multiple instructions. |
31 |
dpavlin |
20 |
* |
32 |
dpavlin |
18 |
* The main idea is to first check whether a load/store would be possible |
33 |
|
|
* without going outside a page, and if so, use the host_load or _store |
34 |
|
|
* arrays for quick access to emulated RAM. Otherwise, fall back to using |
35 |
|
|
* the generic bdt_load() or bdt_store(). |
36 |
|
|
*/ |
37 |
|
|
|
38 |
|
|
#include <stdio.h> |
39 |
|
|
#include <stdlib.h> |
40 |
|
|
#include "misc.h" |
41 |
|
|
|
42 |
|
|
|
43 |
|
|
/* |
44 |
|
|
* generate_opcode(): |
45 |
|
|
* |
46 |
|
|
* Given an ARM load/store multiple opcode, produce equivalent "hardcoded" |
47 |
|
|
* C code which emulates the opcode. |
48 |
|
|
* |
49 |
|
|
* TODO: |
50 |
|
|
* |
51 |
|
|
* o) On 64-bit hosts, load/store two registers at a time. This |
52 |
|
|
* feature depends both on the alignment of the base register, |
53 |
|
|
* and the specific set of registers being loaded/stored. |
54 |
|
|
* |
55 |
|
|
* o) Alignment checks. (Optional?) |
56 |
|
|
* |
57 |
|
|
* o) For accesses that cross page boundaries, use two pages using |
58 |
|
|
* the fast method instead of calling the generic function? |
59 |
|
|
*/ |
60 |
|
|
/*
 *  emit_register_transfers():
 *
 *  Print one generated C statement for each register whose bit is set in the
 *  low 16 bits of the opcode, going from register 0 up to register 15.  Each
 *  statement copies between the emulated register and the word p[x] of the
 *  host page, where x starts at first_index and grows by one for every
 *  register in the list.
 *
 *  For loads with writeback, the base register r itself is not loaded (its
 *  slot is still counted).  Register 15 is read into cpu->pc on loads and
 *  written from the generated tmp_pc variable on stores.
 */
static void emit_register_transfers(uint32_t opcode, int load, int w, int r,
	int first_index)
{
	int reg;
	int x = first_index;

	for (reg = 0; reg <= 15; reg++) {
		if (!(opcode & ((uint32_t) 1 << reg)))
			continue;

		if (load && w && reg == r) {
			/*  Skip the load if we're using writeback.  */
		} else if (load) {
			if (reg == 15)
				printf("\t\tcpu->pc = p[%i];\n", x);
			else
				printf("\t\tcpu->cd.arm.r[%i] = "
				    "p[%i];\n", reg, x);
		} else {
			if (reg == 15)
				printf("\t\tp[%i] = tmp_pc;\n", x);
			else
				printf("\t\tp[%i] = cpu->cd.arm.r"
				    "[%i];\n", x, reg);
		}

		x++;
	}
}


/*
 *  generate_opcode():
 *
 *  Write to stdout a C function named arm_instr_multi_0x<opcode> which
 *  emulates the given ARM load/store multiple opcode, followed by a
 *  Y(multi_0x<opcode>) line.
 *
 *  The generated function looks up the host page for the (possibly
 *  pre-adjusted) base address.  If the page is present and the whole
 *  transfer stays inside that 4 KB page, the registers are copied directly;
 *  otherwise it falls back to instr(bdt_load) or instr(bdt_store).
 *
 *  The generator prints a message to stderr and exits with status 1 if the
 *  opcode is not an ldm/stm, has an empty register list, has the s-bit set,
 *  or uses r15 as the base register.
 */
void generate_opcode(uint32_t opcode)
{
	/*  %x expects unsigned int; uint32_t may be another type.  */
	const unsigned int op = (unsigned int) opcode;
	int p, u, s, w, load, r, n_regs, i;
	int has_pc;

	if ((opcode & 0x0e000000) != 0x08000000) {
		fprintf(stderr, "opcode 0x%08x is not an ldm/stm\n", op);
		exit(1);
	}

	/*  Decode the instruction fields:  */
	r    = (opcode >> 16) & 15;
	p    = (opcode & 0x01000000) != 0;
	u    = (opcode & 0x00800000) != 0;
	s    = (opcode & 0x00400000) != 0;
	w    = (opcode & 0x00200000) != 0;
	load = (opcode & 0x00100000) != 0;
	has_pc = (opcode & 0x8000) != 0;

	n_regs = 0;
	for (i = 0; i < 16; i++) {
		if (opcode & ((uint32_t) 1 << i))
			n_regs++;
	}

	/*  TODO: Check for register pairs, for 64-bit load/stores  */

	if (n_regs == 0) {
		fprintf(stderr, "opcode 0x%08x has no registers set\n", op);
		exit(1);
	}

	if (s) {
		fprintf(stderr, "opcode 0x%08x has s-bit set\n", op);
		exit(1);
	}

	if (r == 15) {
		fprintf(stderr, "opcode 0x%08x has r=15\n", op);
		exit(1);
	}

	printf("\nvoid arm_instr_multi_0x%08x(struct cpu *cpu,"
	    " struct arm_instr_call *ic) {\n", op);

	printf("\tunsigned char *page;\n");
	printf("\tuint32_t addr = cpu->cd.arm.r[%i];\n", r);

	/*  Storing r15 needs the value of the emulated pc:  */
	if (!load && has_pc) {
		printf("\tuint32_t tmp_pc = ((size_t)ic - (size_t)\n\t"
		    " cpu->cd.arm.cur_ic_page) / sizeof(struct "
		    "arm_instr_call);\n"
		    "\ttmp_pc = ((cpu->pc & ~((ARM_IC_ENTRIES_PER_PAGE-1)"
		    "\n\t << ARM_INSTR_ALIGNMENT_SHIFT)))\n"
		    "\t + (tmp_pc << ARM_INSTR_ALIGNMENT_SHIFT) + 12;\n");
	}

	/*  Pre-increment/decrement: adjust the address before the lookup.  */
	if (p)
		printf("\taddr %s 4;\n", u? "+=" : "-=");

	printf("\tpage = cpu->cd.arm.host_%s[addr >> 12];\n",
	    load? "load" : "store");

	printf("\taddr &= 0xffc;\n");

	/*
	 *  Page boundary check.  A single word at a masked offset can never
	 *  leave the page, so the check is only emitted for 2 or more
	 *  registers.  Pre- and post- variants use the same bound, because
	 *  addr has already been adjusted above in the pre- case.
	 */
	printf("\tif (");
	if (n_regs > 1) {
		if (u)
			printf("addr <= 0x%x && ",
			    (unsigned int) (0x1000 - 4*n_regs));
		else
			printf("addr >= 0x%x && ",
			    (unsigned int) (4*(n_regs-1)));
	}
	printf("page != NULL) {\n");

	printf("\t\tuint32_t *p = (uint32_t *) (page + addr);\n");

	/*
	 *  Registers are always emitted in increasing order.  When
	 *  decrementing, the first register gets the most negative index so
	 *  that the last one ends up at p[0].
	 */
	emit_register_transfers(opcode, load, w, r, u? 0 : 1 - n_regs);

	if (w)
		printf("\t\tcpu->cd.arm.r[%i] %s %i;\n",
		    r, u? "+=" : "-=", 4*n_regs);

	/*  pc was loaded, so the generated code refreshes its pointers:  */
	if (load && has_pc)
		printf("\t\tquick_pc_to_pointers(cpu);\n");

	printf("\t} else\n");
	printf("\t\tinstr(bdt_%s)(cpu, ic);\n", load? "load" : "store");

	printf("}\nY(multi_0x%08x)\n", op);
}
207 |
|
|
|
208 |
|
|
|
209 |
|
|
/* |
210 |
|
|
* main(): |
211 |
|
|
* |
212 |
|
|
* Normal ARM code seems to only use about a few hundred of the 2^24 possible |
213 |
|
|
* load/store multiple instructions. (I'm not counting the s-bit now.) |
214 |
|
|
* Instead of having a linear array of 100s of entries, we can select a list |
215 |
|
|
* to scan based on a few bits (*), and those lists will be shorter. |
216 |
|
|
* |
217 |
|
|
* (*) By running experiment_arm_multi.c on statistics gathered from running |
218 |
|
|
* NetBSD/cats, it seems that choosing the following 8 bits results in |
219 |
|
|
* the shortest linear lists: |
220 |
|
|
* |
221 |
|
|
* xxxx100P USWLnnnn llllllll llllllll |
222 |
|
|
*          ^  ^ ^ ^        ^  ^ ^ ^  (0x00950154) |
223 |
|
|
*/ |
224 |
|
|
/*
 *  parse_opcode_arg():
 *
 *  Convert one command line argument into a 32-bit opcode.  The number is
 *  parsed as unsigned (base chosen from its prefix), so values with the top
 *  bit set survive even where long is 32 bits wide.  Anything that is not a
 *  complete number, or that does not fit in 32 bits, is reported on stderr
 *  and terminates the program with status 1.
 */
static uint32_t parse_opcode_arg(const char *arg)
{
	char *end;
	unsigned long value = strtoul(arg, &end, 0);

	if (end == arg || *end != '\0' || value > 0xffffffffUL) {
		fprintf(stderr, "invalid opcode argument '%s'\n", arg);
		exit(1);
	}

	return (uint32_t) value;
}


/*
 *  multi_opcode_bucket():
 *
 *  Compress the eight opcode bits selected by the mask 0x00950154 into a
 *  value 0..255.  This value chooses which of the 256 generated lists an
 *  opcode is placed in.
 */
static int multi_opcode_bucket(uint32_t opcode)
{
	return (int) (((opcode & 0x00800000) >> 16)
	    | ((opcode & 0x00100000) >> 14)
	    | ((opcode & 0x00040000) >> 13)
	    | ((opcode & 0x00010000) >> 12)
	    | ((opcode & 0x00000100) >>  5)
	    | ((opcode & 0x00000040) >>  4)
	    | ((opcode & 0x00000010) >>  3)
	    | ((opcode & 0x00000004) >>  2));
}


/*
 *  main():
 *
 *  Usage: <program> opcode [..]
 *
 *  Writes a complete generated C file to stdout:
 *
 *	o)  a fixed header of #includes and extern declarations,
 *	o)  one function per opcode argument (see generate_opcode()),
 *	o)  256 zero-terminated arrays multi_opcode_<j> listing the opcodes
 *	    that fall into bucket j,
 *	o)  for every non-empty bucket, an array multi_opcode_f_<j> with 16
 *	    function pointers per opcode (14 condition variants, the plain
 *	    function, and arm_instr_nop),
 *	o)  two top-level arrays indexing the above by bucket number, with
 *	    NULL for empty buckets in the function pointer array.
 *
 *  Returns 0 on success.  Exits with status 1 if no opcode is given or if
 *  an argument cannot be parsed.
 */
int main(int argc, char *argv[])
{
	/*  One entry per slot in a 16-entry group; "" is the plain name.  */
	static const char *const suffixes[] = {
		"__eq", "__ne", "__cs", "__cc", "__mi", "__pl", "__vs",
		"__vc", "__hi", "__ls", "__ge", "__lt", "__gt", "__le", ""
	};
	const int n_suffixes = (int) (sizeof(suffixes) / sizeof(suffixes[0]));
	int n_used[256] = { 0 };
	int i, j, k;

	if (argc < 2) {
		fprintf(stderr, "usage: %s opcode [..]\n", argv[0]);
		exit(1);
	}

	printf("\n/* AUTOMATICALLY GENERATED! Do not edit. */\n\n"
	    "#include <stdio.h>\n"
	    "#include <stdlib.h>\n"
	    "#include \"cpu.h\"\n"
	    "#include \"misc.h\"\n"
	    "#define DYNTRANS_PC_TO_POINTERS arm_pc_to_pointers\n"
	    "#include \"quick_pc_to_pointers.h\"\n"
	    "#include \"arm_tmphead_1.h\"\n"
	    "\n#define instr(x) arm_instr_ ## x\n");
	printf("extern void arm_pc_to_pointers(struct cpu *);\n");
	printf("extern void arm_instr_nop(struct cpu *, "
	    "struct arm_instr_call *);\n");
	printf("extern void arm_instr_bdt_load(struct cpu *, "
	    "struct arm_instr_call *);\n");
	printf("extern void arm_instr_bdt_store(struct cpu *, "
	    "struct arm_instr_call *);\n");
	printf("\n\n");

	/*  Generate the opcode functions, and count opcodes per bucket:  */
	for (i = 1; i < argc; i++) {
		uint32_t opcode = parse_opcode_arg(argv[i]);

		generate_opcode(opcode);
		n_used[multi_opcode_bucket(opcode)]++;
	}

	/*  Generate 256 small lookup tables (each terminated by a 0):  */
	for (j = 0; j < 256; j++) {
		printf("\nuint32_t multi_opcode_%i[%i] = {\n",
		    j, n_used[j] + 1);
		for (i = 1; i < argc; i++) {
			uint32_t opcode = parse_opcode_arg(argv[i]);

			if (multi_opcode_bucket(opcode) == j)
				printf("\t0x%08x,\n", (unsigned int) opcode);
		}
		printf("0 };\n");
	}

	/*  Generate tables with function pointers, for non-empty buckets:  */
	for (j = 0; j < 256; j++) {
		if (n_used[j] == 0)
			continue;

		printf("void (*multi_opcode_f_%i[%i])(struct cpu *,"
		    " struct arm_instr_call *) = {\n", j, n_used[j] * 16);
		for (i = 1; i < argc; i++) {
			uint32_t opcode = parse_opcode_arg(argv[i]);

			if (multi_opcode_bucket(opcode) != j)
				continue;

			for (k = 0; k < n_suffixes; k++)
				printf("\tarm_instr_multi_0x%08x%s,\n",
				    (unsigned int) opcode, suffixes[k]);
			printf("\tarm_instr_nop,\n");
		}
		printf("};\n");
	}

	/*  Top-level tables, four entries per output line:  */
	printf("\nuint32_t *multi_opcode[256] = {\n");
	for (i = 0; i < 256; i++) {
		printf(" multi_opcode_%i,", i);
		if ((i % 4) == 3)
			printf("\n");
	}
	printf("};\n");

	printf("\nvoid (**multi_opcode_f[256])(struct cpu *,"
	    " struct arm_instr_call *) = {\n");
	for (i = 0; i < 256; i++) {
		if (n_used[i] > 0)
			printf(" multi_opcode_f_%i,", i);
		else
			printf(" NULL,");
		if ((i % 4) == 3)
			printf("\n");
	}
	printf("};\n");

	return 0;
}
365 |
|
|
|