1 |
/* |
2 |
* Copyright (C) 2005 Anders Gavare. All rights reserved. |
3 |
* |
4 |
* Redistribution and use in source and binary forms, with or without |
5 |
* modification, are permitted provided that the following conditions are met: |
6 |
* |
7 |
* 1. Redistributions of source code must retain the above copyright |
8 |
* notice, this list of conditions and the following disclaimer. |
9 |
* 2. Redistributions in binary form must reproduce the above copyright |
10 |
* notice, this list of conditions and the following disclaimer in the |
11 |
* documentation and/or other materials provided with the distribution. |
12 |
* 3. The name of the author may not be used to endorse or promote products |
13 |
* derived from this software without specific prior written permission. |
14 |
* |
15 |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
16 |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 |
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
19 |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 |
* SUCH DAMAGE. |
26 |
* |
27 |
* |
28 |
* $Id: generate_arm_multi.c,v 1.6 2005/10/26 14:37:03 debug Exp $ |
29 |
* |
30 |
* Generation of commonly used ARM load/store multiple instructions. |
31 |
* The main idea is to first check whether a load/store would be possible |
32 |
* without going outside a page, and if so, use the host_load or _store |
33 |
* arrays for quick access to emulated RAM. Otherwise, fall back to using |
34 |
* the generic bdt_load() or bdt_store(). |
35 |
*/ |
36 |
|
37 |
#include <stdio.h> |
38 |
#include <stdlib.h> |
39 |
#include "misc.h" |
40 |
|
41 |
|
42 |
/*
 *  generate_opcode():
 *
 *  Writes, to stdout, "hardcoded" C source which emulates one specific ARM
 *  load/store multiple opcode. The emitted text has the form
 *
 *	X(multi_0x<opcode>) { ... }
 *	Y(multi_0x<opcode>)
 *
 *  and contains two paths:
 *
 *    o)  A fast path, used when the host_load/host_store entry for the
 *	  page is non-NULL and all words accessed lie inside that one page.
 *	  Registers are then copied directly to/from host memory.
 *
 *    o)  Otherwise, a call to the generic instr(bdt_load) or
 *	  instr(bdt_store).
 *
 *  An opcode which is not an ldm/stm, has an empty register list, has the
 *  s-bit set, or uses r15 as its base register is refused: a message is
 *  printed to stderr and the generator exits with status 1.
 *
 *  TODO:
 *
 *    o)  On 64-bit hosts, load/store two registers at a time. This
 *	  feature depends both on the alignment of the base register,
 *	  and the specific set of registers being loaded/stored.
 *
 *    o)  Alignment checks. (Optional?)
 *
 *    o)  For accesses that cross page boundaries, use two pages using
 *	  the fast method instead of calling the generic function?
 */
void generate_opcode(uint32_t opcode)
{
	int base, pre, up, sbit, writeback, is_load, has_pc;
	int count = 0, reg, slot;
	const char *kind;

	if ((opcode & 0x0e000000) != 0x08000000) {
		fprintf(stderr, "opcode 0x%08x is not an ldm/stm\n", opcode);
		exit(1);
	}

	/*  Field layout:  xxxx100P USWLnnnn llllllll llllllll  */
	pre       = (opcode >> 24) & 1;
	up        = (opcode >> 23) & 1;
	sbit      = (opcode >> 22) & 1;
	writeback = (opcode >> 21) & 1;
	is_load   = (opcode >> 20) & 1;
	base      = (opcode >> 16) & 15;
	has_pc    = (opcode >> 15) & 1;

	/*  Number of registers in the 16-bit register list:  */
	for (reg = 0; reg < 16; reg++)
		count += (opcode >> reg) & 1;

	/*  TODO: Check for register pairs, for 64-bit load/stores  */

	if (count == 0) {
		fprintf(stderr, "opcode 0x%08x has no registers set\n", opcode);
		exit(1);
	}

	if (sbit) {
		fprintf(stderr, "opcode 0x%08x has s-bit set\n", opcode);
		exit(1);
	}

	if (base == 15) {
		fprintf(stderr, "opcode 0x%08x has r=15\n", opcode);
		exit(1);
	}

	kind = is_load ? "load" : "store";

	printf("\nX(multi_0x%08x) {\n", opcode);

	printf("\tunsigned char *page;\n");
	printf("\tuint32_t addr = cpu->cd.arm.r[%i];\n", base);

	/*
	 *  A store which includes the pc needs the pc's value computed
	 *  up front; the emitted code keeps it in tmp_pc.
	 */
	if (has_pc && !is_load) {
		printf("\tuint32_t tmp_pc = ((size_t)ic - (size_t)\n\t"
		    " cpu->cd.arm.cur_ic_page) / sizeof(struct "
		    "arm_instr_call);\n"
		    "\ttmp_pc = ((cpu->cd.arm.r[ARM_PC] & "
		    "~((ARM_IC_ENTRIES_PER_PAGE-1)"
		    "\n\t << ARM_INSTR_ALIGNMENT_SHIFT)))\n"
		    "\t + (tmp_pc << ARM_INSTR_ALIGNMENT_SHIFT) + 12;\n");
	}

	/*  Pre-indexed forms step the address before the first access:  */
	if (pre)
		printf("\taddr %s 4;\n", up ? "+=" : "-=");

	printf("\tpage = cpu->cd.arm.host_%s[addr >> 12];\n", kind);

	printf("\taddr &= 0xffc;\n");

	/*
	 *  Fast path condition. With more than one register, the whole
	 *  run of words must fit inside the page: when incrementing, the
	 *  last word must not pass the end of the page; when decrementing,
	 *  the first word must not fall before its start.
	 */
	printf("\tif (");
	if (count > 1) {
		if (up)
			printf("addr <= 0x%x && ", 0x1000 - 4*count);
		else
			printf("addr >= 0x%x && ", 4*(count-1));
	}
	printf("page != NULL) {\n");

	printf("\t\tuint32_t *p = (uint32_t *) (page + addr);\n");

	/*
	 *  Registers are always emitted in ascending order. When
	 *  incrementing, the lowest register uses p[0]; when decrementing,
	 *  the highest register uses p[0] and the others use negative
	 *  indices.
	 */
	slot = up ? 0 : 1 - count;
	for (reg = 0; reg < 16; reg++) {
		if (!((opcode >> reg) & 1))
			continue;

		if (!is_load) {
			if (reg == 15)
				printf("\t\tp[%i] = tmp_pc;\n", slot);
			else
				printf("\t\tp[%i] = cpu->cd.arm.r[%i];\n",
				    slot, reg);
		} else if (!(writeback && reg == base)) {
			/*
			 *  (The load of the base register itself is
			 *  skipped if writeback is used.)
			 */
			printf("\t\tcpu->cd.arm.r[%i] = p[%i];\n", reg, slot);
		}

		slot++;
	}

	if (writeback)
		printf("\t\tcpu->cd.arm.r[%i] %s %i;\n",
		    base, up ? "+=" : "-=", 4*count);

	/*  A load into the pc must also update the emulator's pc state:  */
	if (is_load && has_pc) {
		printf("\t\tcpu->pc = cpu->cd.arm.r[15];\n"
		    "\t\tarm_pc_to_pointers(cpu);\n");
	}

	printf("\t} else\n");
	printf("\t\tinstr(bdt_%s)(cpu, ic);\n", kind);

	printf("}\nY(multi_0x%08x)\n", opcode);
}
197 |
|
198 |
|
199 |
/*
 *  multi_opcode_index():
 *
 *  Packs the 8 selector bits of an opcode (mask 0x00950154) into a table
 *  index in the range 0..255.
 */
static int multi_opcode_index(uint32_t opcode)
{
	return ((opcode & 0x00800000) >> 16)
	    | ((opcode & 0x00100000) >> 14)
	    | ((opcode & 0x00040000) >> 13)
	    | ((opcode & 0x00010000) >> 12)
	    | ((opcode & 0x00000100) >> 5)
	    | ((opcode & 0x00000040) >> 4)
	    | ((opcode & 0x00000010) >> 3)
	    | ((opcode & 0x00000004) >> 2);
}


/*
 *  parse_opcode():
 *
 *  Converts one command line argument (decimal, octal or 0x-prefixed hex)
 *  into a 32-bit opcode. strtoul() is used rather than strtol(), because
 *  opcodes with the top bit set (e.g. 0xe8bd8000) do not fit in a signed
 *  32-bit long and would be clamped to LONG_MAX on such hosts.
 *
 *  Exits with status 1 if the argument is empty, has trailing characters,
 *  or does not fit in 32 bits.
 */
static uint32_t parse_opcode(const char *arg)
{
	char *end;
	unsigned long value = strtoul(arg, &end, 0);

	if (end == arg || *end != '\0' || value > 0xffffffffUL) {
		fprintf(stderr, "invalid opcode '%s'\n", arg);
		exit(1);
	}

	return (uint32_t) value;
}


/*
 *  main():
 *
 *  Normal ARM code seems to use only a few hundred of the 2^24 possible
 *  load/store multiple instructions. (I'm not counting the s-bit now.)
 *  Instead of having a linear array of 100s of entries, we can select a list
 *  to scan based on a few bits (*), and those lists will be shorter.
 *
 *  (*) By running experiment_arm_multi.c on statistics gathered from running
 *      NetBSD/cats, it seems that choosing the following 8 bits results in
 *      the shortest linear lists:
 *
 *	xxxx100P USWLnnnn llllllll llllllll
 *	         ^  ^ ^ ^        ^  ^ ^ ^   (0x00950154)
 *
 *  Each command line argument is one opcode. The following is written to
 *  stdout:
 *
 *    o)  one handler per opcode (see generate_opcode()),
 *    o)  256 zero-terminated opcode lists, multi_opcode_<n>,
 *    o)  for each non-empty list, a table multi_opcode_f_<n> with 16
 *	  function pointers per opcode (14 conditional variants, the
 *	  unconditional variant, and arm_instr_nop),
 *    o)  the top-level tables multi_opcode[] and multi_opcode_f[].
 *
 *  Returns 0 on success. Exits with status 1 on usage errors, on invalid
 *  opcodes, or if memory cannot be allocated.
 */
int main(int argc, char *argv[])
{
	/*  Suffixes of the 14 conditional handler variants, in table order:  */
	static const char *const cond_suffix[14] = {
		"eq", "ne", "cs", "cc", "mi", "pl", "vs",
		"vc", "hi", "ls", "ge", "lt", "gt", "le"
	};
	int i, j, c, n_opcodes;
	int n_used[256];
	uint32_t *opcodes;

	if (argc < 2) {
		fprintf(stderr, "usage: %s opcode [..]\n",
		    argc > 0 && argv[0] != NULL ?
		    argv[0] : "generate_arm_multi");
		exit(1);
	}

	/*  Each argument is parsed exactly once and kept for all passes:  */
	n_opcodes = argc - 1;
	opcodes = malloc((size_t) n_opcodes * sizeof *opcodes);
	if (opcodes == NULL) {
		fprintf(stderr, "out of memory\n");
		exit(1);
	}

	printf("\n/* AUTOMATICALLY GENERATED! Do not edit. */\n\n");

	/*  Generate the opcode functions: */
	for (i = 0; i < n_opcodes; i++) {
		opcodes[i] = parse_opcode(argv[i + 1]);
		generate_opcode(opcodes[i]);
	}

	/*  Count how many opcodes select each of the 256 lists:  */
	for (j = 0; j < 256; j++)
		n_used[j] = 0;
	for (i = 0; i < n_opcodes; i++)
		n_used[multi_opcode_index(opcodes[i])] ++;

	/*  Generate 256 small lookup tables (each terminated by a 0):  */
	for (j = 0; j < 256; j++) {
		printf("\nuint32_t multi_opcode_%i[%i] = {\n",
		    j, n_used[j] + 1);
		for (i = 0; i < n_opcodes; i++) {
			if (multi_opcode_index(opcodes[i]) == j)
				printf("\t0x%08x,\n", opcodes[i]);
		}
		printf("0 };\n");
	}

	/*  Generate tables with function pointers, for non-empty lists:  */
	for (j = 0; j < 256; j++) {
		if (n_used[j] == 0)
			continue;

		printf("void (*multi_opcode_f_%i[%i])(struct cpu *,"
		    " struct arm_instr_call *) = {\n", j, n_used[j] * 16);
		for (i = 0; i < n_opcodes; i++) {
			if (multi_opcode_index(opcodes[i]) != j)
				continue;

			for (c = 0; c < 14; c++)
				printf("\tarm_instr_multi_0x%08x__%s,\n",
				    opcodes[i], cond_suffix[c]);
			printf("\tarm_instr_multi_0x%08x,\n", opcodes[i]);
			printf("\tarm_instr_nop,\n");
		}
		printf("};\n");
	}

	/*  Top-level table of opcode lists (newline after 0, 4, 8, ...):  */
	printf("\nuint32_t *multi_opcode[256] = {\n");
	for (i = 0; i < 256; i++) {
		printf(" multi_opcode_%i,", i);
		if ((i % 4) == 0)
			printf("\n");
	}
	printf("};\n");

	/*  Top-level table of function pointer tables; NULL if unused:  */
	printf("\nvoid (**multi_opcode_f[256])(struct cpu *,"
	    " struct arm_instr_call *) = {\n");
	for (i = 0; i < 256; i++) {
		if (n_used[i] > 0)
			printf(" multi_opcode_f_%i,", i);
		else
			printf(" NULL,");
		if ((i % 4) == 0)
			printf("\n");
	}
	printf("};\n");

	free(opcodes);

	return 0;
}
339 |
|