/[gxemul]/trunk/src/cpus/cpu_arm_instr.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/src/cpus/cpu_arm_instr.c

Parent Directory | Revision Log


Revision 18 - (show annotations)
Mon Oct 8 16:19:11 2007 UTC (16 years, 6 months ago) by dpavlin
File MIME type: text/plain
File size: 59394 byte(s)
++ trunk/HISTORY	(local)
$Id: HISTORY,v 1.1004 2005/10/27 14:01:10 debug Exp $
20051011        Passing -A as the default boot arg for CATS (works fine with
                OpenBSD/cats).
20051012	Fixing the VGA cursor offset bug, and speeding up framebuffer
		redraws if character cells contain the same thing as during
		the last redraw.
20051013	Adding a slow strd ARM instruction hack.
20051017	Minor updates: Adding a dummy i80321 Verde controller (for
		XScale emulation), fixing the disassembly of the ARM "ldrd"
		instruction, adding "support" for less-than-4KB pages for ARM
		(by not adding them to translation tables).
20051020	Continuing on some HPCarm stuff. A NetBSD/hpcarm kernel prints
		some boot messages on an emulated Jornada 720.
		Making dev_ram work better with dyntrans (speeds up some things
		quite a bit).
20051021	Automatically generating some of the most common ARM load/store
		multiple instructions.
20051022	Better statistics gathering for the ARM load/store multiple.
		Various other dyntrans and device updates.
20051023	Various minor updates.
20051024	Continuing; minor device and dyntrans fine-tuning. Adding the
		first "reasonable" instruction combination hacks for ARM (the
		cores of NetBSD/cats' memset and memcpy).
20051025	Fixing a dyntrans-related bug in dev_vga. Also changing the
		dyntrans low/high access notification to only be updated on
		writes, not reads. Hopefully it will be enough. (dev_vga in
		charcell mode now seems to work correctly with both reads and
		writes.)
		Experimenting with gathering dyntrans statistics (which parts
		of emulated RAM that are actually executed), and adding
		instruction combination hacks for cache cleaning and a part of
		NetBSD's scanc() function.
20051026	Adding a bitmap for ARM emulation which indicates if a page is
		(specifically) user accessible; loads and stores with the t-
		flag set can now use the translation arrays, which results in
		a measurable speedup.
20051027	Dyntrans updates; adding an extra bitmap array for 32-bit
		emulation modes, speeding up the check whether a physical page
		has any code translations or not (O(n) -> O(1)). Doing a
		similar reduction of O(n) to O(1) by avoiding the scan through
		the translation entries on a translation update (32-bit mode
		only).
		Various other minor hacks.
20051029	Quick release, without any testing at all.

==============  RELEASE 0.3.6.2  ==============


1 /*
2 * Copyright (C) 2005 Anders Gavare. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * 3. The name of the author may not be used to endorse or promote products
13 * derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 *
28 * $Id: cpu_arm_instr.c,v 1.39 2005/10/27 14:01:13 debug Exp $
29 *
30 * ARM instructions.
31 *
32 * Individual functions should keep track of cpu->n_translated_instrs.
33 * (If no instruction was executed, then it should be decreased. If, say, 4
34 * instructions were combined into one function and executed, then it should
35 * be increased by 3.)
36 */
37
38
39 #include "arm_quick_pc_to_pointers.h"
40
41 /* #define GATHER_BDT_STATISTICS */
42
43
44 #ifdef GATHER_BDT_STATISTICS
/*
 *  update_bdt_statistics():
 *
 *  Counts how often each load/store multiple instruction word is seen.
 *  The s-bit is dropped from the word to form a 24-bit table index. On
 *  every 500000th call, bdt_statistics.txt is rewritten from its start,
 *  with one line per counted occurrence of each opcode.
 *
 *  NOTE/TODO: It might be more memory efficient to swap the high and low
 *  parts of the instruction word, so that the register list bits become
 *  the high bits; fewer host pages would then be touched. The current
 *  implementation works on hosts with lots of RAM.
 *
 *  The resulting file should be processed like this to give a new
 *  cpu_arm_multi.txt:
 *
 *  uniq -c bdt_statistics.txt|sort -nr|head -256|cut -f 2 > cpu_arm_multi.txt
 */
static void update_bdt_statistics(uint32_t iw)
{
	static FILE *f = NULL;
	static long long *counts;
	static char *counts_used;
	static long long n = 0;
	int idx;

	/*  First call: open the output file and allocate the tables.  */
	if (f == NULL) {
		f = fopen("bdt_statistics.txt", "w");
		if (f == NULL) {
			fprintf(stderr, "update_bdt_statistics(): :-(\n");
			exit(1);
		}
		counts = zeroed_alloc((1 << 24) * sizeof(long long));
		counts_used = zeroed_alloc(65536);
	}

	/*  Drop the s-bit:  xxxx100P USWLnnnn llllllll llllllll  */
	iw = (iw & 0x003fffff) | ((iw & 0x01800000) >> 1);

	counts_used[iw & 0xffff] = 1;
	counts[iw] ++;

	n ++;
	if ((n % 500000) != 0)
		return;

	fatal("[ update_bdt_statistics(): n = %lli ]\n", (long long) n);
	fseek(f, 0, SEEK_SET);

	for (idx = 0; idx < 0x1000000; idx++) {
		long long remaining;
		uint32_t opcode;

		if (!counts_used[idx & 0xffff] || counts[idx] == 0)
			continue;

		/*  Recreate the opcode (with the s-bit clear):  */
		opcode = 0x08000000 | (idx & 0x003fffff)
		    | ((idx & 0x00c00000) << 1);

		for (remaining = counts[idx]; remaining > 0; remaining--)
			fprintf(f, "0x%08x\n", opcode);
	}

	fflush(f);
}
101 #endif
102
103
104 /*****************************************************************************/
105
106
/*
 *  Helper definitions:
 *
 *  An instruction is written as
 *
 *	X(foo)
 *	{
 *		code for foo;
 *	}
 *	Y(foo)
 *
 *  Y(foo) emits 14 wrapper functions, arm_instr_foo__eq through
 *  arm_instr_foo__le, each of which tests its condition against the CPSR
 *  flags and calls arm_instr_foo only when the condition holds. It also
 *  emits the table arm_cond_instr_foo[16]: slots 0..13 hold the wrappers,
 *  slot 14 holds arm_instr_foo itself, and slot 15 holds arm_instr_nop.
 *
 *  Whether the wrappers end up with the X function inlined or as plain
 *  calls is up to the compiler.
 */

/*  1 if the CPSR flag f is set, else 0. Requires "cpu" to be in scope.  */
#define	Y_FLAG(f)	((cpu->cd.arm.cpsr & (f)) != 0)

#define	Y(n)								\
	void arm_instr_ ## n ## __eq(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (Y_FLAG(ARM_FLAG_Z)) arm_instr_ ## n (cpu, ic); }		\
	void arm_instr_ ## n ## __ne(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (!Y_FLAG(ARM_FLAG_Z)) arm_instr_ ## n (cpu, ic); }		\
	void arm_instr_ ## n ## __cs(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (Y_FLAG(ARM_FLAG_C)) arm_instr_ ## n (cpu, ic); }		\
	void arm_instr_ ## n ## __cc(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (!Y_FLAG(ARM_FLAG_C)) arm_instr_ ## n (cpu, ic); }		\
	void arm_instr_ ## n ## __mi(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (Y_FLAG(ARM_FLAG_N)) arm_instr_ ## n (cpu, ic); }		\
	void arm_instr_ ## n ## __pl(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (!Y_FLAG(ARM_FLAG_N)) arm_instr_ ## n (cpu, ic); }		\
	void arm_instr_ ## n ## __vs(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (Y_FLAG(ARM_FLAG_V)) arm_instr_ ## n (cpu, ic); }		\
	void arm_instr_ ## n ## __vc(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (!Y_FLAG(ARM_FLAG_V)) arm_instr_ ## n (cpu, ic); }		\
	void arm_instr_ ## n ## __hi(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (Y_FLAG(ARM_FLAG_C) && !Y_FLAG(ARM_FLAG_Z))		\
		arm_instr_ ## n (cpu, ic); }				\
	void arm_instr_ ## n ## __ls(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (Y_FLAG(ARM_FLAG_Z) || !Y_FLAG(ARM_FLAG_C))		\
		arm_instr_ ## n (cpu, ic); }				\
	void arm_instr_ ## n ## __ge(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (Y_FLAG(ARM_FLAG_N) == Y_FLAG(ARM_FLAG_V))			\
		arm_instr_ ## n (cpu, ic); }				\
	void arm_instr_ ## n ## __lt(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if (Y_FLAG(ARM_FLAG_N) != Y_FLAG(ARM_FLAG_V))			\
		arm_instr_ ## n (cpu, ic); }				\
	void arm_instr_ ## n ## __gt(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if ((Y_FLAG(ARM_FLAG_N) == Y_FLAG(ARM_FLAG_V)) &&		\
	    !Y_FLAG(ARM_FLAG_Z))					\
		arm_instr_ ## n (cpu, ic); }				\
	void arm_instr_ ## n ## __le(struct cpu *cpu,			\
	    struct arm_instr_call *ic)					\
	{ if ((Y_FLAG(ARM_FLAG_N) != Y_FLAG(ARM_FLAG_V)) ||		\
	    Y_FLAG(ARM_FLAG_Z))						\
		arm_instr_ ## n (cpu, ic); }				\
	void (*arm_cond_instr_ ## n [16])(struct cpu *,			\
	    struct arm_instr_call *) = {				\
		arm_instr_ ## n ## __eq, arm_instr_ ## n ## __ne,	\
		arm_instr_ ## n ## __cs, arm_instr_ ## n ## __cc,	\
		arm_instr_ ## n ## __mi, arm_instr_ ## n ## __pl,	\
		arm_instr_ ## n ## __vs, arm_instr_ ## n ## __vc,	\
		arm_instr_ ## n ## __hi, arm_instr_ ## n ## __ls,	\
		arm_instr_ ## n ## __ge, arm_instr_ ## n ## __lt,	\
		arm_instr_ ## n ## __gt, arm_instr_ ## n ## __le,	\
		arm_instr_ ## n , arm_instr_nop };

/*
 *  Selects the variant of instruction n from its condition table. A
 *  variable named condition_code must be in scope where this is used.
 */
#define	cond_instr(n)	( arm_cond_instr_ ## n [condition_code] )
204
205
206 /*****************************************************************************/
207
208
209 /*
210 * nop: Do nothing.
211 * invalid: Invalid instructions end up here.
212 */
213 X(nop) { }
214 X(invalid) {
215 uint32_t low_pc;
216 low_pc = ((size_t)ic - (size_t)
217 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
218 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1)
219 << ARM_INSTR_ALIGNMENT_SHIFT);
220 cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
221 cpu->pc = cpu->cd.arm.r[ARM_PC];
222
223 fatal("Invalid ARM instruction: pc=0x%08x\n", (int)cpu->pc);
224
225 cpu->running = 0;
226 cpu->running_translated = 0;
227 cpu->n_translated_instrs --;
228 cpu->cd.arm.next_ic = &nothing_call;
229 }
230
231
232 /*
233 * b: Branch (to a different translated page)
234 *
235 * arg[0] = relative offset
236 */
237 X(b)
238 {
239 uint32_t low_pc;
240
241 /* Calculate new PC from this instruction + arg[0] */
242 low_pc = ((size_t)ic - (size_t)
243 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
244 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1)
245 << ARM_INSTR_ALIGNMENT_SHIFT);
246 cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
247 cpu->cd.arm.r[ARM_PC] += (int32_t)ic->arg[0];
248 cpu->pc = cpu->cd.arm.r[ARM_PC];
249
250 /* Find the new physical page and update the translation pointers: */
251 quick_pc_to_pointers(cpu);
252 }
253 Y(b)
254
255
256 /*
257 * b_samepage: Branch (to within the same translated page)
258 *
259 * arg[0] = pointer to new arm_instr_call
260 */
261 X(b_samepage)
262 {
263 cpu->cd.arm.next_ic = (struct arm_instr_call *) ic->arg[0];
264 }
265 Y(b_samepage)
266
267
268 /*
269 * bx: Branch, potentially exchanging Thumb/ARM encoding
270 *
271 * arg[0] = ptr to rm
272 */
273 X(bx)
274 {
275 cpu->pc = cpu->cd.arm.r[ARM_PC] = reg(ic->arg[0]);
276 if (cpu->pc & 1) {
277 fatal("thumb: TODO\n");
278 exit(1);
279 }
280 cpu->pc &= ~3;
281
282 /* Find the new physical page and update the translation pointers: */
283 quick_pc_to_pointers(cpu);
284 }
285 Y(bx)
286
287
288 /*
289 * bx_trace: As bx, but with trace enabled, arg[0] = the link register.
290 *
291 * arg[0] = ignored
292 */
293 X(bx_trace)
294 {
295 cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR];
296 if (cpu->pc & 1) {
297 fatal("thumb: TODO\n");
298 exit(1);
299 }
300 cpu->pc &= ~3;
301
302 cpu_functioncall_trace_return(cpu);
303
304 /* Find the new physical page and update the translation pointers: */
305 quick_pc_to_pointers(cpu);
306 }
307 Y(bx_trace)
308
309
310 /*
311 * bl: Branch and Link (to a different translated page)
312 *
313 * arg[0] = relative address
314 */
315 X(bl)
316 {
317 uint32_t lr, low_pc;
318
319 /* Figure out what the return (link) address will be: */
320 low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t)
321 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
322 lr = cpu->cd.arm.r[ARM_PC];
323 lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT);
324 lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
325
326 /* Link: */
327 cpu->cd.arm.r[ARM_LR] = lr;
328
329 /* Calculate new PC from this instruction + arg[0] */
330 cpu->pc = cpu->cd.arm.r[ARM_PC] = lr - 4 + (int32_t)ic->arg[0];
331
332 /* Find the new physical page and update the translation pointers: */
333 quick_pc_to_pointers(cpu);
334 }
335 Y(bl)
336
337
338 /*
339 * blx: Branch and Link, potentially exchanging Thumb/ARM encoding
340 *
341 * arg[0] = ptr to rm
342 */
343 X(blx)
344 {
345 uint32_t lr, low_pc;
346
347 /* Figure out what the return (link) address will be: */
348 low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t)
349 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
350 lr = cpu->cd.arm.r[ARM_PC];
351 lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT);
352 lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
353
354 /* Link: */
355 cpu->cd.arm.r[ARM_LR] = lr;
356
357 cpu->pc = cpu->cd.arm.r[ARM_PC] = reg(ic->arg[0]);
358 if (cpu->pc & 1) {
359 fatal("thumb: TODO\n");
360 exit(1);
361 }
362 cpu->pc &= ~3;
363
364 /* Find the new physical page and update the translation pointers: */
365 quick_pc_to_pointers(cpu);
366 }
367 Y(blx)
368
369
370 /*
371 * bl_trace: Branch and Link (to a different translated page), with trace
372 *
373 * Same as for bl.
374 */
375 X(bl_trace)
376 {
377 uint32_t lr, low_pc;
378
379 /* Figure out what the return (link) address will be: */
380 low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t)
381 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
382 lr = cpu->cd.arm.r[ARM_PC];
383 lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT);
384 lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
385
386 /* Link: */
387 cpu->cd.arm.r[ARM_LR] = lr;
388
389 /* Calculate new PC from this instruction + arg[0] */
390 cpu->pc = cpu->cd.arm.r[ARM_PC] = lr - 4 + (int32_t)ic->arg[0];
391
392 cpu_functioncall_trace(cpu, cpu->pc);
393
394 /* Find the new physical page and update the translation pointers: */
395 quick_pc_to_pointers(cpu);
396 }
397 Y(bl_trace)
398
399
400 /*
401 * bl_samepage: A branch + link within the same page
402 *
403 * arg[0] = pointer to new arm_instr_call
404 */
405 X(bl_samepage)
406 {
407 uint32_t lr, low_pc;
408
409 /* Figure out what the return (link) address will be: */
410 low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t)
411 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
412 lr = cpu->cd.arm.r[ARM_PC];
413 lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT);
414 lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
415
416 /* Link: */
417 cpu->cd.arm.r[ARM_LR] = lr;
418
419 /* Branch: */
420 cpu->cd.arm.next_ic = (struct arm_instr_call *) ic->arg[0];
421 }
422 Y(bl_samepage)
423
424
425 /*
426 * bl_samepage_trace: Branch and Link (to the same page), with trace
427 *
428 * Same as for bl_samepage.
429 */
430 X(bl_samepage_trace)
431 {
432 uint32_t tmp_pc, lr, low_pc;
433
434 /* Figure out what the return (link) address will be: */
435 low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t)
436 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
437 lr = cpu->cd.arm.r[ARM_PC];
438 lr &= ~((ARM_IC_ENTRIES_PER_PAGE-1) << ARM_INSTR_ALIGNMENT_SHIFT);
439 lr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
440
441 /* Link: */
442 cpu->cd.arm.r[ARM_LR] = lr;
443
444 /* Branch: */
445 cpu->cd.arm.next_ic = (struct arm_instr_call *) ic->arg[0];
446
447 low_pc = ((size_t)cpu->cd.arm.next_ic - (size_t)
448 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
449 tmp_pc = cpu->cd.arm.r[ARM_PC];
450 tmp_pc &= ~((ARM_IC_ENTRIES_PER_PAGE-1)
451 << ARM_INSTR_ALIGNMENT_SHIFT);
452 tmp_pc += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
453 cpu_functioncall_trace(cpu, tmp_pc);
454 }
455 Y(bl_samepage_trace)
456
457
458 #include "cpu_arm_instr_misc.c"
459
460
461 /*
462 * mul: Multiplication
463 *
464 * arg[0] = ptr to rd
465 * arg[1] = ptr to rm
466 * arg[2] = ptr to rs
467 */
468 X(mul)
469 {
470 reg(ic->arg[0]) = reg(ic->arg[1]) * reg(ic->arg[2]);
471 }
472 Y(mul)
473 X(muls)
474 {
475 uint32_t result;
476 result = reg(ic->arg[1]) * reg(ic->arg[2]);
477 cpu->cd.arm.cpsr &= ~(ARM_FLAG_Z | ARM_FLAG_N);
478 if (result == 0)
479 cpu->cd.arm.cpsr |= ARM_FLAG_Z;
480 if (result & 0x80000000)
481 cpu->cd.arm.cpsr |= ARM_FLAG_N;
482 reg(ic->arg[0]) = result;
483 }
484 Y(muls)
485
486
487 /*
488 * mla: Multiplication with addition
489 *
490 * arg[0] = copy of instruction word
491 */
492 X(mla)
493 {
494 /* xxxx0000 00ASdddd nnnnssss 1001mmmm (Rd,Rm,Rs[,Rn]) */
495 uint32_t iw = ic->arg[0];
496 int rd, rs, rn, rm;
497 rd = (iw >> 16) & 15; rn = (iw >> 12) & 15,
498 rs = (iw >> 8) & 15; rm = iw & 15;
499 cpu->cd.arm.r[rd] = cpu->cd.arm.r[rm] * cpu->cd.arm.r[rs]
500 + cpu->cd.arm.r[rn];
501 }
502 Y(mla)
503 X(mlas)
504 {
505 /* xxxx0000 00ASdddd nnnnssss 1001mmmm (Rd,Rm,Rs[,Rn]) */
506 uint32_t iw = ic->arg[0];
507 int rd, rs, rn, rm;
508 rd = (iw >> 16) & 15; rn = (iw >> 12) & 15,
509 rs = (iw >> 8) & 15; rm = iw & 15;
510 cpu->cd.arm.r[rd] = cpu->cd.arm.r[rm] * cpu->cd.arm.r[rs]
511 + cpu->cd.arm.r[rn];
512 cpu->cd.arm.cpsr &= ~(ARM_FLAG_Z | ARM_FLAG_N);
513 if (cpu->cd.arm.r[rd] == 0)
514 cpu->cd.arm.cpsr |= ARM_FLAG_Z;
515 if (cpu->cd.arm.r[rd] & 0x80000000)
516 cpu->cd.arm.cpsr |= ARM_FLAG_N;
517 }
518 Y(mlas)
519
520
521 /*
522 * mull: Long multiplication
523 *
524 * arg[0] = copy of instruction word
525 */
526 X(mull)
527 {
528 /* xxxx0000 1UAShhhh llllssss 1001mmmm */
529 uint32_t iw; uint64_t tmp; int u_bit, a_bit;
530 iw = ic->arg[0];
531 u_bit = (iw >> 22) & 1; a_bit = (iw >> 21) & 1;
532 tmp = cpu->cd.arm.r[iw & 15];
533 if (u_bit)
534 tmp = (int64_t)(int32_t)tmp
535 * (int64_t)(int32_t)cpu->cd.arm.r[(iw >> 8) & 15];
536 else
537 tmp *= (uint64_t)cpu->cd.arm.r[(iw >> 8) & 15];
538 if (a_bit) {
539 uint64_t x = ((uint64_t)cpu->cd.arm.r[(iw >> 16) & 15] << 32)
540 | cpu->cd.arm.r[(iw >> 12) & 15];
541 x += tmp;
542 cpu->cd.arm.r[(iw >> 16) & 15] = (x >> 32);
543 cpu->cd.arm.r[(iw >> 12) & 15] = x;
544 } else {
545 cpu->cd.arm.r[(iw >> 16) & 15] = (tmp >> 32);
546 cpu->cd.arm.r[(iw >> 12) & 15] = tmp;
547 }
548 }
549 Y(mull)
550
551
552 /*
553 * mov_reg_reg: Move a register to another.
554 *
555 * arg[0] = ptr to source register
556 * arg[1] = ptr to destination register
557 */
558 X(mov_reg_reg)
559 {
560 reg(ic->arg[1]) = reg(ic->arg[0]);
561 }
562 Y(mov_reg_reg)
563
564
565 /*
566 * ret_trace: "mov pc,lr" with trace enabled
567 * ret: "mov pc,lr" without trace enabled
568 *
569 * arg[0] = ignored
570 */
571 X(ret_trace)
572 {
573 uint32_t old_pc, mask_within_page;
574 old_pc = cpu->cd.arm.r[ARM_PC];
575 mask_within_page = ((ARM_IC_ENTRIES_PER_PAGE-1)
576 << ARM_INSTR_ALIGNMENT_SHIFT) |
577 ((1 << ARM_INSTR_ALIGNMENT_SHIFT) - 1);
578
579 /* Update the PC register: */
580 cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR];
581
582 cpu_functioncall_trace_return(cpu);
583
584 /*
585 * Is this a return to code within the same page? Then there is no
586 * need to update all pointers, just next_ic.
587 */
588 if ((old_pc & ~mask_within_page) == (cpu->pc & ~mask_within_page)) {
589 cpu->cd.arm.next_ic = cpu->cd.arm.cur_ic_page +
590 ((cpu->pc & mask_within_page) >> ARM_INSTR_ALIGNMENT_SHIFT);
591 } else {
592 /* Find the new physical page and update pointers: */
593 quick_pc_to_pointers(cpu);
594 }
595 }
596 Y(ret_trace)
597 X(ret)
598 {
599 cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR];
600 quick_pc_to_pointers(cpu);
601 }
602 Y(ret)
603
604
605 /*
606 * msr: Move to status register from a normal register or immediate value.
607 *
608 * arg[0] = immediate value
609 * arg[1] = mask
610 * arg[2] = pointer to rm
611 *
612 * msr_imm and msr_imm_spsr use arg[1] and arg[0].
613 * msr and msr_spsr use arg[1] and arg[2].
614 */
615 X(msr_imm)
616 {
617 uint32_t mask = ic->arg[1];
618 int switch_register_banks = (mask & ARM_FLAG_MODE) &&
619 ((cpu->cd.arm.cpsr & ARM_FLAG_MODE) !=
620 (ic->arg[0] & ARM_FLAG_MODE));
621 uint32_t new_value = ic->arg[0];
622
623 if (switch_register_banks)
624 arm_save_register_bank(cpu);
625
626 cpu->cd.arm.cpsr &= ~mask;
627 cpu->cd.arm.cpsr |= (new_value & mask);
628
629 if (switch_register_banks)
630 arm_load_register_bank(cpu);
631 }
632 Y(msr_imm)
633 X(msr)
634 {
635 ic->arg[0] = reg(ic->arg[2]);
636 instr(msr_imm)(cpu, ic);
637 }
638 Y(msr)
639 X(msr_imm_spsr)
640 {
641 uint32_t mask = ic->arg[1];
642 uint32_t new_value = ic->arg[0];
643 switch (cpu->cd.arm.cpsr & ARM_FLAG_MODE) {
644 case ARM_MODE_FIQ32:
645 cpu->cd.arm.spsr_fiq &= ~mask;
646 cpu->cd.arm.spsr_fiq |= (new_value & mask);
647 break;
648 case ARM_MODE_ABT32:
649 cpu->cd.arm.spsr_abt &= ~mask;
650 cpu->cd.arm.spsr_abt |= (new_value & mask);
651 break;
652 case ARM_MODE_UND32:
653 cpu->cd.arm.spsr_und &= ~mask;
654 cpu->cd.arm.spsr_und |= (new_value & mask);
655 break;
656 case ARM_MODE_IRQ32:
657 cpu->cd.arm.spsr_irq &= ~mask;
658 cpu->cd.arm.spsr_irq |= (new_value & mask);
659 break;
660 case ARM_MODE_SVC32:
661 cpu->cd.arm.spsr_svc &= ~mask;
662 cpu->cd.arm.spsr_svc |= (new_value & mask);
663 break;
664 default:fatal("msr_spsr: unimplemented mode %i\n",
665 cpu->cd.arm.cpsr & ARM_FLAG_MODE);
666 {
667 /* Synchronize the program counter: */
668 uint32_t old_pc, low_pc = ((size_t)ic - (size_t)
669 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
670 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1)
671 << ARM_INSTR_ALIGNMENT_SHIFT);
672 cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
673 old_pc = cpu->pc = cpu->cd.arm.r[ARM_PC];
674 printf("msr_spsr: old pc = 0x%08x\n", old_pc);
675 }
676 exit(1);
677 }
678 }
679 Y(msr_imm_spsr)
680 X(msr_spsr)
681 {
682 ic->arg[0] = reg(ic->arg[2]);
683 instr(msr_imm_spsr)(cpu, ic);
684 }
685 Y(msr_spsr)
686
687
688 /*
689 * mrs: Move from status/flag register to a normal register.
690 *
691 * arg[0] = pointer to rd
692 */
693 X(mrs)
694 {
695 reg(ic->arg[0]) = cpu->cd.arm.cpsr;
696 }
697 Y(mrs)
698
699
700 /*
701 * mrs: Move from status/flag register to a normal register.
702 *
703 * arg[0] = pointer to rd
704 */
705 X(mrs_spsr)
706 {
707 switch (cpu->cd.arm.cpsr & ARM_FLAG_MODE) {
708 case ARM_MODE_FIQ32: reg(ic->arg[0]) = cpu->cd.arm.spsr_fiq; break;
709 case ARM_MODE_ABT32: reg(ic->arg[0]) = cpu->cd.arm.spsr_abt; break;
710 case ARM_MODE_UND32: reg(ic->arg[0]) = cpu->cd.arm.spsr_und; break;
711 case ARM_MODE_IRQ32: reg(ic->arg[0]) = cpu->cd.arm.spsr_irq; break;
712 case ARM_MODE_SVC32: reg(ic->arg[0]) = cpu->cd.arm.spsr_svc; break;
713 case ARM_MODE_USR32:
714 case ARM_MODE_SYS32: reg(ic->arg[0]) = 0; break;
715 default:fatal("mrs_spsr: unimplemented mode %i\n",
716 cpu->cd.arm.cpsr & ARM_FLAG_MODE);
717 exit(1);
718 }
719 }
720 Y(mrs_spsr)
721
722
723 /*
724 * mcr_mrc: Coprocessor move
725 * cdp: Coprocessor operation
726 *
727 * arg[0] = copy of the instruction word
728 */
729 X(mcr_mrc) {
730 uint32_t low_pc;
731 low_pc = ((size_t)ic - (size_t)
732 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
733 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1)
734 << ARM_INSTR_ALIGNMENT_SHIFT);
735 cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
736 cpu->pc = cpu->cd.arm.r[ARM_PC];
737 arm_mcr_mrc(cpu, ic->arg[0]);
738 }
739 Y(mcr_mrc)
740 X(cdp) {
741 uint32_t low_pc;
742 low_pc = ((size_t)ic - (size_t)
743 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
744 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1)
745 << ARM_INSTR_ALIGNMENT_SHIFT);
746 cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
747 cpu->pc = cpu->cd.arm.r[ARM_PC];
748 arm_cdp(cpu, ic->arg[0]);
749 }
750 Y(cdp)
751
752
753 /*
754 * openfirmware:
755 */
756 X(openfirmware)
757 {
758 of_emul(cpu);
759 cpu->pc = cpu->cd.arm.r[ARM_PC] = cpu->cd.arm.r[ARM_LR];
760 if (cpu->machine->show_trace_tree)
761 cpu_functioncall_trace_return(cpu);
762 quick_pc_to_pointers(cpu);
763 }
764
765
766 /*
767 * swi_useremul: Syscall.
768 *
769 * arg[0] = swi number
770 */
771 X(swi_useremul)
772 {
773 /* Synchronize the program counter: */
774 uint32_t old_pc, low_pc = ((size_t)ic - (size_t)
775 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
776 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1)
777 << ARM_INSTR_ALIGNMENT_SHIFT);
778 cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
779 old_pc = cpu->pc = cpu->cd.arm.r[ARM_PC];
780
781 useremul_syscall(cpu, ic->arg[0]);
782
783 if (!cpu->running) {
784 cpu->running_translated = 0;
785 cpu->n_translated_instrs --;
786 cpu->cd.arm.next_ic = &nothing_call;
787 } else if (cpu->pc != old_pc) {
788 /* PC was changed by the SWI call. Find the new physical
789 page and update the translation pointers: */
790 quick_pc_to_pointers(cpu);
791 }
792 }
793 Y(swi_useremul)
794
795
796 /*
797 * swi: Software interrupt.
798 */
799 X(swi)
800 {
801 /* Synchronize the program counter: */
802 uint32_t low_pc = ((size_t)ic - (size_t)
803 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
804 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1)
805 << ARM_INSTR_ALIGNMENT_SHIFT);
806 cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
807 cpu->pc = cpu->cd.arm.r[ARM_PC];
808
809 arm_exception(cpu, ARM_EXCEPTION_SWI);
810 }
811 Y(swi)
812
813
814 /*
815 * swp, swpb: Swap (word or byte).
816 *
817 * arg[0] = ptr to rd
818 * arg[1] = ptr to rm
819 * arg[2] = ptr to rn
820 */
821 X(swp)
822 {
823 uint32_t addr = reg(ic->arg[2]), data, data2;
824 unsigned char d[4];
825 /* Synchronize the program counter: */
826 uint32_t low_pc = ((size_t)ic - (size_t)
827 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
828 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1)
829 << ARM_INSTR_ALIGNMENT_SHIFT);
830 cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
831 cpu->pc = cpu->cd.arm.r[ARM_PC];
832
833 if (!cpu->memory_rw(cpu, cpu->mem, addr, d, sizeof(d), MEM_READ,
834 CACHE_DATA)) {
835 fatal("swp: load failed\n");
836 return;
837 }
838 data = d[0] + (d[1] << 8) + (d[2] << 16) + (d[3] << 24);
839 data2 = reg(ic->arg[1]);
840 d[0] = data2; d[1] = data2 >> 8; d[2] = data2 >> 16; d[3] = data2 >> 24;
841 if (!cpu->memory_rw(cpu, cpu->mem, addr, d, sizeof(d), MEM_WRITE,
842 CACHE_DATA)) {
843 fatal("swp: store failed\n");
844 return;
845 }
846 reg(ic->arg[0]) = data;
847 }
848 Y(swp)
849 X(swpb)
850 {
851 uint32_t addr = reg(ic->arg[2]), data;
852 unsigned char d[1];
853 /* Synchronize the program counter: */
854 uint32_t low_pc = ((size_t)ic - (size_t)
855 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
856 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1)
857 << ARM_INSTR_ALIGNMENT_SHIFT);
858 cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
859 cpu->pc = cpu->cd.arm.r[ARM_PC];
860
861 if (!cpu->memory_rw(cpu, cpu->mem, addr, d, sizeof(d), MEM_READ,
862 CACHE_DATA)) {
863 fatal("swp: load failed\n");
864 return;
865 }
866 data = d[0];
867 d[0] = reg(ic->arg[1]);
868 if (!cpu->memory_rw(cpu, cpu->mem, addr, d, sizeof(d), MEM_WRITE,
869 CACHE_DATA)) {
870 fatal("swp: store failed\n");
871 return;
872 }
873 reg(ic->arg[0]) = data;
874 }
875 Y(swpb)
876
877
/*
 * Tables and instruction handlers that are declared here but defined
 * outside this part of the file. Each table is an array of pointers to
 * functions taking (struct cpu *, struct arm_instr_call *); the entries of
 * arm_r return a uint32_t, all others return nothing.
 *
 * NOTE(review): the X(name); lines presumably act as forward declarations
 * of individual handlers (X is not defined in the visible part of the
 * file) -- confirm against the definition of X.
 */
878 extern void (*arm_load_store_instr[1024])(struct cpu *,
879 struct arm_instr_call *);
880 X(store_w0_byte_u1_p0_imm);
881 X(store_w0_word_u1_p0_imm);
882 X(load_w0_word_u1_p0_imm);
883 X(load_w0_byte_u1_p1_imm);
884 X(load_w0_byte_u1_p1_reg);
885
886 extern void (*arm_load_store_instr_pc[1024])(struct cpu *,
887 struct arm_instr_call *);
888
889 extern void (*arm_load_store_instr_3[2048])(struct cpu *,
890 struct arm_instr_call *);
891
892 extern void (*arm_load_store_instr_3_pc[2048])(struct cpu *,
893 struct arm_instr_call *);
894
895 extern uint32_t (*arm_r[8192])(struct cpu *, struct arm_instr_call *);
896
/* Table of data processing instruction handlers, with 2*2*2*16*16 entries. */
897 extern void (*arm_dpi_instr[2 * 2 * 2 * 16 * 16])(struct cpu *,
898 struct arm_instr_call *);
899 X(cmps);
900 X(sub);
901 X(add);
902 X(subs);
903
904
905
906 /*
907 * bdt_load: Block Data Transfer, Load
908 *
909 * arg[0] = pointer to uint32_t in host memory, pointing to the base register
910 * arg[1] = 32-bit instruction word. Most bits are read from this.
911 */
912 X(bdt_load)
913 {
914 unsigned char data[4];
915 uint32_t *np = (uint32_t *)ic->arg[0];
916 uint32_t addr = *np, low_pc;
917 unsigned char *page;
918 uint32_t iw = ic->arg[1]; /* xxxx100P USWLnnnn llllllll llllllll */
919 int p_bit = iw & 0x01000000;
920 int u_bit = iw & 0x00800000;
921 int s_bit = iw & 0x00400000;
922 int w_bit = iw & 0x00200000;
923 int i, return_flag = 0;
924 uint32_t new_values[16];
925
926 #ifdef GATHER_BDT_STATISTICS
927 if (!s_bit)
928 update_bdt_statistics(iw);
929 #endif
930
931 /* Synchronize the program counter: */
932 low_pc = ((size_t)ic - (size_t)
933 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
934 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) <<
935 ARM_INSTR_ALIGNMENT_SHIFT);
936 cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
937 cpu->pc = cpu->cd.arm.r[ARM_PC];
938
939 if (s_bit) {
940 /* Load to USR registers: */
941 if ((cpu->cd.arm.cpsr & ARM_FLAG_MODE) == ARM_MODE_USR32) {
942 fatal("[ bdt_load: s-bit: in usermode? ]\n");
943 s_bit = 0;
944 }
945 if (iw & 0x8000) {
946 s_bit = 0;
947 return_flag = 1;
948 }
949 }
950
951 for (i=(u_bit? 0 : 15); i>=0 && i<=15; i+=(u_bit? 1 : -1)) {
952 uint32_t value;
953
954 if (!((iw >> i) & 1)) {
955 /* Skip register i: */
956 continue;
957 }
958
959 if (p_bit) {
960 if (u_bit)
961 addr += sizeof(uint32_t);
962 else
963 addr -= sizeof(uint32_t);
964 }
965
966 page = cpu->cd.arm.host_load[addr >> 12];
967 if (page != NULL) {
968 uint32_t *p32 = (uint32_t *) page;
969 value = p32[(addr & 0xfff) >> 2];
970 /* Change byte order of value if
971 host and emulated endianness differ: */
972 #ifdef HOST_LITTLE_ENDIAN
973 if (cpu->byte_order == EMUL_BIG_ENDIAN)
974 #else
975 if (cpu->byte_order == EMUL_LITTLE_ENDIAN)
976 #endif
977 value = ((value & 0xff) << 24) |
978 ((value & 0xff00) << 8) |
979 ((value & 0xff0000) >> 8) |
980 ((value & 0xff000000) >> 24);
981 } else {
982 if (!cpu->memory_rw(cpu, cpu->mem, addr, data,
983 sizeof(data), MEM_READ, CACHE_DATA)) {
984 /* load failed */
985 return;
986 }
987 if (cpu->byte_order == EMUL_LITTLE_ENDIAN) {
988 value = data[0] +
989 (data[1] << 8) + (data[2] << 16)
990 + (data[3] << 24);
991 } else {
992 value = data[3] +
993 (data[2] << 8) + (data[1] << 16)
994 + (data[0] << 24);
995 }
996 }
997
998 new_values[i] = value;
999
1000 if (!p_bit) {
1001 if (u_bit)
1002 addr += sizeof(uint32_t);
1003 else
1004 addr -= sizeof(uint32_t);
1005 }
1006 }
1007
1008 for (i=(u_bit? 0 : 15); i>=0 && i<=15; i+=(u_bit? 1 : -1)) {
1009 if (!((iw >> i) & 1)) {
1010 /* Skip register i: */
1011 continue;
1012 }
1013
1014 if (!s_bit) {
1015 cpu->cd.arm.r[i] = new_values[i];
1016 } else {
1017 switch (cpu->cd.arm.cpsr & ARM_FLAG_MODE) {
1018 case ARM_MODE_USR32:
1019 case ARM_MODE_SYS32:
1020 cpu->cd.arm.r[i] = new_values[i];
1021 break;
1022 case ARM_MODE_FIQ32:
1023 if (i >= 8 && i <= 14)
1024 cpu->cd.arm.default_r8_r14[i-8] =
1025 new_values[i];
1026 else
1027 cpu->cd.arm.r[i] = new_values[i];
1028 break;
1029 case ARM_MODE_SVC32:
1030 case ARM_MODE_ABT32:
1031 case ARM_MODE_UND32:
1032 case ARM_MODE_IRQ32:
1033 if (i >= 13 && i <= 14)
1034 cpu->cd.arm.default_r8_r14[i-8] =
1035 new_values[i];
1036 else
1037 cpu->cd.arm.r[i] = new_values[i];
1038 break;
1039 }
1040 }
1041 }
1042
1043 if (w_bit)
1044 *np = addr;
1045
1046 if (return_flag) {
1047 uint32_t new_cpsr;
1048 int switch_register_banks;
1049
1050 switch (cpu->cd.arm.cpsr & ARM_FLAG_MODE) {
1051 case ARM_MODE_FIQ32:
1052 new_cpsr = cpu->cd.arm.spsr_fiq; break;
1053 case ARM_MODE_ABT32:
1054 new_cpsr = cpu->cd.arm.spsr_abt; break;
1055 case ARM_MODE_UND32:
1056 new_cpsr = cpu->cd.arm.spsr_und; break;
1057 case ARM_MODE_IRQ32:
1058 new_cpsr = cpu->cd.arm.spsr_irq; break;
1059 case ARM_MODE_SVC32:
1060 new_cpsr = cpu->cd.arm.spsr_svc; break;
1061 default:fatal("bdt_load: unimplemented mode %i\n",
1062 cpu->cd.arm.cpsr & ARM_FLAG_MODE);
1063 exit(1);
1064 }
1065
1066 switch_register_banks = (cpu->cd.arm.cpsr & ARM_FLAG_MODE) !=
1067 (new_cpsr & ARM_FLAG_MODE);
1068
1069 if (switch_register_banks)
1070 arm_save_register_bank(cpu);
1071
1072 cpu->cd.arm.cpsr = new_cpsr;
1073
1074 if (switch_register_banks)
1075 arm_load_register_bank(cpu);
1076 }
1077
1078 /* NOTE: Special case: Loading the PC */
1079 if (iw & 0x8000) {
1080 cpu->cd.arm.r[ARM_PC] &= ~3;
1081 cpu->pc = cpu->cd.arm.r[ARM_PC];
1082 if (cpu->machine->show_trace_tree)
1083 cpu_functioncall_trace_return(cpu);
1084 /* TODO: There is no need to update the
1085 pointers if this is a return to the
1086 same page! */
1087 /* Find the new physical page and update the
1088 translation pointers: */
1089 quick_pc_to_pointers(cpu);
1090 }
1091 }
1092 Y(bdt_load)
1093
1094
1095 /*
1096 * bdt_store: Block Data Transfer, Store
1097 *
1098 * arg[0] = pointer to uint32_t in host memory, pointing to the base register
1099 * arg[1] = 32-bit instruction word. Most bits are read from this.
1100 */
1101 X(bdt_store)
1102 {
1103 unsigned char data[4];
1104 uint32_t *np = (uint32_t *)ic->arg[0];
1105 uint32_t low_pc, value, addr = *np;
1106 uint32_t iw = ic->arg[1]; /* xxxx100P USWLnnnn llllllll llllllll */
1107 unsigned char *page;
1108 int p_bit = iw & 0x01000000;
1109 int u_bit = iw & 0x00800000;
1110 int s_bit = iw & 0x00400000;
1111 int w_bit = iw & 0x00200000;
1112 int i;
1113
1114 #ifdef GATHER_BDT_STATISTICS
1115 if (!s_bit)
1116 update_bdt_statistics(iw);
1117 #endif
1118
1119 /* Synchronize the program counter: */
1120 low_pc = ((size_t)ic - (size_t)
1121 cpu->cd.arm.cur_ic_page) / sizeof(struct arm_instr_call);
1122 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1) <<
1123 ARM_INSTR_ALIGNMENT_SHIFT);
1124 cpu->cd.arm.r[ARM_PC] += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
1125 cpu->pc = cpu->cd.arm.r[ARM_PC];
1126
1127 for (i=(u_bit? 0 : 15); i>=0 && i<=15; i+=(u_bit? 1 : -1)) {
1128 if (!((iw >> i) & 1)) {
1129 /* Skip register i: */
1130 continue;
1131 }
1132
1133 value = cpu->cd.arm.r[i];
1134
1135 if (s_bit) {
1136 switch (cpu->cd.arm.cpsr & ARM_FLAG_MODE) {
1137 case ARM_MODE_FIQ32:
1138 if (i >= 8 && i <= 14)
1139 value = cpu->cd.arm.default_r8_r14[i-8];
1140 break;
1141 case ARM_MODE_ABT32:
1142 case ARM_MODE_UND32:
1143 case ARM_MODE_IRQ32:
1144 case ARM_MODE_SVC32:
1145 if (i >= 13 && i <= 14)
1146 value = cpu->cd.arm.default_r8_r14[i-8];
1147 break;
1148 case ARM_MODE_USR32:
1149 case ARM_MODE_SYS32:
1150 break;
1151 }
1152 }
1153
1154 if (i == ARM_PC)
1155 value += 12; /* NOTE/TODO: 8 on some ARMs */
1156
1157 if (p_bit) {
1158 if (u_bit)
1159 addr += sizeof(uint32_t);
1160 else
1161 addr -= sizeof(uint32_t);
1162 }
1163
1164 page = cpu->cd.arm.host_store[addr >> 12];
1165 if (page != NULL) {
1166 uint32_t *p32 = (uint32_t *) page;
1167 /* Change byte order of value if
1168 host and emulated endianness differ: */
1169 #ifdef HOST_LITTLE_ENDIAN
1170 if (cpu->byte_order == EMUL_BIG_ENDIAN)
1171 #else
1172 if (cpu->byte_order == EMUL_LITTLE_ENDIAN)
1173 #endif
1174 value = ((value & 0xff) << 24) |
1175 ((value & 0xff00) << 8) |
1176 ((value & 0xff0000) >> 8) |
1177 ((value & 0xff000000) >> 24);
1178 p32[(addr & 0xfff) >> 2] = value;
1179 } else {
1180 if (cpu->byte_order == EMUL_LITTLE_ENDIAN) {
1181 data[0] = value;
1182 data[1] = value >> 8;
1183 data[2] = value >> 16;
1184 data[3] = value >> 24;
1185 } else {
1186 data[0] = value >> 24;
1187 data[1] = value >> 16;
1188 data[2] = value >> 8;
1189 data[3] = value;
1190 }
1191 if (!cpu->memory_rw(cpu, cpu->mem, addr, data,
1192 sizeof(data), MEM_WRITE, CACHE_DATA)) {
1193 /* store failed */
1194 return;
1195 }
1196 }
1197
1198 if (!p_bit) {
1199 if (u_bit)
1200 addr += sizeof(uint32_t);
1201 else
1202 addr -= sizeof(uint32_t);
1203 }
1204 }
1205
1206 if (w_bit)
1207 *np = addr;
1208 }
1209 Y(bdt_store)
1210
1211
1212 /* Various load/store multiple instructions: */
1213 #include "tmp_arm_multi.c"
1214
1215
1216 /*****************************************************************************/
1217
1218
1219 /*
1220 * fill_loop_test:
1221 *
1222 * A byte-fill loop. Fills at most one page at a time. If the page was not
1223 * in the host_store table, then the original sequence (beginning with
1224 * cmps rZ,#0) is executed instead.
1225 *
1226 * L: cmps rZ,#0 ic[0]
1227 * strb rX,[rY],#1 ic[1]
1228 * sub rZ,rZ,#1 ic[2]
1229 * bgt L ic[3]
1230 *
1231 * A maximum of 4 pages are filled before returning.
1232 */
1233 X(fill_loop_test)
1234 {
1235 int max_pages_left = 4;
1236 uint32_t addr, a, n, ofs, maxlen;
1237 uint32_t *rzp = (uint32_t *)(size_t)ic[0].arg[0];
1238 unsigned char *page;
1239
1240 restart_loop:
1241 addr = reg(ic[1].arg[0]);
1242 page = cpu->cd.arm.host_store[addr >> 12];
1243 if (page == NULL) {
1244 instr(cmps)(cpu, ic);
1245 return;
1246 }
1247
1248 n = reg(rzp) + 1;
1249 ofs = addr & 0xfff;
1250 maxlen = 4096 - ofs;
1251 if (n > maxlen)
1252 n = maxlen;
1253
1254 /* printf("x = %x, n = %i\n", reg(ic[1].arg[2]), n); */
1255 memset(page + ofs, reg(ic[1].arg[2]), n);
1256
1257 reg(ic[1].arg[0]) = addr + n;
1258
1259 reg(rzp) -= n;
1260 cpu->n_translated_instrs += (4 * n);
1261
1262 a = reg(rzp);
1263
1264 cpu->cd.arm.cpsr &=
1265 ~(ARM_FLAG_Z | ARM_FLAG_N | ARM_FLAG_V | ARM_FLAG_C);
1266 if (a != 0)
1267 cpu->cd.arm.cpsr |= ARM_FLAG_C;
1268 else
1269 cpu->cd.arm.cpsr |= ARM_FLAG_Z;
1270 if ((int32_t)a < 0)
1271 cpu->cd.arm.cpsr |= ARM_FLAG_N;
1272
1273 if (max_pages_left-- > 0 && (int32_t)a > 0)
1274 goto restart_loop;
1275
1276 cpu->n_translated_instrs --;
1277
1278 if ((int32_t)a > 0)
1279 cpu->cd.arm.next_ic = ic;
1280 else
1281 cpu->cd.arm.next_ic = &ic[4];
1282 }
1283
1284
1285 /*
1286 * fill_loop_test2:
1287 *
1288 * A word-fill loop. Fills at most one page at a time. If the page was not
1289 * in the host_store table, then the original sequence (beginning with
1290 * cmps rZ,#0) is executed instead.
1291 *
1292 * L: str rX,[rY],#4 ic[0]
1293 * subs rZ,rZ,#4 ic[1]
1294 * bgt L ic[2]
1295 *
1296 * A maximum of 5 pages are filled before returning.
1297 */
1298 X(fill_loop_test2)
1299 {
1300 int max_pages_left = 5;
1301 unsigned char x1,x2,x3,x4;
1302 uint32_t addr, a, n, x, ofs, maxlen;
1303 uint32_t *rzp = (uint32_t *)(size_t)ic[1].arg[0];
1304 unsigned char *page;
1305
1306 x = reg(ic[0].arg[2]);
1307 x1 = x; x2 = x >> 8; x3 = x >> 16; x4 = x >> 24;
1308 if (x1 != x2 || x1 != x3 || x1 != x4) {
1309 instr(store_w0_word_u1_p0_imm)(cpu, ic);
1310 return;
1311 }
1312
1313 restart_loop:
1314 addr = reg(ic[0].arg[0]);
1315 page = cpu->cd.arm.host_store[addr >> 12];
1316 if (page == NULL || (addr & 3) != 0) {
1317 instr(store_w0_word_u1_p0_imm)(cpu, ic);
1318 return;
1319 }
1320
1321 /* printf("addr = 0x%08x, page = %p\n", addr, page);
1322 printf("*rzp = 0x%08x\n", reg(rzp)); */
1323
1324 n = reg(rzp) / 4;
1325 if (n == 0)
1326 n++;
1327 /* n = nr of _words_ */
1328 ofs = addr & 0xfff;
1329 maxlen = 4096 - ofs;
1330 if (n*4 > maxlen)
1331 n = maxlen / 4;
1332
1333 /* printf("x = %x, n = %i\n", x1, n); */
1334 memset(page + ofs, x1, n * 4);
1335
1336 reg(ic[0].arg[0]) = addr + n * 4;
1337
1338 reg(rzp) -= (n * 4);
1339 cpu->n_translated_instrs += (3 * n);
1340
1341 a = reg(rzp);
1342
1343 cpu->cd.arm.cpsr &=
1344 ~(ARM_FLAG_Z | ARM_FLAG_N | ARM_FLAG_V | ARM_FLAG_C);
1345 if (a != 0)
1346 cpu->cd.arm.cpsr |= ARM_FLAG_C;
1347 else
1348 cpu->cd.arm.cpsr |= ARM_FLAG_Z;
1349 if ((int32_t)a < 0)
1350 cpu->cd.arm.cpsr |= ARM_FLAG_N;
1351
1352 if (max_pages_left-- > 0 && (int32_t)a > 0)
1353 goto restart_loop;
1354
1355 cpu->n_translated_instrs --;
1356
1357 if ((int32_t)a > 0)
1358 cpu->cd.arm.next_ic = ic;
1359 else
1360 cpu->cd.arm.next_ic = &ic[3];
1361 }
1362
1363
1364 /*
1365 * netbsd_memset:
1366 *
1367 * The core of a NetBSD/arm memset.
1368 *
1369 * f01bc420: e25XX080 subs rX,rX,#0x80
1370 * f01bc424: a8ac000c stmgeia ip!,{r2,r3} (16 of these)
1371 * ..
1372 * f01bc464: caffffed bgt 0xf01bc420 <memset+0x38>
1373 */
1374 X(netbsd_memset)
1375 {
1376 unsigned char *page;
1377 uint32_t addr;
1378
1379 do {
1380 addr = cpu->cd.arm.r[ARM_IP];
1381
1382 instr(subs)(cpu, ic);
1383
1384 if (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) !=
1385 ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0)) {
1386 cpu->n_translated_instrs += 16;
1387 /* Skip the store multiples: */
1388 cpu->cd.arm.next_ic = &ic[17];
1389 return;
1390 }
1391
1392 /* Crossing a page boundary? Then continue non-combined. */
1393 if ((addr & 0xfff) + 128 > 0x1000)
1394 return;
1395
1396 /* R2/R3 non-zero? Not allowed here. */
1397 if (cpu->cd.arm.r[2] != 0 || cpu->cd.arm.r[3] != 0)
1398 return;
1399
1400 /* printf("addr = 0x%08x\n", addr); */
1401
1402 page = cpu->cd.arm.host_store[addr >> 12];
1403 /* No page translation? Continue non-combined. */
1404 if (page == NULL)
1405 return;
1406
1407 /* Clear: */
1408 memset(page + (addr & 0xfff), 0, 128);
1409 cpu->cd.arm.r[ARM_IP] = addr + 128;
1410 cpu->n_translated_instrs += 16;
1411
1412 /* Branch back if greater: */
1413 cpu->n_translated_instrs += 1;
1414 } while (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) ==
1415 ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0) &&
1416 !(cpu->cd.arm.cpsr & ARM_FLAG_Z));
1417
1418 /* Continue at the instruction after the bgt: */
1419 cpu->cd.arm.next_ic = &ic[18];
1420 }
1421
1422
1423 /*
1424 * netbsd_memcpy:
1425 *
1426 * The core of a NetBSD/arm memcpy.
1427 *
1428 * f01bc530: e8b15018 ldmia r1!,{r3,r4,ip,lr}
1429 * f01bc534: e8a05018 stmia r0!,{r3,r4,ip,lr}
1430 * f01bc538: e8b15018 ldmia r1!,{r3,r4,ip,lr}
1431 * f01bc53c: e8a05018 stmia r0!,{r3,r4,ip,lr}
1432 * f01bc540: e2522020 subs r2,r2,#0x20
1433 * f01bc544: aafffff9 bge 0xf01bc530
1434 */
1435 X(netbsd_memcpy)
1436 {
1437 unsigned char *page_0, *page_1;
1438 uint32_t addr_r0, addr_r1;
1439
1440 do {
1441 addr_r0 = cpu->cd.arm.r[0];
1442 addr_r1 = cpu->cd.arm.r[1];
1443
1444 /* printf("addr_r0 = %08x r1 = %08x\n", addr_r0, addr_r1); */
1445
1446 /* Crossing a page boundary? Then continue non-combined. */
1447 if ((addr_r0 & 0xfff) + 32 > 0x1000 ||
1448 (addr_r1 & 0xfff) + 32 > 0x1000) {
1449 instr(multi_0x08b15018)(cpu, ic);
1450 return;
1451 }
1452
1453 page_0 = cpu->cd.arm.host_store[addr_r0 >> 12];
1454 page_1 = cpu->cd.arm.host_store[addr_r1 >> 12];
1455
1456 /* No page translations? Continue non-combined. */
1457 if (page_0 == NULL || page_1 == NULL) {
1458 instr(multi_0x08b15018)(cpu, ic);
1459 return;
1460 }
1461
1462 memcpy(page_0 + (addr_r0 & 0xfff),
1463 page_1 + (addr_r1 & 0xfff), 32);
1464 cpu->cd.arm.r[0] = addr_r0 + 32;
1465 cpu->cd.arm.r[1] = addr_r1 + 32;
1466
1467 cpu->n_translated_instrs += 4;
1468
1469 instr(subs)(cpu, ic + 4);
1470 cpu->n_translated_instrs ++;
1471
1472 /* Loop while greater or equal: */
1473 cpu->n_translated_instrs ++;
1474 } while (((cpu->cd.arm.cpsr & ARM_FLAG_N)?1:0) ==
1475 ((cpu->cd.arm.cpsr & ARM_FLAG_V)?1:0));
1476
1477 /* Continue at the instruction after the bge: */
1478 cpu->cd.arm.next_ic = &ic[6];
1479 cpu->n_translated_instrs --;
1480 }
1481
1482
1483 /*
1484 * netbsd_cacheclean:
1485 *
1486 * The core of a NetBSD/arm cache clean routine, variant 1:
1487 *
1488 * f015f88c: e4902020 ldr r2,[r0],#32
1489 * f015f890: e2511020 subs r1,r1,#0x20
1490 * f015f894: 1afffffc bne 0xf015f88c
1491 * f015f898: ee070f9a mcr 15,0,r0,cr7,cr10,4
1492 */
1493 X(netbsd_cacheclean)
1494 {
1495 uint32_t r1 = cpu->cd.arm.r[1];
1496 cpu->n_translated_instrs += ((r1 >> 5) * 3);
1497 cpu->cd.arm.next_ic = &ic[4];
1498 }
1499
1500
1501 /*
1502 * netbsd_cacheclean2:
1503 *
1504 * The core of a NetBSD/arm cache clean routine, variant 2:
1505 *
1506 * f015f93c: ee070f3a mcr 15,0,r0,cr7,cr10,1
1507 * f015f940: ee070f36 mcr 15,0,r0,cr7,cr6,1
1508 * f015f944: e2800020 add r0,r0,#0x20
1509 * f015f948: e2511020 subs r1,r1,#0x20
1510 * f015f94c: 8afffffa bhi 0xf015f93c
1511 */
1512 X(netbsd_cacheclean2)
1513 {
1514 cpu->n_translated_instrs += ((cpu->cd.arm.r[1] >> 5) * 5) - 1;
1515 cpu->cd.arm.next_ic = &ic[5];
1516 }
1517
1518
1519 /*
1520 * netbsd_scanc:
1521 *
1522 * f01bccbc: e5d13000 ldrb r3,[r1]
1523 * f01bccc0: e7d23003 ldrb r3,[r2,r3]
1524 * f01bccc4: e113000c tsts r3,ip
1525 */
1526 X(netbsd_scanc)
1527 {
1528 unsigned char *page = cpu->cd.arm.host_load[cpu->cd.arm.r[1] >> 12];
1529 uint32_t t;
1530
1531 if (page == NULL) {
1532 instr(load_w0_byte_u1_p1_imm)(cpu, ic);
1533 return;
1534 }
1535
1536 t = page[cpu->cd.arm.r[1] & 0xfff];
1537 t += cpu->cd.arm.r[2];
1538 page = cpu->cd.arm.host_load[t >> 12];
1539
1540 if (page == NULL) {
1541 instr(load_w0_byte_u1_p1_imm)(cpu, ic);
1542 return;
1543 }
1544
1545 cpu->cd.arm.r[3] = page[t & 0xfff];
1546
1547 t = cpu->cd.arm.r[3] & cpu->cd.arm.r[ARM_IP];
1548 cpu->cd.arm.cpsr &= ~(ARM_FLAG_Z | ARM_FLAG_N);
1549 if (t == 0)
1550 cpu->cd.arm.cpsr |= ARM_FLAG_Z;
1551
1552 cpu->n_translated_instrs += 2;
1553 cpu->cd.arm.next_ic = &ic[3];
1554 }
1555
1556
1557 /*****************************************************************************/
1558
1559
1560 X(end_of_page)
1561 {
1562 /* Update the PC: (offset 0, but on the next page) */
1563 cpu->cd.arm.r[ARM_PC] &= ~((ARM_IC_ENTRIES_PER_PAGE-1)
1564 << ARM_INSTR_ALIGNMENT_SHIFT);
1565 cpu->cd.arm.r[ARM_PC] += (ARM_IC_ENTRIES_PER_PAGE
1566 << ARM_INSTR_ALIGNMENT_SHIFT);
1567 cpu->pc = cpu->cd.arm.r[ARM_PC];
1568
1569 /* Find the new physical page and update the translation pointers: */
1570 quick_pc_to_pointers(cpu);
1571
1572 /* end_of_page doesn't count as an executed instruction: */
1573 cpu->n_translated_instrs --;
1574 }
1575
1576
1577 /*****************************************************************************/
1578
1579
1580 /*
1581 * arm_combine_netbsd_memset():
1582 *
1583 * Check for the core of a NetBSD/arm memset; large memsets use a sequence
1584 * of 16 store-multiple instructions, each storing 2 registers at a time.
1585 */
1586 void arm_combine_netbsd_memset(struct cpu *cpu, struct arm_instr_call *ic,
1587 int low_addr)
1588 {
1589 int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT)
1590 & (ARM_IC_ENTRIES_PER_PAGE-1);
1591
1592 if (n_back >= 17) {
1593 int i;
1594 for (i=-16; i<=-1; i++)
1595 if (ic[i].f != instr(multi_0x08ac000c__ge))
1596 return;
1597 if (ic[-17].f == instr(subs) &&
1598 ic[-17].arg[0]==ic[-17].arg[2] && ic[-17].arg[1] == 128 &&
1599 ic[ 0].f == instr(b_samepage__gt) &&
1600 ic[ 0].arg[0] == (size_t)&ic[-17]) {
1601 ic[-17].f = instr(netbsd_memset);
1602 combined;
1603 }
1604 }
1605 }
1606
1607
1608 /*
1609 * arm_combine_netbsd_memcpy():
1610 *
1611 * Check for the core of a NetBSD/arm memcpy; large memcpys use a
1612 * sequence of ldmia instructions.
1613 */
1614 void arm_combine_netbsd_memcpy(struct cpu *cpu, struct arm_instr_call *ic,
1615 int low_addr)
1616 {
1617 int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT)
1618 & (ARM_IC_ENTRIES_PER_PAGE-1);
1619
1620 if (n_back >= 5) {
1621 if (ic[-5].f==instr(multi_0x08b15018) &&
1622 ic[-4].f==instr(multi_0x08a05018) &&
1623 ic[-3].f==instr(multi_0x08b15018) &&
1624 ic[-2].f==instr(multi_0x08a05018) &&
1625 ic[-1].f == instr(subs) &&
1626 ic[-1].arg[0]==ic[-1].arg[2] && ic[-1].arg[1] == 0x20 &&
1627 ic[ 0].f == instr(b_samepage__ge) &&
1628 ic[ 0].arg[0] == (size_t)&ic[-5]) {
1629 ic[-5].f = instr(netbsd_memcpy);
1630 combined;
1631 }
1632 }
1633 }
1634
1635
1636 /*
1637 * arm_combine_netbsd_cacheclean():
1638 *
1639 * Check for the core of a NetBSD/arm cache clean. (There are two variants.)
1640 */
1641 void arm_combine_netbsd_cacheclean(struct cpu *cpu, struct arm_instr_call *ic,
1642 int low_addr)
1643 {
1644 int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT)
1645 & (ARM_IC_ENTRIES_PER_PAGE-1);
1646
1647 if (n_back >= 3) {
1648 if (ic[-3].f==instr(load_w0_word_u1_p0_imm) &&
1649 ic[-2].f == instr(subs) &&
1650 ic[-2].arg[0]==ic[-2].arg[2] && ic[-2].arg[1] == 0x20 &&
1651 ic[-1].f == instr(b_samepage__ne) &&
1652 ic[-1].arg[0] == (size_t)&ic[-3]) {
1653 ic[-3].f = instr(netbsd_cacheclean);
1654 combined;
1655 }
1656 }
1657 }
1658
1659
1660 /*
1661 * arm_combine_netbsd_cacheclean2():
1662 *
1663 * Check for the core of a NetBSD/arm cache clean. (Second variant.)
1664 */
1665 void arm_combine_netbsd_cacheclean2(struct cpu *cpu, struct arm_instr_call *ic,
1666 int low_addr)
1667 {
1668 int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT)
1669 & (ARM_IC_ENTRIES_PER_PAGE-1);
1670
1671 if (n_back >= 4) {
1672 if (ic[-4].f == instr(mcr_mrc) && ic[-4].arg[0] == 0xee070f3a &&
1673 ic[-3].f == instr(mcr_mrc) && ic[-3].arg[0] == 0xee070f36 &&
1674 ic[-2].f == instr(add) &&
1675 ic[-2].arg[0]==ic[-2].arg[2] && ic[-2].arg[1] == 0x20 &&
1676 ic[-1].f == instr(subs) &&
1677 ic[-1].arg[0]==ic[-1].arg[2] && ic[-1].arg[1] == 0x20) {
1678 ic[-4].f = instr(netbsd_cacheclean2);
1679 combined;
1680 }
1681 }
1682 }
1683
1684
1685 /*
1686 * arm_combine_netbsd_scanc():
1687 */
1688 void arm_combine_netbsd_scanc(struct cpu *cpu, struct arm_instr_call *ic,
1689 int low_addr)
1690 {
1691 int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT)
1692 & (ARM_IC_ENTRIES_PER_PAGE-1);
1693
1694 if (n_back >= 2) {
1695 if (ic[-2].f == instr(load_w0_byte_u1_p1_imm) &&
1696 ic[-1].f == instr(load_w0_byte_u1_p1_reg)) {
1697 ic[-2].f = instr(netbsd_scanc);
1698 combined;
1699 }
1700 }
1701 }
1702
1703
1704 /*
1705 * arm_combine_test2():
1706 */
1707 void arm_combine_test2(struct cpu *cpu, struct arm_instr_call *ic, int low_addr)
1708 {
1709 int n_back = (low_addr >> ARM_INSTR_ALIGNMENT_SHIFT)
1710 & (ARM_IC_ENTRIES_PER_PAGE-1);
1711
1712 if (n_back >= 2) {
1713 if (ic[-2].f == instr(store_w0_word_u1_p0_imm) &&
1714 ic[-2].arg[1] == 4 &&
1715 ic[-1].f == instr(subs) &&
1716 ic[-1].arg[0] == ic[-1].arg[2] && ic[-1].arg[1] == 4 &&
1717 ic[ 0].f == instr(b_samepage__gt) &&
1718 ic[ 0].arg[0] == (size_t)&ic[-2]) {
1719 ic[-2].f = instr(fill_loop_test2);
1720 printf("YO test2\n");
1721 combined;
1722 }
1723 }
1724 }
1725
1726
1727 #if 0
1728 /* TODO: This is another test hack. */
1729
1730 if (n_back >= 3) {
1731 if (ic[-3].f == instr(cmps) &&
1732 ic[-3].arg[0] == ic[-1].arg[0] &&
1733 ic[-3].arg[1] == 0 &&
1734 ic[-2].f == instr(store_w0_byte_u1_p0_imm) &&
1735 ic[-2].arg[1] == 1 &&
1736 ic[-1].f == instr(sub) &&
1737 ic[-1].arg[0] == ic[-1].arg[2] && ic[-1].arg[1] == 1 &&
1738 ic[ 0].f == instr(b_samepage__gt) &&
1739 ic[ 0].arg[0] == (size_t)&ic[-3]) {
1740 ic[-3].f = instr(fill_loop_test);
1741 combined;
1742 }
1743 }
1744 /* TODO: Combine forward as well */
1745 #endif
1746
1747
1748 /*****************************************************************************/
1749
1750
1751 /*
1752 * arm_instr_to_be_translated():
1753 *
1754 * Translate an instruction word into an arm_instr_call. ic is filled in with
1755 * valid data for the translated instruction, or a "nothing" instruction if
1756 * there was a translation failure. The newly translated instruction is then
1757 * executed.
1758 */
1759 X(to_be_translated)
1760 {
1761 uint32_t addr, low_pc, iword, imm = 0;
1762 unsigned char *page;
1763 unsigned char ib[4];
1764 int condition_code, main_opcode, secondary_opcode, s_bit, rn, rd, r8;
1765 int p_bit, u_bit, b_bit, w_bit, l_bit, regform, rm, c, t, any_pc_reg;
1766 void (*samepage_function)(struct cpu *, struct arm_instr_call *);
1767
1768 /* Figure out the address of the instruction: */
1769 low_pc = ((size_t)ic - (size_t)cpu->cd.arm.cur_ic_page)
1770 / sizeof(struct arm_instr_call);
1771 addr = cpu->cd.arm.r[ARM_PC] & ~((ARM_IC_ENTRIES_PER_PAGE-1) <<
1772 ARM_INSTR_ALIGNMENT_SHIFT);
1773 addr += (low_pc << ARM_INSTR_ALIGNMENT_SHIFT);
1774 cpu->pc = cpu->cd.arm.r[ARM_PC] = addr;
1775 addr &= ~((1 << ARM_INSTR_ALIGNMENT_SHIFT) - 1);
1776
1777 /* Read the instruction word from memory: */
1778 page = cpu->cd.arm.host_load[addr >> 12];
1779 if (page != NULL) {
1780 /* fatal("TRANSLATION HIT!\n"); */
1781 memcpy(ib, page + (addr & 0xfff), sizeof(ib));
1782 } else {
1783 /* fatal("TRANSLATION MISS!\n"); */
1784 if (!cpu->memory_rw(cpu, cpu->mem, addr, &ib[0],
1785 sizeof(ib), MEM_READ, CACHE_INSTRUCTION)) {
1786 fatal("to_be_translated(): "
1787 "read failed: TODO\n");
1788 return;
1789 }
1790 }
1791
1792 if (cpu->byte_order == EMUL_LITTLE_ENDIAN)
1793 iword = ib[0] + (ib[1]<<8) + (ib[2]<<16) + (ib[3]<<24);
1794 else
1795 iword = ib[3] + (ib[2]<<8) + (ib[1]<<16) + (ib[0]<<24);
1796
1797
1798 #define DYNTRANS_TO_BE_TRANSLATED_HEAD
1799 #include "cpu_dyntrans.c"
1800 #undef DYNTRANS_TO_BE_TRANSLATED_HEAD
1801
1802
1803 /* The idea of taking bits 27..24 was found here:
1804 http://armphetamine.sourceforge.net/oldinfo.html */
1805 condition_code = iword >> 28;
1806 main_opcode = (iword >> 24) & 15;
1807 secondary_opcode = (iword >> 21) & 15;
1808 u_bit = (iword >> 23) & 1;
1809 b_bit = (iword >> 22) & 1;
1810 w_bit = (iword >> 21) & 1;
1811 s_bit = l_bit = (iword >> 20) & 1;
1812 rn = (iword >> 16) & 15;
1813 rd = (iword >> 12) & 15;
1814 r8 = (iword >> 8) & 15;
1815 c = (iword >> 7) & 31;
1816 t = (iword >> 4) & 7;
1817 rm = iword & 15;
1818
1819 if (condition_code == 0xf) {
1820 if ((iword & 0xfc70f000) == 0xf450f000) {
1821 /* Preload: TODO. Treat as NOP for now. */
1822 ic->f = instr(nop);
1823 goto okay;
1824 }
1825
1826 fatal("TODO: ARM condition code 0x%x\n",
1827 condition_code);
1828 goto bad;
1829 }
1830
1831
1832 /*
1833 * Translate the instruction:
1834 */
1835
1836 switch (main_opcode) {
1837
1838 case 0x0:
1839 case 0x1:
1840 case 0x2:
1841 case 0x3:
1842 /* Check special cases first: */
1843 if ((iword & 0x0fc000f0) == 0x00000090) {
1844 /*
1845 * Multiplication:
1846 * xxxx0000 00ASdddd nnnnssss 1001mmmm (Rd,Rm,Rs[,Rn])
1847 */
1848 if (iword & 0x00200000) {
1849 if (s_bit)
1850 ic->f = cond_instr(mlas);
1851 else
1852 ic->f = cond_instr(mla);
1853 ic->arg[0] = iword;
1854 } else {
1855 if (s_bit)
1856 ic->f = cond_instr(muls);
1857 else
1858 ic->f = cond_instr(mul);
1859 /* NOTE: rn means rd in this case: */
1860 ic->arg[0] = (size_t)(&cpu->cd.arm.r[rn]);
1861 ic->arg[1] = (size_t)(&cpu->cd.arm.r[rm]);
1862 ic->arg[2] = (size_t)(&cpu->cd.arm.r[r8]);
1863 }
1864 break;
1865 }
1866 if ((iword & 0x0f8000f0) == 0x00800090) {
1867 /* Long multiplication: */
1868 if (s_bit) {
1869 fatal("TODO: sbit mull\n");
1870 goto bad;
1871 }
1872 ic->f = cond_instr(mull);
1873 ic->arg[0] = iword;
1874 break;
1875 }
1876 if ((iword & 0x0ff000d0) == 0x01200010) {
1877 /* bx or blx */
1878 if (iword & 0x20)
1879 ic->f = cond_instr(blx);
1880 else {
1881 if (cpu->machine->show_trace_tree &&
1882 rm == ARM_LR)
1883 ic->f = cond_instr(bx_trace);
1884 else
1885 ic->f = cond_instr(bx);
1886 }
1887 ic->arg[0] = (size_t)(&cpu->cd.arm.r[rm]);
1888 break;
1889 }
1890 if ((iword & 0x0fb00ff0) == 0x1000090) {
1891 if (iword & 0x00400000)
1892 ic->f = cond_instr(swpb);
1893 else
1894 ic->f = cond_instr(swp);
1895 ic->arg[0] = (size_t)(&cpu->cd.arm.r[rd]);
1896 ic->arg[1] = (size_t)(&cpu->cd.arm.r[rm]);
1897 ic->arg[2] = (size_t)(&cpu->cd.arm.r[rn]);
1898 break;
1899 }
1900 if ((iword & 0x0fb0fff0) == 0x0120f000 ||
1901 (iword & 0x0fb0f000) == 0x0320f000) {
1902 /* msr: move to [S|C]PSR from a register or
1903 immediate value */
1904 if (rm == ARM_PC) {
1905 fatal("msr PC?\n");
1906 goto bad;
1907 }
1908 if (iword & 0x02000000) {
1909 if (iword & 0x00400000)
1910 ic->f = cond_instr(msr_imm_spsr);
1911 else
1912 ic->f = cond_instr(msr_imm);
1913 } else {
1914 if (iword & 0x00400000)
1915 ic->f = cond_instr(msr_spsr);
1916 else
1917 ic->f = cond_instr(msr);
1918 }
1919 imm = iword & 0xff;
1920 while (r8-- > 0)
1921 imm = (imm >> 2) | ((imm & 3) << 30);
1922 ic->arg[0] = imm;
1923 ic->arg[2] = (size_t)(&cpu->cd.arm.r[rm]);
1924 switch ((iword >> 16) & 15) {
1925 case 1: ic->arg[1] = 0x000000ff; break;
1926 case 8: ic->arg[1] = 0xff000000; break;
1927 case 9: ic->arg[1] = 0xff0000ff; break;
1928 default:fatal("unimpl a: msr regform\n");
1929 goto bad;
1930 }
1931 break;
1932 }
1933 if ((iword & 0x0fbf0fff) == 0x010f0000) {
1934 /* mrs: move from CPSR/SPSR to a register: */
1935 if (rd == ARM_PC) {
1936 fatal("mrs PC?\n");
1937 goto bad;
1938 }
1939 if (iword & 0x00400000)
1940 ic->f = cond_instr(mrs_spsr);
1941 else
1942 ic->f = cond_instr(mrs);
1943 ic->arg[0] = (size_t)(&cpu->cd.arm.r[rd]);
1944 break;
1945 }
1946 if ((iword & 0x0e000090) == 0x00000090) {
1947 int imm = ((iword >> 4) & 0xf0) | (iword & 0xf);
1948 int regform = !(iword & 0x00400000);
1949 p_bit = main_opcode & 1;
1950 ic->arg[0] = (size_t)(&cpu->cd.arm.r[rn]);
1951 ic->arg[2] = (size_t)(&cpu->cd.arm.r[rd]);
1952 if (rd == ARM_PC || rn == ARM_PC) {
1953 ic->f = arm_load_store_instr_3_pc[
1954 condition_code + (l_bit? 16 : 0)
1955 + (iword & 0x40? 32 : 0)
1956 + (w_bit? 64 : 0)
1957 + (iword & 0x20? 128 : 0)
1958 + (u_bit? 256 : 0) + (p_bit? 512 : 0)
1959 + (regform? 1024 : 0)];
1960 if (rn == ARM_PC)
1961 ic->arg[0] = (size_t)
1962 (&cpu->cd.arm.tmp_pc);
1963 if (!l_bit && rd == ARM_PC)
1964 ic->arg[2] = (size_t)
1965 (&cpu->cd.arm.tmp_pc);
1966 } else
1967 ic->f = arm_load_store_instr_3[
1968 condition_code + (l_bit? 16 : 0)
1969 + (iword & 0x40? 32 : 0)
1970 + (w_bit? 64 : 0)
1971 + (iword & 0x20? 128 : 0)
1972 + (u_bit? 256 : 0) + (p_bit? 512 : 0)
1973 + (regform? 1024 : 0)];
1974 if (regform)
1975 ic->arg[1] = (size_t)(void *)arm_r[iword & 0xf];
1976 else
1977 ic->arg[1] = imm;
1978 break;
1979 }
1980
1981 if (iword & 0x80 && !(main_opcode & 2) && iword & 0x10) {
1982 fatal("reg form blah blah\n");
1983 goto bad;
1984 }
1985
1986 /* "mov pc,lr": */
1987 if ((iword & 0x0fffffff) == 0x01a0f00e) {
1988 if (cpu->machine->show_trace_tree)
1989 ic->f = cond_instr(ret_trace);
1990 else
1991 ic->f = cond_instr(ret);
1992 break;
1993 }
1994
1995 /* "mov reg,reg": */
1996 if ((iword & 0x0fff0ff0) == 0x01a00000 &&
1997 (iword&15) != ARM_PC && rd != ARM_PC) {
1998 ic->f = cond_instr(mov_reg_reg);
1999 ic->arg[0] = (size_t)(&cpu->cd.arm.r[iword & 15]);
2000 ic->arg[1] = (size_t)(&cpu->cd.arm.r[rd]);
2001 break;
2002 }
2003
2004 /* "mov reg,#0": */
2005 if ((iword & 0x0fff0fff) == 0x03a03000 && rd != ARM_PC) {
2006 switch (rd) {
2007 case 0: ic->f = cond_instr(clear_r0); break;
2008 case 1: ic->f = cond_instr(clear_r1); break;
2009 case 2: ic->f = cond_instr(clear_r2); break;
2010 case 3: ic->f = cond_instr(clear_r3); break;
2011 case 4: ic->f = cond_instr(clear_r4); break;
2012 case 5: ic->f = cond_instr(clear_r5); break;
2013 case 6: ic->f = cond_instr(clear_r6); break;
2014 case 7: ic->f = cond_instr(clear_r7); break;
2015 case 8: ic->f = cond_instr(clear_r8); break;
2016 case 9: ic->f = cond_instr(clear_r9); break;
2017 case 10: ic->f = cond_instr(clear_r10); break;
2018 case 11: ic->f = cond_instr(clear_r11); break;
2019 case 12: ic->f = cond_instr(clear_r12); break;
2020 case 13: ic->f = cond_instr(clear_r13); break;
2021 case 14: ic->f = cond_instr(clear_r14); break;
2022 }
2023 break;
2024 }
2025
2026 /* "mov reg,#1": */
2027 if ((iword & 0x0fff0fff) == 0x03a03001 && rd != ARM_PC) {
2028 switch (rd) {
2029 case 0: ic->f = cond_instr(mov1_r0); break;
2030 case 1: ic->f = cond_instr(mov1_r1); break;
2031 case 2: ic->f = cond_instr(mov1_r2); break;
2032 case 3: ic->f = cond_instr(mov1_r3); break;
2033 case 4: ic->f = cond_instr(mov1_r4); break;
2034 case 5: ic->f = cond_instr(mov1_r5); break;
2035 case 6: ic->f = cond_instr(mov1_r6); break;
2036 case 7: ic->f = cond_instr(mov1_r7); break;
2037 case 8: ic->f = cond_instr(mov1_r8); break;
2038 case 9: ic->f = cond_instr(mov1_r9); break;
2039 case 10: ic->f = cond_instr(mov1_r10); break;
2040 case 11: ic->f = cond_instr(mov1_r11); break;
2041 case 12: ic->f = cond_instr(mov1_r12); break;
2042 case 13: ic->f = cond_instr(mov1_r13); break;
2043 case 14: ic->f = cond_instr(mov1_r14); break;
2044 }
2045 break;
2046 }
2047
2048 /*
2049 * Generic Data Processing Instructions:
2050 */
2051 if ((main_opcode & 2) == 0)
2052 regform = 1;
2053 else
2054 regform = 0;
2055
2056 if (regform) {
2057 /* 0x1000 signifies Carry bit update on rotation,
2058 which is not necessary for add,adc,sub,sbc,
2059 rsb,rsc,cmp, or cmn, because they update the
2060 Carry bit manually anyway. */
2061 int q = 0x1000;
2062 if (s_bit == 0)
2063 q = 0;
2064 if ((secondary_opcode >= 2 && secondary_opcode <= 7)
2065 || secondary_opcode==0xa || secondary_opcode==0xb)
2066 q = 0;
2067 ic->arg[1] = (size_t)(void *)arm_r[(iword & 0xfff) + q];
2068 } else {
2069 imm = iword & 0xff;
2070 while (r8-- > 0)
2071 imm = (imm >> 2) | ((imm & 3) << 30);
2072 ic->arg[1] = imm;
2073 }
2074
2075 ic->arg[0] = (size_t)(&cpu->cd.arm.r[rn]);
2076 ic->arg[2] = (size_t)(&cpu->cd.arm.r[rd]);
2077 any_pc_reg = 0;
2078 if (rn == ARM_PC || rd == ARM_PC)
2079 any_pc_reg = 1;
2080
2081 ic->f = arm_dpi_instr[condition_code +
2082 16 * secondary_opcode + (s_bit? 256 : 0) +
2083 (any_pc_reg? 512 : 0) + (regform? 1024 : 0)];
2084
2085 if (iword == 0xe113000c)
2086 cpu->combination_check = arm_combine_netbsd_scanc;
2087 break;
2088
2089 case 0x4: /* Load and store... */
2090 case 0x5: /* xxxx010P UBWLnnnn ddddoooo oooooooo Immediate */
2091 case 0x6: /* xxxx011P UBWLnnnn ddddcccc ctt0mmmm Register */
2092 case 0x7:
2093 ic->arg[0] = (size_t)(&cpu->cd.arm.r[rn]);
2094 ic->arg[2] = (size_t)(&cpu->cd.arm.r[rd]);
2095 if (rd == ARM_PC || rn == ARM_PC) {
2096 ic->f = arm_load_store_instr_pc[((iword >> 16)
2097 & 0x3f0) + condition_code];
2098 if (rn == ARM_PC)
2099 ic->arg[0] = (size_t)(&cpu->cd.arm.tmp_pc);
2100 if (!l_bit && rd == ARM_PC)
2101 ic->arg[2] = (size_t)(&cpu->cd.arm.tmp_pc);
2102 } else {
2103 ic->f = arm_load_store_instr[((iword >> 16) &
2104 0x3f0) + condition_code];
2105 }
2106 imm = iword & 0xfff;
2107 if (main_opcode < 6)
2108 ic->arg[1] = imm;
2109 else
2110 ic->arg[1] = (size_t)(void *)arm_r[iword & 0xfff];
2111 if ((iword & 0x0e000010) == 0x06000010) {
2112 fatal("Not a Load/store TODO\n");
2113 goto bad;
2114 }
2115 break;
2116
2117 case 0x8: /* Multiple load/store... (Block data transfer) */
2118 case 0x9: /* xxxx100P USWLnnnn llllllll llllllll */
2119 ic->arg[0] = (size_t)(&cpu->cd.arm.r[rn]);
2120 ic->arg[1] = (size_t)iword;
2121 /* Generic case: */
2122 if (l_bit)
2123 ic->f = cond_instr(bdt_load);
2124 else
2125 ic->f = cond_instr(bdt_store);
2126 #if defined(HOST_LITTLE_ENDIAN) && !defined(GATHER_BDT_STATISTICS)
2127 /*
2128 * Check for availability of optimized implementation:
2129 * xxxx100P USWLnnnn llllllll llllllll
2130 * ^ ^ ^ ^ ^ ^ ^ ^ (0x00950154)
2131 * These bits are used to select which list to scan, and then
2132 * the list is scanned linearly.
2133 *
2134 * The optimized functions do not support show_trace_tree,
2135 * but it's ok to use the unoptimized version in that case.
2136 */
2137 if (!cpu->machine->show_trace_tree) {
2138 int i = 0, j = iword;
2139 j = ((j & 0x00800000) >> 16) | ((j & 0x00100000) >> 14)
2140 | ((j & 0x00040000) >> 13) | ((j & 0x00010000) >> 12)
2141 | ((j & 0x00000100) >> 5) | ((j & 0x00000040) >> 4)
2142 | ((j & 0x00000010) >> 3) | ((j & 0x00000004) >> 2);
2143 while (multi_opcode[j][i] != 0) {
2144 if ((iword & 0x0fffffff) ==
2145 multi_opcode[j][i]) {
2146 ic->f = multi_opcode_f[j]
2147 [i*16 + condition_code];
2148 break;
2149 }
2150 i ++;
2151 }
2152 }
2153 #endif
2154 if (rn == ARM_PC) {
2155 fatal("TODO: bdt with PC as base\n");
2156 goto bad;
2157 }
2158 break;
2159
2160 case 0xa: /* B: branch */
2161 case 0xb: /* BL: branch+link */
2162 if (main_opcode == 0x0a) {
2163 ic->f = cond_instr(b);
2164 samepage_function = cond_instr(b_samepage);
2165 /* if (iword == 0xcafffffc)
2166 cpu->combination_check = arm_combine_test2; */
2167 if (iword == 0xcaffffed)
2168 cpu->combination_check =
2169 arm_combine_netbsd_memset;
2170 if (iword == 0xaafffff9)
2171 cpu->combination_check =
2172 arm_combine_netbsd_memcpy;
2173 } else {
2174 if (cpu->machine->show_trace_tree) {
2175 ic->f = cond_instr(bl_trace);
2176 samepage_function =
2177 cond_instr(bl_samepage_trace);
2178 } else {
2179 ic->f = cond_instr(bl);
2180 samepage_function = cond_instr(bl_samepage);
2181 }
2182 }
2183
2184 ic->arg[0] = (iword & 0x00ffffff) << 2;
2185 /* Sign-extend: */
2186 if (ic->arg[0] & 0x02000000)
2187 ic->arg[0] |= 0xfc000000;
2188 /*
2189 * Branches are calculated as PC + 8 + offset.
2190 */
2191 ic->arg[0] = (int32_t)(ic->arg[0] + 8);
2192
2193 /* Special case: branch within the same page: */
2194 {
2195 uint32_t mask_within_page =
2196 ((ARM_IC_ENTRIES_PER_PAGE-1) <<
2197 ARM_INSTR_ALIGNMENT_SHIFT) |
2198 ((1 << ARM_INSTR_ALIGNMENT_SHIFT) - 1);
2199 uint32_t old_pc = addr;
2200 uint32_t new_pc = old_pc + (int32_t)ic->arg[0];
2201 if ((old_pc & ~mask_within_page) ==
2202 (new_pc & ~mask_within_page)) {
2203 ic->f = samepage_function;
2204 ic->arg[0] = (size_t) (
2205 cpu->cd.arm.cur_ic_page +
2206 ((new_pc & mask_within_page) >>
2207 ARM_INSTR_ALIGNMENT_SHIFT));
2208 }
2209 }
2210
2211 #if 0
2212 /* Hm. This doesn't really increase performance. */
2213 if (iword == 0x8afffffa)
2214 cpu->combination_check = arm_combine_netbsd_cacheclean2;
2215 #endif
2216 break;
2217
2218 case 0xe:
2219 if (iword & 0x10) {
2220 /* xxxx1110 oooLNNNN ddddpppp qqq1MMMM MCR/MRC */
2221 ic->arg[0] = iword;
2222 ic->f = cond_instr(mcr_mrc);
2223 } else {
2224 /* xxxx1110 oooonnnn ddddpppp qqq0mmmm CDP */
2225 ic->arg[0] = iword;
2226 ic->f = cond_instr(cdp);
2227 }
2228 if (iword == 0xee070f9a)
2229 cpu->combination_check = arm_combine_netbsd_cacheclean;
2230 break;
2231
2232 case 0xf:
2233 /* SWI: */
2234 /* Default handler: */
2235 ic->f = cond_instr(swi);
2236 if (iword == 0xef8c64be) {
2237 /* Hack for openfirmware prom emulation: */
2238 ic->f = instr(openfirmware);
2239 } else if (cpu->machine->userland_emul != NULL) {
2240 if ((iword & 0x00f00000) == 0x00a00000) {
2241 ic->arg[0] = iword & 0x00ffffff;
2242 ic->f = cond_instr(swi_useremul);
2243 } else {
2244 fatal("Bad userland SWI?\n");
2245 goto bad;
2246 }
2247 }
2248 break;
2249
2250 default:goto bad;
2251 }
2252
2253 okay:
2254
2255 #define DYNTRANS_TO_BE_TRANSLATED_TAIL
2256 #include "cpu_dyntrans.c"
2257 #undef DYNTRANS_TO_BE_TRANSLATED_TAIL
2258 }
2259

  ViewVC Help
Powered by ViewVC 1.1.26