/[pearpc]/src/cpu/cpu_jitc_x86/x86asm.cc
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /src/cpu/cpu_jitc_x86/x86asm.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Wed Sep 5 17:11:21 2007 UTC (12 years, 3 months ago) by dpavlin
File size: 76860 byte(s)
import upstream CVS
1 /*
2 * PearPC
3 * x86asm.cc
4 *
5 * Copyright (C) 2004 Sebastian Biallas (sb@biallas.net)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21 #include <cstring>
22 #include <cstdlib>
23
24 #include "tools/debug.h"
25 #include "tools/snprintf.h"
26 #include "jitc.h"
27 #include "jitc_asm.h"
28 #include "jitc_debug.h"
29 #include "x86asm.h"
30
31 void x86GetCaps(X86CPUCaps &caps)
32 {
33 memset(&caps, 0, sizeof caps);
34
35 caps.loop_align = 8;
36
37 struct {
38 uint32 level, c, d, b;
39 } id;
40
41 if (!ppc_cpuid_asm(0, &id)) {
42 ht_snprintf(caps.vendor, sizeof caps.vendor, "unknown");
43 return;
44 }
45
46 *((uint32 *)caps.vendor) = id.b;
47 *((uint32 *)(caps.vendor+4)) = id.d;
48 *((uint32 *)(caps.vendor+8)) = id.c;
49 caps.vendor[12] = 0;
50 ht_printf("%s\n", caps.vendor);
51 if (id.level == 0) return;
52
53 struct {
54 uint32 model, features2, features, b;
55 } id2;
56
57 ppc_cpuid_asm(1, &id2);
58 caps.rdtsc = id2.features & (1<<4);
59 caps.cmov = id2.features & (1<<15);
60 caps.mmx = id2.features & (1<<23);
61 caps._3dnow = id2.features & (1<<31);
62 caps._3dnow2 = id2.features & (1<<30);
63 caps.sse = id2.features & (1<<25);
64 caps.sse2 = id2.features & (1<<26);
65 caps.sse3 = id2.features2 & (1<<0);
66
67 ppc_cpuid_asm(0x80000000, &id);
68 if (id.level >= 0x80000001) {
69 // processor supports extended functions
70 // now test for 3dnow
71 ppc_cpuid_asm(0x80000001, &id2);
72
73 caps._3dnow = id2.features & (1<<31);
74 caps._3dnow2 = id2.features & (1<<30);
75 }
76
77 ht_printf("%s%s%s%s%s%s%s\n",
78 caps.cmov?" CMOV":"",
79 caps.mmx?" MMX":"",
80 caps._3dnow?" 3DNOW":"",
81 caps._3dnow2?" 3DNOW+":"",
82 caps.sse?" SSE":"",
83 caps.sse2?" SSE2":"",
84 caps.sse3?" SSE3":"");
85 }
86
87 /*
88 * internal functions
89 */
90
91 static inline void FASTCALL jitcMapRegister(NativeReg nreg, PPC_Register creg)
92 {
93 gJITC.nativeReg[nreg] = creg;
94 gJITC.clientReg[creg] = nreg;
95 }
96
97 static inline void FASTCALL jitcUnmapRegister(NativeReg reg)
98 {
99 gJITC.clientReg[gJITC.nativeReg[reg]] = REG_NO;
100 gJITC.nativeReg[reg] = PPC_REG_NO;
101 }
102
103 static inline void FASTCALL jitcLoadRegister(NativeReg nreg, PPC_Register creg)
104 {
105 asmMOVRegDMem(nreg, (uint32)&gCPU+creg);
106 jitcMapRegister(nreg, creg);
107 gJITC.nativeRegState[nreg] = rsMapped;
108 }
109
110 static inline void FASTCALL jitcStoreRegister(NativeReg nreg, PPC_Register creg)
111 {
112 asmMOVDMemReg((uint32)&gCPU+creg, nreg);
113 }
114
115 static inline void FASTCALL jitcStoreRegisterUndirty(NativeReg nreg, PPC_Register creg)
116 {
117 jitcStoreRegister(nreg, creg);
118 gJITC.nativeRegState[nreg] = rsMapped; // no longer dirty
119 }
120
121 static inline PPC_Register FASTCALL jitcGetRegisterMapping(NativeReg reg)
122 {
123 return gJITC.nativeReg[reg];
124 }
125
126 NativeReg FASTCALL jitcGetClientRegisterMapping(PPC_Register creg)
127 {
128 return gJITC.clientReg[creg];
129 }
130
131 static inline void FASTCALL jitcDiscardRegister(NativeReg r)
132 {
133 // FIXME: move to front of the LRU list
134 gJITC.nativeRegState[r] = rsUnused;
135 }
136
137 /*
138 * Puts native register to the end of the LRU list
139 */
140 void FASTCALL jitcTouchRegister(NativeReg r)
141 {
142 NativeRegType *reg = gJITC.nativeRegsList[r];
143 if (reg->moreRU) {
144 // there's a more recently used register
145 if (reg->lessRU) {
146 reg->lessRU->moreRU = reg->moreRU;
147 reg->moreRU->lessRU = reg->lessRU;
148 } else {
149 // reg was LRUreg
150 gJITC.LRUreg = reg->moreRU;
151 reg->moreRU->lessRU = NULL;
152 }
153 reg->moreRU = NULL;
154 reg->lessRU = gJITC.MRUreg;
155 gJITC.MRUreg->moreRU = reg;
156 gJITC.MRUreg = reg;
157 }
158 }
159
160 /*
161 * clobbers and moves to end of LRU list
162 */
163 static inline void FASTCALL jitcClobberAndTouchRegister(NativeReg reg)
164 {
165 switch (gJITC.nativeRegState[reg]) {
166 case rsDirty:
167 jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
168 // fall throu
169 case rsMapped:
170 jitcUnmapRegister(reg);
171 gJITC.nativeRegState[reg] = rsUnused;
172 break;
173 case rsUnused:;
174 }
175 jitcTouchRegister(reg);
176 }
177
178 /*
179 * clobbers and moves to front of LRU list
180 */
181 static inline void FASTCALL jitcClobberAndDiscardRegister(NativeReg reg)
182 {
183 switch (gJITC.nativeRegState[reg]) {
184 case rsDirty:
185 jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
186 // fall throu
187 case rsMapped:
188 jitcUnmapRegister(reg);
189 jitcDiscardRegister(reg);
190 break;
191 case rsUnused:;
192 /*
193 * Note: it makes no sense to move this register to
194 * the front of the LRU list here, since only
195 * other unused register can be before it in the list
196 *
197 * Note2: it would even be an error to move it here,
198 * since ESP isn't in the nativeRegsList
199 */
200 }
201 }
202
203 void FASTCALL jitcClobberSingleRegister(NativeReg reg)
204 {
205 switch (gJITC.nativeRegState[reg]) {
206 case rsDirty:
207 jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
208 // fall throu
209 case rsMapped:
210 jitcUnmapRegister(reg);
211 gJITC.nativeRegState[reg] = rsUnused;
212 break;
213 case rsUnused:;
214 }
215 }
216
217 /*
218 * Dirty register.
219 * Does *not* touch register
220 * Will not produce code.
221 */
222 NativeReg FASTCALL jitcDirtyRegister(NativeReg r)
223 {
224 gJITC.nativeRegState[r] = rsDirty;
225 return r;
226 }
227
228 NativeReg FASTCALL jitcAllocFixedRegister(NativeReg reg)
229 {
230 jitcClobberAndTouchRegister(reg);
231 return reg;
232 }
233
234 /*
235 * Allocates a native register
236 * May produce a store if no registers are avaiable
237 */
238 NativeReg FASTCALL jitcAllocRegister(int options)
239 {
240 NativeReg reg;
241 if (options & NATIVE_REG) {
242 // allocate fixed register
243 reg = (NativeReg)(options & 0xf);
244 } else if (options & NATIVE_REG_8) {
245 // allocate eax, ecx, edx or ebx
246 NativeRegType *rt = gJITC.LRUreg;
247 while (rt->reg > EBX) rt = rt->moreRU;
248 reg = rt->reg;
249 } else {
250 // allocate random register
251 reg = gJITC.LRUreg->reg;
252 }
253 return jitcAllocFixedRegister(reg);
254 }
255
256 /*
257 * Returns native registers that contains value of
258 * client register or allocates new register which
259 * maps to the client register.
260 * Dirties register.
261 *
262 * May produce a store if no registers are avaiable
263 * May produce a MOV/XCHG to satisfy mapping
264 * Will never produce a load
265 */
266 NativeReg FASTCALL jitcMapClientRegisterDirty(PPC_Register creg, int options)
267 {
268 if (options & NATIVE_REG_8) {
269 // nyi
270 ht_printf("unimpl x86asm:%d\n", __LINE__);
271 exit(-1);
272 }
273 if (options & NATIVE_REG) {
274 NativeReg want_reg = (NativeReg)(options & 0xf);
275 PPC_Register have_mapping = jitcGetRegisterMapping(want_reg);
276
277 if (have_mapping != PPC_REG_NO) {
278 // test if we're lucky
279 if (have_mapping == creg) {
280 jitcDirtyRegister(want_reg);
281 jitcTouchRegister(want_reg);
282 return want_reg;
283 }
284
285 // we're not lucky, get a new register for the old mapping
286 NativeReg temp_reg = jitcAllocRegister();
287 // note that AllocRegister also touches temp_reg
288
289 // make new mapping
290 jitcMapRegister(want_reg, creg);
291
292 gJITC.nativeRegState[temp_reg] = gJITC.nativeRegState[want_reg];
293 // now we can mess with want_reg
294 jitcDirtyRegister(want_reg);
295
296 // maybe the old mapping was discarded and we're done
297 if (temp_reg == want_reg) return want_reg;
298
299 // ok, restore old mapping
300 if (temp_reg == EAX || want_reg == EAX) {
301 asmALURegReg(X86_XCHG, temp_reg, want_reg);
302 } else {
303 asmALURegReg(X86_MOV, temp_reg, want_reg);
304 }
305 jitcMapRegister(temp_reg, have_mapping);
306 } else {
307 // want_reg is free
308 // unmap creg if needed
309 NativeReg reg = jitcGetClientRegisterMapping(creg);
310 if (reg != REG_NO) {
311 jitcUnmapRegister(reg);
312 jitcDiscardRegister(reg);
313 }
314 jitcMapRegister(want_reg, creg);
315 jitcDirtyRegister(want_reg);
316 }
317 jitcTouchRegister(want_reg);
318 return want_reg;
319 } else {
320 NativeReg reg = jitcGetClientRegisterMapping(creg);
321 if (reg == REG_NO) {
322 reg = jitcAllocRegister();
323 jitcMapRegister(reg, creg);
324 } else {
325 jitcTouchRegister(reg);
326 }
327 return jitcDirtyRegister(reg);
328 }
329 }
330
331
332 /*
333 * Returns native registers that contains value of
334 * client register or allocates new register with
335 * this content.
336 *
337 * May produce a store if no registers are avaiable
338 * May produce a load if client registers isn't mapped
339 * May produce a MOV/XCHG to satisfy mapping
340 */
341 NativeReg FASTCALL jitcGetClientRegister(PPC_Register creg, int options)
342 {
343 if (options & NATIVE_REG_8) {
344 NativeReg client_reg_maps_to = jitcGetClientRegisterMapping(creg);
345 if (client_reg_maps_to == REG_NO) {
346 NativeReg reg = jitcAllocRegister(NATIVE_REG_8);
347 jitcLoadRegister(reg, creg);
348 return reg;
349 } else {
350 if (client_reg_maps_to <= EBX) {
351 jitcTouchRegister(client_reg_maps_to);
352 return client_reg_maps_to;
353 }
354 NativeReg want_reg = jitcAllocRegister(NATIVE_REG_8);
355 asmALURegReg(X86_MOV, want_reg, client_reg_maps_to);
356 jitcUnmapRegister(client_reg_maps_to);
357 jitcMapRegister(want_reg, creg);
358 gJITC.nativeRegState[want_reg] = gJITC.nativeRegState[client_reg_maps_to];
359 gJITC.nativeRegState[client_reg_maps_to] = rsUnused;
360 return want_reg;
361 }
362 }
363 if (options & NATIVE_REG) {
364 NativeReg want_reg = (NativeReg)(options & 0xf);
365 PPC_Register native_reg_maps_to = jitcGetRegisterMapping(want_reg);
366 NativeReg client_reg_maps_to = jitcGetClientRegisterMapping(creg);
367 if (native_reg_maps_to != PPC_REG_NO) {
368 // test if we're lucky
369 if (native_reg_maps_to == creg) {
370 jitcTouchRegister(want_reg);
371 } else {
372 // we need to satisfy mapping
373 if (client_reg_maps_to != REG_NO) {
374 asmALURegReg(X86_XCHG, want_reg, client_reg_maps_to);
375 RegisterState rs = gJITC.nativeRegState[want_reg];
376 gJITC.nativeRegState[want_reg] = gJITC.nativeRegState[client_reg_maps_to];
377 gJITC.nativeRegState[client_reg_maps_to] = rs;
378 jitcMapRegister(want_reg, creg);
379 jitcMapRegister(client_reg_maps_to, native_reg_maps_to);
380 jitcTouchRegister(want_reg);
381 } else {
382 // client register isn't mapped
383 jitcAllocFixedRegister(want_reg);
384 jitcLoadRegister(want_reg, creg);
385 }
386 }
387 return want_reg;
388 } else {
389 // want_reg is free
390 jitcTouchRegister(want_reg);
391 if (client_reg_maps_to != REG_NO) {
392 asmALURegReg(X86_MOV, want_reg, client_reg_maps_to);
393 gJITC.nativeRegState[want_reg] = gJITC.nativeRegState[client_reg_maps_to];
394 jitcUnmapRegister(client_reg_maps_to);
395 jitcDiscardRegister(client_reg_maps_to);
396 jitcMapRegister(want_reg, creg);
397 } else {
398 jitcLoadRegister(want_reg, creg);
399 }
400 return want_reg;
401 }
402 } else {
403 NativeReg client_reg_maps_to = jitcGetClientRegisterMapping(creg);
404 if (client_reg_maps_to != REG_NO) {
405 jitcTouchRegister(client_reg_maps_to);
406 return client_reg_maps_to;
407 } else {
408 NativeReg reg = jitcAllocRegister();
409 jitcLoadRegister(reg, creg);
410 return reg;
411 }
412 }
413 }
414
415 /*
416 * Same as jitcGetClientRegister() but also dirties result
417 */
418 NativeReg FASTCALL jitcGetClientRegisterDirty(PPC_Register creg, int options)
419 {
420 return jitcDirtyRegister(jitcGetClientRegister(creg, options));
421 }
422
423 static inline void FASTCALL jitcFlushSingleRegister(NativeReg reg)
424 {
425 if (gJITC.nativeRegState[reg] == rsDirty) {
426 jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
427 }
428 }
429
430 static inline void FASTCALL jitcFlushSingleRegisterDirty(NativeReg reg)
431 {
432 if (gJITC.nativeRegState[reg] == rsDirty) {
433 jitcStoreRegister(reg, jitcGetRegisterMapping(reg));
434 }
435 }
436
437 /*
438 * Flushes native register(s).
439 * Resets dirty flags.
440 * Will produce a store if register is dirty.
441 */
442 void FASTCALL jitcFlushRegister(int options)
443 {
444 if (options == NATIVE_REGS_ALL) {
445 for (NativeReg i = EAX; i <= EDI; i = (NativeReg)(i+1)) jitcFlushSingleRegister(i);
446 } else if (options & NATIVE_REG) {
447 NativeReg reg = (NativeReg)(options & 0xf);
448 jitcFlushSingleRegister(reg);
449 }
450 }
451
452 /*
453 * Flushes native register(s).
454 * Doesnt reset dirty flags.
455 * Will produce a store if register is dirty.
456 */
457 void FASTCALL jitcFlushRegisterDirty(int options)
458 {
459 if (options == NATIVE_REGS_ALL) {
460 for (NativeReg i = EAX; i <= EDI; i = (NativeReg)(i+1)) jitcFlushSingleRegisterDirty(i);
461 } else if (options & NATIVE_REG) {
462 NativeReg reg = (NativeReg)(options & 0xf);
463 jitcFlushSingleRegisterDirty(reg);
464 }
465 }
466 /*
467 * Clobbers native register(s).
468 * Register is unused afterwards.
469 * Will produce a store if register was dirty.
470 */
471 void FASTCALL jitcClobberRegister(int options)
472 {
473 if (options == NATIVE_REGS_ALL) {
474 /*
475 * We dont use clobberAndDiscard here
476 * since it make no sense to move one register
477 * if we clobber all
478 */
479 for (NativeReg i = EAX; i <= EDI; i=(NativeReg)(i+1)) jitcClobberSingleRegister(i);
480 } else if (options & NATIVE_REG) {
481 NativeReg reg = (NativeReg)(options & 0xf);
482 jitcClobberAndDiscardRegister(reg);
483 }
484 }
485
486 /*
487 *
488 */
489 void FASTCALL jitcFlushAll()
490 {
491 jitcClobberCarryAndFlags();
492 jitcFlushRegister();
493 jitcFlushVectorRegister();
494 }
495
496 /*
497 *
498 */
499 void FASTCALL jitcClobberAll()
500 {
501 jitcClobberCarryAndFlags();
502 jitcClobberRegister();
503 jitcFloatRegisterClobberAll();
504 jitcTrashVectorRegister();
505 }
506
507 /*
508 * Invalidates all mappings
509 *
510 * Will never produce code
511 */
512 void FASTCALL jitcInvalidateAll()
513 {
514 #if 0
515 for (int i=EAX; i<=EDI; i++) {
516 if(gJITC.nativeRegState[i] != rsDirty) {
517 printf("!!! Unflushed register invalidated!\n");
518 }
519 }
520 #endif
521
522 memset(gJITC.nativeReg, PPC_REG_NO, sizeof gJITC.nativeReg);
523 memset(gJITC.nativeRegState, rsUnused, sizeof gJITC.nativeRegState);
524 memset(gJITC.clientReg, REG_NO, sizeof gJITC.clientReg);
525 gJITC.nativeCarryState = gJITC.nativeFlagsState = rsUnused;
526
527 for (unsigned int i=XMM0; i<=XMM7; i++) {
528 if(gJITC.nativeVectorRegState[i] == rsDirty) {
529 printf("!!! Unflushed vector register invalidated! (XMM%u)\n", i);
530 }
531 }
532
533 memset(gJITC.n2cVectorReg, PPC_VECTREG_NO, sizeof gJITC.n2cVectorReg);
534 memset(gJITC.c2nVectorReg, VECTREG_NO, sizeof gJITC.c2nVectorReg);
535 memset(gJITC.nativeVectorRegState, rsUnused, sizeof gJITC.nativeVectorRegState);
536
537 gJITC.nativeVectorReg = VECTREG_NO;
538 }
539
540 /*
541 * Gets the client carry flags into the native carry flag
542 *
543 *
544 */
545 void FASTCALL jitcGetClientCarry()
546 {
547 if (gJITC.nativeCarryState == rsUnused) {
548 jitcClobberFlags();
549
550 #if 0
551 // bt [gCPU.xer], XER_CA
552 byte modrm[6];
553 asmBTxMemImm(X86_BT, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.xer), 29);
554 #else
555 // bt [gCPU.xer_ca], 0
556 byte modrm[6];
557 asmBTxMemImm(X86_BT, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.xer_ca), 0);
558 #endif
559 gJITC.nativeCarryState = rsMapped;
560 }
561 }
562
563 void FASTCALL jitcMapFlagsDirty(PPC_CRx cr)
564 {
565 gJITC.nativeFlags = cr;
566 gJITC.nativeFlagsState = rsDirty;
567 }
568
569 PPC_CRx FASTCALL jitcGetFlagsMapping()
570 {
571 return gJITC.nativeFlags;
572 }
573
574 bool FASTCALL jitcFlagsMapped()
575 {
576 return gJITC.nativeFlagsState != rsUnused;
577 }
578
579 bool FASTCALL jitcCarryMapped()
580 {
581 return gJITC.nativeCarryState != rsUnused;
582 }
583
584 void FASTCALL jitcMapCarryDirty()
585 {
586 gJITC.nativeCarryState = rsDirty;
587 }
588
589 static inline void FASTCALL jitcFlushCarry()
590 {
591 byte modrm[6];
592 asmSETMem(X86_C, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.xer_ca));
593 }
594
595 #if 0
596
597 static inline void FASTCALL jitcFlushFlags()
598 {
599 asmCALL((NativeAddress)ppc_flush_flags_asm);
600 }
601
602 #else
603
604 uint8 jitcFlagsMapping[257];
605 uint8 jitcFlagsMapping2[256];
606 uint8 jitcFlagsMappingCMP_U[257];
607 uint8 jitcFlagsMappingCMP_L[257];
608
609 static inline void FASTCALL jitcFlushFlags()
610 {
611 #if 1
612 byte modrm[6];
613 NativeReg r = jitcAllocRegister(NATIVE_REG_8);
614 asmSETReg8(X86_S, (NativeReg8)r);
615 asmSETReg8(X86_Z, (NativeReg8)(r+4));
616 asmMOVxxRegReg16(X86_MOVZX, r, r);
617 asmALUMemImm8(X86_AND, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), 0x0f);
618 asmALURegMem8(X86_MOV, (NativeReg8)r, modrm, x86_mem(modrm, r, (uint32)&jitcFlagsMapping));
619 asmALUMemReg8(X86_OR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), (NativeReg8)r);
620 #else
621 byte modrm[6];
622 jitcAllocRegister(NATIVE_REG | EAX);
623 asmSimple(X86_LAHF);
624 asmMOVxxRegReg8(X86_MOVZX, EAX, AH);
625 asmALUMemImm8(X86_AND, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), 0x0f);
626 asmALURegMem8(X86_MOV, AL, modrm, x86_mem(modrm, EAX, (uint32)&jitcFlagsMapping2));
627 asmALUMemReg8(X86_OR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), AL);
628 #endif
629 }
630
631 #endif
632
633 static inline void jitcFlushFlagsAfterCMP(X86FlagTest t1, X86FlagTest t2, byte mask, int disp, uint32 map)
634 {
635 byte modrm[6];
636 NativeReg r = jitcAllocRegister(NATIVE_REG_8);
637 asmSETReg8(t1, (NativeReg8)r);
638 asmSETReg8(t2, (NativeReg8)(r+4));
639 asmMOVxxRegReg16(X86_MOVZX, r, r);
640 asmALUMemImm8(X86_AND, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+disp), mask);
641 asmALURegMem8(X86_MOV, (NativeReg8)r, modrm, x86_mem(modrm, r, map));
642 asmALUMemReg8(X86_OR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+disp), (NativeReg8)r);
643 }
644
645 void FASTCALL jitcFlushFlagsAfterCMPL_U(int disp)
646 {
647 jitcFlushFlagsAfterCMP(X86_A, X86_B, 0x0f, disp, (uint32)&jitcFlagsMappingCMP_U);
648 }
649
650 void FASTCALL jitcFlushFlagsAfterCMPL_L(int disp)
651 {
652 jitcFlushFlagsAfterCMP(X86_A, X86_B, 0xf0, disp, (uint32)&jitcFlagsMappingCMP_L);
653 }
654
655 void FASTCALL jitcFlushFlagsAfterCMP_U(int disp)
656 {
657 jitcFlushFlagsAfterCMP(X86_G, X86_L, 0x0f, disp, (uint32)&jitcFlagsMappingCMP_U);
658 }
659
660 void FASTCALL jitcFlushFlagsAfterCMP_L(int disp)
661 {
662 jitcFlushFlagsAfterCMP(X86_G, X86_L, 0xf0, disp, (uint32)&jitcFlagsMappingCMP_L);
663 }
664
665 void FASTCALL jitcClobberFlags()
666 {
667 if (gJITC.nativeFlagsState == rsDirty) {
668 if (gJITC.nativeCarryState == rsDirty) {
669 jitcFlushCarry();
670 }
671 jitcFlushFlags();
672 gJITC.nativeCarryState = rsUnused;
673 }
674 gJITC.nativeFlagsState = rsUnused;
675 }
676
677 void FASTCALL jitcClobberCarry()
678 {
679 if (gJITC.nativeCarryState == rsDirty) {
680 jitcFlushCarry();
681 }
682 gJITC.nativeCarryState = rsUnused;
683 }
684
685 void FASTCALL jitcClobberCarryAndFlags()
686 {
687 if (gJITC.nativeCarryState == rsDirty) {
688 if (gJITC.nativeFlagsState == rsDirty) {
689 jitcFlushCarry();
690 jitcFlushFlags();
691 gJITC.nativeCarryState = gJITC.nativeFlagsState = rsUnused;
692 } else {
693 jitcClobberCarry();
694 }
695 } else {
696 jitcClobberFlags();
697 }
698 }
699
700 /*
701 * ONLY FOR DEBUG! DON'T CALL (unless you know what you are doing)
702 */
703 void FASTCALL jitcFlushCarryAndFlagsDirty()
704 {
705 if (gJITC.nativeCarryState == rsDirty) {
706 jitcFlushCarry();
707 if (gJITC.nativeFlagsState == rsDirty) {
708 jitcFlushFlags();
709 }
710 } else {
711 if (gJITC.nativeFlagsState == rsDirty) {
712 jitcFlushFlags();
713 }
714 }
715 }
716
717 /*
718 * jitcFloatRegisterToNative converts the stack-independent
719 * register r to a stack-dependent register ST(i)
720 */
721 NativeFloatReg FASTCALL jitcFloatRegisterToNative(JitcFloatReg r)
722 {
723 return X86_FLOAT_ST(gJITC.nativeFloatTOP-gJITC.floatRegPerm[r]);
724 }
725
726 /*
727 * jitcFloatRegisterFromNative converts the stack-dependent
728 * register ST(r) to a stack-independent JitcFloatReg
729 */
730 JitcFloatReg FASTCALL jitcFloatRegisterFromNative(NativeFloatReg r)
731 {
732 ASSERT(gJITC.nativeFloatTOP > r);
733 return gJITC.floatRegPermInverse[gJITC.nativeFloatTOP-r];
734 }
735
736 /*
737 * Returns true iff r is on top of the floating point register
738 * stack.
739 */
740 bool FASTCALL jitcFloatRegisterIsTOP(JitcFloatReg r)
741 {
742 ASSERT(r != JITC_FLOAT_REG_NONE);
743 return gJITC.floatRegPerm[r] == gJITC.nativeFloatTOP;
744 }
745
746 /*
747 * Exchanges r to the front of the stack.
748 */
749 JitcFloatReg FASTCALL jitcFloatRegisterXCHGToFront(JitcFloatReg r)
750 {
751 ASSERT(r != JITC_FLOAT_REG_NONE);
752 if (jitcFloatRegisterIsTOP(r)) return r;
753
754 asmFXCHSTi(jitcFloatRegisterToNative(r));
755 JitcFloatReg s = jitcFloatRegisterFromNative(Float_ST0);
756 ASSERT(s != r);
757 // set floatRegPerm := floatRegPerm * (s r)
758 int tmp = gJITC.floatRegPerm[r];
759 gJITC.floatRegPerm[r] = gJITC.floatRegPerm[s];
760 gJITC.floatRegPerm[s] = tmp;
761
762 // set floatRegPermInverse := (s r) * floatRegPermInverse
763 r = gJITC.floatRegPerm[r];
764 s = gJITC.floatRegPerm[s];
765 tmp = gJITC.floatRegPermInverse[r];
766 gJITC.floatRegPermInverse[r] = gJITC.floatRegPermInverse[s];
767 gJITC.floatRegPermInverse[s] = tmp;
768
769 return r;
770 }
771
772 /*
773 * Dirties r
774 */
775 JitcFloatReg FASTCALL jitcFloatRegisterDirty(JitcFloatReg r)
776 {
777 gJITC.nativeFloatRegState[r] = rsDirty;
778 return r;
779 }
780
781 void FASTCALL jitcFloatRegisterInvalidate(JitcFloatReg r)
782 {
783 jitcFloatRegisterXCHGToFront(r);
784 asmFFREEPSTi(Float_ST0);
785 int creg = gJITC.nativeFloatRegStack[r];
786 gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
787 gJITC.nativeFloatTOP--;
788 }
789
790 void FASTCALL jitcPopFloatStack(JitcFloatReg hint1, JitcFloatReg hint2)
791 {
792 ASSERT(gJITC.nativeFloatTOP > 0);
793
794 JitcFloatReg r;
795 for (int i=0; i<4; i++) {
796 r = jitcFloatRegisterFromNative(X86_FLOAT_ST(gJITC.nativeFloatTOP-i-1));
797 if (r != hint1 && r != hint2) break;
798 }
799
800 // we can now free r
801 int creg = gJITC.nativeFloatRegStack[r];
802 jitcFloatRegisterXCHGToFront(r);
803 if (gJITC.nativeFloatRegState[r] == rsDirty) {
804 byte modrm[6];
805 asmFSTPDoubleMem(modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.fpr[creg]));
806 } else {
807 asmFFREEPSTi(Float_ST0);
808 }
809 gJITC.nativeFloatRegState[r] = rsUnused;
810 gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
811 gJITC.nativeFloatTOP--;
812 }
813
814 static JitcFloatReg FASTCALL jitcPushFloatStack(int creg)
815 {
816 ASSERT(gJITC.nativeFloatTOP < 8);
817 gJITC.nativeFloatTOP++;
818 int r = gJITC.floatRegPermInverse[gJITC.nativeFloatTOP];
819 byte modrm[6];
820 asmFLDDoubleMem(modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.fpr[creg]));
821 return r;
822 }
823
824 /*
825 * Creates a copy of r on the stack. If the stack is full, it will
826 * clobber an entry. It will not clobber r nor hint.
827 */
828 JitcFloatReg FASTCALL jitcFloatRegisterDup(JitcFloatReg freg, JitcFloatReg hint)
829 {
830 // ht_printf("dup %d\n", freg);
831 if (gJITC.nativeFloatTOP == 8) {
832 // stack is full
833 jitcPopFloatStack(freg, hint);
834 }
835 asmFLDSTi(jitcFloatRegisterToNative(freg));
836 gJITC.nativeFloatTOP++;
837 int r = gJITC.floatRegPermInverse[gJITC.nativeFloatTOP];
838 gJITC.nativeFloatRegState[r] = rsUnused; // not really mapped
839 return r;
840 }
841
842 void FASTCALL jitcFloatRegisterClobberAll()
843 {
844 if (!gJITC.nativeFloatTOP) return;
845
846 do {
847 JitcFloatReg r = jitcFloatRegisterFromNative(Float_ST0);
848 int creg = gJITC.nativeFloatRegStack[r];
849 switch (gJITC.nativeFloatRegState[r]) {
850 case rsDirty: {
851 byte modrm[6];
852 asmFSTPDoubleMem(modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.fpr[creg]));
853 gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
854 break;
855 }
856 case rsMapped:
857 asmFFREEPSTi(Float_ST0);
858 gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
859 break;
860 case rsUnused: {ASSERT(0);}
861 }
862 } while (--gJITC.nativeFloatTOP);
863 }
864
865 void FASTCALL jitcFloatRegisterStoreAndPopTOP(JitcFloatReg r)
866 {
867 asmFSTDPSTi(jitcFloatRegisterToNative(r));
868 gJITC.nativeFloatTOP--;
869 }
870
871 void FASTCALL jitcClobberClientRegisterForFloat(int creg)
872 {
873 NativeReg r = jitcGetClientRegisterMapping(PPC_FPR_U(creg));
874 if (r != REG_NO) jitcClobberRegister(r | NATIVE_REG);
875 r = jitcGetClientRegisterMapping(PPC_FPR_L(creg));
876 if (r != REG_NO) jitcClobberRegister(r | NATIVE_REG);
877 }
878
879 void FASTCALL jitcInvalidateClientRegisterForFloat(int creg)
880 {
881 // FIXME: no need to clobber, invalidate would be enough
882 jitcClobberClientRegisterForFloat(creg);
883 }
884
885 JitcFloatReg FASTCALL jitcGetClientFloatRegisterMapping(int creg)
886 {
887 return gJITC.clientFloatReg[creg];
888 }
889
890 JitcFloatReg FASTCALL jitcGetClientFloatRegisterUnmapped(int creg, int hint1, int hint2)
891 {
892 JitcFloatReg r = jitcGetClientFloatRegisterMapping(creg);
893 if (r == JITC_FLOAT_REG_NONE) {
894 if (gJITC.nativeFloatTOP == 8) {
895 jitcPopFloatStack(hint1, hint2);
896 }
897 r = jitcPushFloatStack(creg);
898 gJITC.nativeFloatRegState[r] = rsUnused;
899 }
900 return r;
901 }
902
903 JitcFloatReg FASTCALL jitcGetClientFloatRegister(int creg, int hint1, int hint2)
904 {
905 JitcFloatReg r = jitcGetClientFloatRegisterMapping(creg);
906 if (r == JITC_FLOAT_REG_NONE) {
907 if (gJITC.nativeFloatTOP == 8) {
908 jitcPopFloatStack(hint1, hint2);
909 }
910 r = jitcPushFloatStack(creg);
911 gJITC.clientFloatReg[creg] = r;
912 gJITC.nativeFloatRegStack[r] = creg;
913 gJITC.nativeFloatRegState[r] = rsMapped;
914 }
915 return r;
916 }
917
918 JitcFloatReg FASTCALL jitcMapClientFloatRegisterDirty(int creg, JitcFloatReg freg)
919 {
920 if (freg == JITC_FLOAT_REG_NONE) {
921 freg = jitcFloatRegisterFromNative(Float_ST0);
922 }
923 gJITC.clientFloatReg[creg] = freg;
924 gJITC.nativeFloatRegStack[freg] = creg;
925 gJITC.nativeFloatRegState[freg] = rsDirty;
926 return freg;
927 }
928
929 /*
930 *
931 */
932 NativeAddress FASTCALL asmHERE()
933 {
934 return gJITC.currentPage->tcp;
935 }
936
937 void FASTCALL asmNOP(int n)
938 {
939 if (n <= 0) return;
940 byte instr[15];
941 for (int i=0; i < (n-1); i++) {
942 instr[i] = 0x66;
943 }
944 instr[n-1] = 0x90;
945 jitcEmit(instr, n);
946 }
947
948 static void FASTCALL asmSimpleMODRM(uint8 opc, NativeReg reg1, NativeReg reg2)
949 {
950 byte instr[2] = {opc, 0xc0+(reg1<<3)+reg2};
951 jitcEmit(instr, sizeof(instr));
952 }
953
954 static void FASTCALL asmSimpleMODRM(uint8 opc, NativeReg16 reg1, NativeReg16 reg2)
955 {
956 byte instr[3] = {0x66, opc, 0xc0+(reg1<<3)+reg2};
957 jitcEmit(instr, sizeof(instr));
958 }
959
960 static void FASTCALL asmSimpleMODRM(uint8 opc, NativeReg8 reg1, NativeReg8 reg2)
961 {
962 byte instr[2] = {opc, 0xc0+(reg1<<3)+reg2};
963 jitcEmit(instr, sizeof(instr));
964 }
965
966 static void FASTCALL asmTEST_D(NativeReg reg1, uint32 imm)
967 {
968 if (reg1 <= EBX) {
969 if (imm <= 0xff) {
970 // test al, 1
971 if (reg1 == EAX) {
972 byte instr[2] = {0xa8, imm};
973 jitcEmit(instr, sizeof(instr));
974 } else {
975 byte instr[3] = {0xf6, 0xc0+reg1, imm};
976 jitcEmit(instr, sizeof(instr));
977 }
978 return;
979 } else if (!(imm & 0xffff00ff)) {
980 // test ah, 1
981 byte instr[3] = {0xf6, 0xc4+reg1, (imm>>8)};
982 jitcEmit(instr, sizeof(instr));
983 return;
984 }
985 }
986 // test eax, 1001
987 if (reg1 == EAX) {
988 byte instr[5];
989 instr[0] = 0xa9;
990 *((uint32 *)&instr[1]) = imm;
991 jitcEmit(instr, sizeof(instr));
992 } else {
993 byte instr[6];
994 instr[0] = 0xf7;
995 instr[1] = 0xc0+reg1;
996 *((uint32 *)&instr[2]) = imm;
997 jitcEmit(instr, sizeof(instr));
998 }
999 }
1000
1001 static void FASTCALL asmTEST_W(NativeReg16 reg1, uint16 imm)
1002 {
1003 if (reg1 <= BX) {
1004 if (imm <= 0xff) {
1005 // test al, 1
1006 if (reg1 == AX) {
1007 byte instr[2] = {0xa8, imm};
1008 jitcEmit(instr, sizeof(instr));
1009 } else {
1010 byte instr[3] = {0xf6, 0xc0+reg1, imm};
1011 jitcEmit(instr, sizeof(instr));
1012 }
1013 return;
1014 } else if (!(imm & 0xffff00ff)) {
1015 // test ah, 1
1016 byte instr[3] = {0xf6, 0xc4+reg1, (imm>>8)};
1017 jitcEmit(instr, sizeof(instr));
1018 return;
1019 }
1020 }
1021 // test eax, 1001
1022 if (reg1 == AX) {
1023 byte instr[4];
1024 instr[0] = 0x66;
1025 instr[1] = 0xa9;
1026 *((uint16 *)&instr[2]) = imm;
1027 jitcEmit(instr, sizeof(instr));
1028 } else {
1029 byte instr[5];
1030 instr[0] = 0x66;
1031 instr[1] = 0xf7;
1032 instr[2] = 0xc0+reg1;
1033 *((uint16 *)&instr[3]) = imm;
1034 jitcEmit(instr, sizeof(instr));
1035 }
1036 }
1037
1038 static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg reg1, NativeReg reg2)
1039 {
1040 byte instr[2] = {0x03+(opc<<3), 0xc0+(reg1<<3)+reg2};
1041 jitcEmit(instr, sizeof(instr));
1042 }
1043
1044 static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg16 reg1, NativeReg16 reg2)
1045 {
1046 byte instr[3] = {0x66, 0x03+(opc<<3), 0xc0+(reg1<<3)+reg2};
1047 jitcEmit(instr, sizeof(instr));
1048 }
1049
1050 static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg8 reg1, NativeReg8 reg2)
1051 {
1052 byte instr[2] = {0x02+(opc<<3), 0xc0+(reg1<<3)+reg2};
1053 jitcEmit(instr, sizeof(instr));
1054 }
1055
1056
1057 void FASTCALL asmALU(X86ALUopc opc, NativeReg reg1, NativeReg reg2)
1058 {
1059 switch (opc) {
1060 case X86_MOV:
1061 asmSimpleMODRM(0x8b, reg1, reg2);
1062 break;
1063 case X86_TEST:
1064 asmSimpleMODRM(0x85, reg1, reg2);
1065 break;
1066 case X86_XCHG:
1067 if (reg1 == EAX) {
1068 jitcEmit1(0x90+reg2);
1069 } else if (reg2 == EAX) {
1070 jitcEmit1(0x90+reg1);
1071 } else {
1072 asmSimpleMODRM(0x87, reg1, reg2);
1073 }
1074 break;
1075 default:
1076 asmSimpleALU(opc, reg1, reg2);
1077 }
1078 }
1079 void FASTCALL asmALURegReg(X86ALUopc opc, NativeReg reg1, NativeReg reg2)
1080 {
1081 asmALU(opc, reg1, reg2);
1082 }
1083
1084 void FASTCALL asmALU(X86ALUopc opc, NativeReg16 reg1, NativeReg16 reg2)
1085 {
1086 switch (opc) {
1087 case X86_MOV:
1088 asmSimpleMODRM(0x8b, reg1, reg2);
1089 break;
1090 case X86_TEST:
1091 asmSimpleMODRM(0x85, reg1, reg2);
1092 break;
1093 case X86_XCHG:
1094 if (reg1 == AX) {
1095 byte instr[2] = { 0x66, 0x90+reg2 };
1096 jitcEmit(instr, sizeof instr);
1097 } else if (reg2 == AX) {
1098 byte instr[2] = { 0x66, 0x90+reg1 };
1099 jitcEmit(instr, sizeof instr);
1100 } else {
1101 asmSimpleMODRM(0x87, reg1, reg2);
1102 }
1103 break;
1104 default:
1105 asmSimpleALU(opc, reg1, reg2);
1106 }
1107 }
1108 void FASTCALL asmALURegReg16(X86ALUopc opc, NativeReg reg1, NativeReg reg2)
1109 {
1110 asmALU(opc, (NativeReg16)reg1, (NativeReg16)reg2);
1111 }
1112
1113 void FASTCALL asmALU(X86ALUopc opc, NativeReg8 reg1, NativeReg8 reg2)
1114 {
1115 switch (opc) {
1116 case X86_MOV:
1117 asmSimpleMODRM(0x8a, reg1, reg2);
1118 break;
1119 case X86_TEST:
1120 asmSimpleMODRM(0x84, reg1, reg2);
1121 break;
1122 case X86_XCHG:
1123 asmSimpleMODRM(0x86, reg1, reg2);
1124 break;
1125 default:
1126 asmSimpleALU(opc, reg1, reg2);
1127 }
1128 }
1129 void FASTCALL asmALURegReg8(X86ALUopc opc, NativeReg8 reg1, NativeReg8 reg2)
1130 {
1131 asmALU(opc, reg1, reg2);
1132 }
1133
1134 void FASTCALL asmALU(X86ALUopc opc, NativeReg8 reg1, uint8 imm)
1135 {
1136 byte instr[5];
1137 switch (opc) {
1138 case X86_MOV:
1139 instr[0] = 0xb0 + reg1;
1140 instr[1] = imm;
1141 jitcEmit(instr, 2);
1142 break;
1143 case X86_TEST:
1144 if (reg1 == AL) {
1145 instr[0] = 0xa8;
1146 instr[1] = imm;
1147 jitcEmit(instr, 2);
1148 } else {
1149 instr[0] = 0xf6;
1150 instr[1] = 0xc0 + reg1;
1151 instr[2] = imm;
1152 jitcEmit(instr, 3);
1153 }
1154 break;
1155 case X86_XCHG:
1156 // internal error
1157 break;
1158 default: {
1159 if (reg1 == AL) {
1160 instr[0] = (opc<<3)|0x4;
1161 instr[1] = imm;
1162 jitcEmit(instr, 2);
1163 } else {
1164 instr[0] = 0x80;
1165 instr[1] = 0xc0+(opc<<3)+reg1;
1166 instr[2] = imm;
1167 jitcEmit(instr, 3);
1168 }
1169 break;
1170 }
1171 }
1172 }
1173 void FASTCALL asmALURegImm8(X86ALUopc opc, NativeReg8 reg1, uint8 imm)
1174 {
1175 asmALU(opc, reg1, imm);
1176 }
1177
1178 static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg reg1, uint32 imm)
1179 {
1180 if (imm <= 0x7f || imm >= 0xffffff80) {
1181 byte instr[3] = {0x83, 0xc0+(opc<<3)+reg1, imm};
1182 jitcEmit(instr, sizeof(instr));
1183 } else {
1184 if (reg1 == EAX) {
1185 byte instr[5];
1186 instr[0] = 0x05+(opc<<3);
1187 *((uint32 *)&instr[1]) = imm;
1188 jitcEmit(instr, sizeof(instr));
1189 } else {
1190 byte instr[6];
1191 instr[0] = 0x81;
1192 instr[1] = 0xc0+(opc<<3)+reg1;
1193 *((uint32 *)&instr[2]) = imm;
1194 jitcEmit(instr, sizeof(instr));
1195 }
1196 }
1197 }
1198
1199 static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg16 reg1, uint32 imm)
1200 {
1201 if (imm <= 0x7f || imm >= 0xffffff80) {
1202 byte instr[3] = {0x83, 0xc0+(opc<<3)+reg1, imm};
1203 jitcEmit(instr, sizeof(instr));
1204 } else {
1205 if (reg1 == AX) {
1206 byte instr[4];
1207 instr[0] = 0x66;
1208 instr[1] = 0x05+(opc<<3);
1209 *((uint16 *)&instr[2]) = imm;
1210 jitcEmit(instr, sizeof(instr));
1211 } else {
1212 byte instr[5];
1213 instr[0] = 0x66;
1214 instr[1] = 0x81;
1215 instr[2] = 0xc0+(opc<<3)+reg1;
1216 *((uint16 *)&instr[3]) = imm;
1217 jitcEmit(instr, sizeof(instr));
1218 }
1219 }
1220 }
1221
1222 void FASTCALL asmALU(X86ALUopc opc, NativeReg reg1, uint32 imm)
1223 {
1224 switch (opc) {
1225 case X86_MOV:
1226 if (imm == 0) {
1227 asmALU(X86_XOR, reg1, reg1);
1228 } else {
1229 asmMOV_NoFlags(reg1, imm);
1230 }
1231 break;
1232 case X86_XCHG:
1233 // internal error
1234 break;
1235 case X86_TEST:
1236 asmTEST_D(reg1, imm);
1237 break;
1238 case X86_CMP:
1239 // if (imm == 0) {
1240 // asmALU(X86_OR, reg1, reg1);
1241 // } else {
1242 asmSimpleALU(opc, reg1, imm);
1243 // }
1244 break;
1245 default:
1246 asmSimpleALU(opc, reg1, imm);
1247 }
1248 }
1249 void FASTCALL asmALURegImm(X86ALUopc opc, NativeReg reg1, uint32 imm)
1250 {
1251 asmALU(opc, reg1, imm);
1252 }
1253
1254 void FASTCALL asmALU(X86ALUopc opc, NativeReg16 reg1, uint16 imm)
1255 {
1256 switch (opc) {
1257 case X86_MOV:
1258 if (imm == 0) {
1259 asmALU(X86_XOR, reg1, reg1);
1260 } else {
1261 asmMOV_NoFlags(reg1, imm);
1262 }
1263 break;
1264 case X86_XCHG:
1265 // internal error
1266 break;
1267 case X86_TEST:
1268 asmTEST_W(reg1, imm);
1269 break;
1270 case X86_CMP:
1271 // if (imm == 0) {
1272 // asmALU(X86_OR, reg1, reg1);
1273 // } else {
1274 asmSimpleALU(opc, reg1, imm);
1275 // }
1276 break;
1277 default:
1278 asmSimpleALU(opc, reg1, imm);
1279 }
1280 }
1281 void FASTCALL asmALURegImm16(X86ALUopc opc, NativeReg reg1, uint32 imm)
1282 {
1283 asmALU(opc, (NativeReg16)reg1, imm);
1284 }
1285
1286 void FASTCALL asmMOV_NoFlags(NativeReg reg1, uint32 imm)
1287 {
1288 byte instr[5];
1289 instr[0] = 0xb8+reg1;
1290 *((uint32 *)&instr[1]) = imm;
1291 jitcEmit(instr, sizeof(instr));
1292 }
1293 void FASTCALL asmMOVRegImm_NoFlags(NativeReg reg1, uint32 imm)
1294 {
1295 asmMOV_NoFlags(reg1, imm);
1296 }
1297
1298 void FASTCALL asmMOV_NoFlags(NativeReg16 reg1, uint16 imm)
1299 {
1300 byte instr[4];
1301 instr[0] = 0x66;
1302 instr[1] = 0xb8+reg1;
1303 *((uint16 *)&instr[2]) = imm;
1304 jitcEmit(instr, sizeof(instr));
1305 }
1306 void FASTCALL asmMOVRegImm16_NoFlags(NativeReg reg1, uint16 imm)
1307 {
1308 asmMOV_NoFlags((NativeReg16)reg1, imm);
1309 }
1310
1311 void FASTCALL asmALU(X86ALUopc1 opc, NativeReg reg1)
1312 {
1313 byte instr[2];
1314 switch (opc) {
1315 case X86_NOT:
1316 instr[0] = 0xf7;
1317 instr[1] = 0xd0+reg1;
1318 break;
1319 case X86_NEG:
1320 instr[0] = 0xf7;
1321 instr[1] = 0xd8+reg1;
1322 break;
1323 case X86_MUL:
1324 instr[0] = 0xf7;
1325 instr[1] = 0xe0+reg1;
1326 break;
1327 case X86_IMUL:
1328 instr[0] = 0xf7;
1329 instr[1] = 0xe8+reg1;
1330 break;
1331 case X86_DIV:
1332 instr[0] = 0xf7;
1333 instr[1] = 0xf0+reg1;
1334 break;
1335 case X86_IDIV:
1336 instr[0] = 0xf7;
1337 instr[1] = 0xf8+reg1;
1338 break;
1339 }
1340 jitcEmit(instr, 2);
1341 }
1342 void FASTCALL asmALUReg(X86ALUopc1 opc, NativeReg reg1)
1343 {
1344 asmALU(opc, reg1);
1345 }
1346
1347 void FASTCALL asmALU(X86ALUopc1 opc, NativeReg16 reg1)
1348 {
1349 byte instr[3];
1350 instr[0] = 0x66;
1351
1352 switch (opc) {
1353 case X86_NOT:
1354 instr[1] = 0xf7;
1355 instr[2] = 0xd0+reg1;
1356 break;
1357 case X86_NEG:
1358 instr[1] = 0xf7;
1359 instr[2] = 0xd8+reg1;
1360 break;
1361 case X86_MUL:
1362 instr[1] = 0xf7;
1363 instr[2] = 0xe0+reg1;
1364 break;
1365 case X86_IMUL:
1366 instr[1] = 0xf7;
1367 instr[2] = 0xe8+reg1;
1368 break;
1369 case X86_DIV:
1370 instr[1] = 0xf7;
1371 instr[2] = 0xf0+reg1;
1372 break;
1373 case X86_IDIV:
1374 instr[1] = 0xf7;
1375 instr[2] = 0xf8+reg1;
1376 break;
1377 }
1378 jitcEmit(instr, 3);
1379 }
1380 void FASTCALL asmALUReg16(X86ALUopc1 opc, NativeReg reg1)
1381 {
1382 asmALU(opc, (NativeReg16)reg1);
1383 }
1384
1385 void FASTCALL asmALUMemReg(X86ALUopc opc, byte *modrm, int len, NativeReg reg2)
1386 {
1387 byte instr[15];
1388
1389 switch (opc) {
1390 case X86_MOV:
1391 instr[0] = 0x89;
1392 break;
1393 case X86_XCHG:
1394 instr[0] = 0x87;
1395 break;
1396 case X86_TEST:
1397 instr[0] = 0x85;
1398 break;
1399 default:
1400 instr[0] = 0x01+(opc<<3);
1401 }
1402 memcpy(&instr[1], modrm, len);
1403 instr[1] |= (reg2<<3);
1404 jitcEmit(instr, len+1);
1405 }
1406 void FASTCALL asmALU(X86ALUopc opc, modrm_p modrm, NativeReg reg2)
1407 {
1408 byte instr[15];
1409 int len = modrm++[0];
1410
1411 switch (opc) {
1412 case X86_MOV:
1413 instr[0] = 0x89;
1414 break;
1415 case X86_XCHG:
1416 instr[0] = 0x87;
1417 break;
1418 case X86_TEST:
1419 instr[0] = 0x85;
1420 break;
1421 default:
1422 instr[0] = 0x01+(opc<<3);
1423 }
1424 memcpy(&instr[1], modrm, len);
1425 instr[1] |= (reg2<<3);
1426 jitcEmit(instr, len+1);
1427 }
1428
1429 void FASTCALL asmALUMemReg16(X86ALUopc opc, byte *modrm, int len, NativeReg reg2)
1430 {
1431 byte instr[16];
1432
1433 instr[0] = 0x66;
1434 switch (opc) {
1435 case X86_MOV:
1436 instr[1] = 0x89;
1437 break;
1438 case X86_XCHG:
1439 instr[1] = 0x87;
1440 break;
1441 case X86_TEST:
1442 instr[1] = 0x85;
1443 break;
1444 default:
1445 instr[1] = 0x01+(opc<<3);
1446 }
1447 memcpy(&instr[2], modrm, len);
1448 instr[2] |= (reg2<<3);
1449 jitcEmit(instr, len+2);
1450 }
1451 void FASTCALL asmALU(X86ALUopc opc, modrm_p modrm, NativeReg16 reg2)
1452 {
1453 byte instr[16];
1454 int len = modrm++[0];
1455
1456 instr[0] = 0x66;
1457 switch (opc) {
1458 case X86_MOV:
1459 instr[1] = 0x89;
1460 break;
1461 case X86_XCHG:
1462 instr[1] = 0x87;
1463 break;
1464 case X86_TEST:
1465 instr[1] = 0x85;
1466 break;
1467 default:
1468 instr[1] = 0x01+(opc<<3);
1469 }
1470 memcpy(&instr[2], modrm, len);
1471 instr[2] |= (reg2<<3);
1472 jitcEmit(instr, len+2);
1473 }
1474
1475
1476 static void FASTCALL asmSimpleALU_D(X86ALUopc opc, byte *modrm, int len, uint32 imm)
1477 {
1478 byte instr[15];
1479
1480 if (imm <= 0x7f || imm >= 0xffffff80) {
1481 instr[0] = 0x83;
1482 memcpy(&instr[1], modrm, len);
1483 instr[1] |= (opc<<3);
1484 instr[len+1] = imm;
1485 jitcEmit(instr, len+2);
1486 } else {
1487 instr[0] = 0x81;
1488 memcpy(&instr[1], modrm, len);
1489 instr[1] |= (opc<<3);
1490 *((uint32 *)&instr[len+1]) = imm;
1491 jitcEmit(instr, len+5);
1492 }
1493 }
1494
1495 static void FASTCALL asmSimpleALU_W(X86ALUopc opc, byte *modrm, int len, uint16 imm)
1496 {
1497 byte instr[16];
1498
1499 instr[0] = 0x66;
1500
1501 if (imm <= 0x7f || imm >= 0xff80) {
1502 instr[0] = 0x83;
1503 memcpy(&instr[1], modrm, len);
1504 instr[1] |= (opc<<3);
1505 instr[len+1] = imm;
1506 jitcEmit(instr, len+2);
1507 } else {
1508 instr[1] = 0x81;
1509 memcpy(&instr[2], modrm, len);
1510 instr[2] |= (opc<<3);
1511 *((uint16 *)&instr[len+2]) = imm;
1512 jitcEmit(instr, len+4);
1513 }
1514 }
1515
1516 void FASTCALL asmALUMemImm(X86ALUopc opc, byte *modrm, int len, uint32 imm)
1517 {
1518 byte instr[15];
1519 switch (opc) {
1520 case X86_MOV: {
1521 instr[0] = 0xc7;
1522 memcpy(&instr[1], modrm, len);
1523 *((uint32 *)&instr[len+1]) = imm;
1524 jitcEmit(instr, len+5);
1525 break;
1526 }
1527 case X86_XCHG:
1528 // internal error
1529 break;
1530 case X86_TEST:
1531 instr[0] = 0xf7;
1532 memcpy(&instr[1], modrm, len);
1533 *((uint32 *)&instr[len+1]) = imm;
1534 jitcEmit(instr, len+5);
1535 break;
1536 default:
1537 asmSimpleALU_D(opc, modrm, len, imm);
1538 }
1539 }
1540
1541 void FASTCALL asmALU_D(X86ALUopc opc, modrm_p modrm, uint32 imm)
1542 {
1543 int len = modrm++[0];
1544
1545 asmALUMemImm(opc, modrm, len, imm);
1546 }
1547
1548 void FASTCALL asmALUMemImm16(X86ALUopc opc, byte *modrm, int len, uint16 imm)
1549 {
1550 byte instr[16];
1551 instr[0] = 0x66;
1552
1553 switch (opc) {
1554 case X86_MOV: {
1555 instr[1] = 0xc7;
1556 memcpy(&instr[2], modrm, len);
1557 *((uint16 *)&instr[len+2]) = imm;
1558 jitcEmit(instr, len+4);
1559 break;
1560 }
1561 case X86_XCHG:
1562 // internal error
1563 break;
1564 case X86_TEST:
1565 instr[1] = 0xf7;
1566 memcpy(&instr[2], modrm, len);
1567 *((uint16 *)&instr[len+2]) = imm;
1568 jitcEmit(instr, len+4);
1569 break;
1570 default:
1571 asmSimpleALU_W(opc, modrm, len, imm);
1572 }
1573 }
1574 void FASTCALL asmALU_W(X86ALUopc opc, modrm_p modrm, uint16 imm)
1575 {
1576 int len = modrm++[0];
1577
1578 asmALUMemImm16(opc, modrm, len, imm);
1579 }
1580
1581 void FASTCALL asmALURegMem(X86ALUopc opc, NativeReg reg1, byte *modrm, int len)
1582 {
1583 byte instr[15];
1584 switch (opc) {
1585 case X86_MOV:
1586 instr[0] = 0x8b;
1587 break;
1588 case X86_XCHG:
1589 // XCHG is symmetric
1590 instr[0] = 0x87;
1591 break;
1592 case X86_TEST:
1593 // TEST is symmetric
1594 instr[0] = 0x85;
1595 break;
1596 default:
1597 instr[0] = 0x03+(opc<<3);
1598 }
1599 memcpy(&instr[1], modrm, len);
1600 instr[1] |= (reg1<<3);
1601 jitcEmit(instr, len+1);
1602 }
1603 void FASTCALL asmALU(X86ALUopc opc, NativeReg reg1, modrm_p modrm)
1604 {
1605 int len = modrm++[0];
1606
1607 asmALURegMem(opc, reg1, modrm, len);
1608 }
1609
1610 void FASTCALL asmALURegMem16(X86ALUopc opc, NativeReg reg1, byte *modrm, int len)
1611 {
1612 byte instr[16];
1613 instr[0] = 0x66;
1614 switch (opc) {
1615 case X86_MOV:
1616 instr[1] = 0x8b;
1617 break;
1618 case X86_XCHG:
1619 // XCHG is symmetric
1620 instr[1] = 0x87;
1621 break;
1622 case X86_TEST:
1623 // TEST is symmetric
1624 instr[1] = 0x85;
1625 break;
1626 default:
1627 instr[1] = 0x03+(opc<<3);
1628 }
1629 memcpy(&instr[2], modrm, len);
1630 instr[2] |= (reg1<<3);
1631 jitcEmit(instr, len+2);
1632 }
1633 void FASTCALL asmALU(X86ALUopc opc, NativeReg16 reg1, modrm_p modrm)
1634 {
1635 int len = modrm++[0];
1636
1637 asmALURegMem16(opc, (NativeReg)reg1, modrm, len);
1638 }
1639
1640 void FASTCALL asmALURegMem8(X86ALUopc opc, NativeReg8 reg1, byte *modrm, int len)
1641 {
1642 byte instr[15];
1643 switch (opc) {
1644 case X86_MOV:
1645 instr[0] = 0x8a;
1646 break;
1647 case X86_XCHG:
1648 // XCHG is symmetric
1649 instr[0] = 0x86;
1650 break;
1651 case X86_TEST:
1652 // TEST is symmetric
1653 instr[0] = 0x84;
1654 break;
1655 default:
1656 instr[0] = 0x02+(opc<<3);
1657 }
1658 memcpy(&instr[1], modrm, len);
1659 instr[1] |= (reg1<<3);
1660 jitcEmit(instr, len+1);
1661 }
1662 void FASTCALL asmALU(X86ALUopc opc, NativeReg8 reg1, modrm_p modrm)
1663 {
1664 int len = modrm++[0];
1665
1666 asmALURegMem8(opc, reg1, modrm, len);
1667 }
1668
1669 void FASTCALL asmALUMemReg8(X86ALUopc opc, byte *modrm, int len, NativeReg8 reg2)
1670 {
1671 byte instr[15];
1672 switch (opc) {
1673 case X86_MOV:
1674 instr[0] = 0x88;
1675 break;
1676 case X86_XCHG:
1677 instr[0] = 0x86;
1678 break;
1679 case X86_TEST:
1680 instr[0] = 0x84;
1681 break;
1682 default:
1683 instr[0] = 0x00+(opc<<3);
1684 }
1685 memcpy(&instr[1], modrm, len);
1686 instr[1] |= (reg2<<3);
1687 jitcEmit(instr, len+1);
1688 }
1689 void FASTCALL asmALU(X86ALUopc opc, modrm_p modrm, NativeReg8 reg2)
1690 {
1691 int len = modrm++[0];
1692
1693 asmALUMemReg8(opc, modrm, len, reg2);
1694 }
1695
1696 void FASTCALL asmALUMemImm8(X86ALUopc opc, byte *modrm, int len, uint8 imm)
1697 {
1698 byte instr[15];
1699 switch (opc) {
1700 case X86_MOV:
1701 instr[0] = 0xc6;
1702 break;
1703 case X86_XCHG:
1704 // internal error
1705 break;
1706 case X86_TEST:
1707 instr[0] = 0xf6;
1708 break;
1709 default:
1710 instr[0] = 0x80;
1711 memcpy(&instr[1], modrm, len);
1712 instr[1] |= (opc<<3);
1713 instr[len+1] = imm;
1714 jitcEmit(instr, len+2);
1715 return;
1716 }
1717 memcpy(&instr[1], modrm, len);
1718 instr[len+1] = imm;
1719 jitcEmit(instr, len+2);
1720 }
1721 void FASTCALL asmALU_B(X86ALUopc opc, modrm_p modrm, uint8 imm)
1722 {
1723 int len = modrm++[0];
1724
1725 asmALUMemImm8(opc, modrm, len, imm);
1726 }
1727
1728 void FASTCALL asmMOV(const void *disp, NativeReg reg1)
1729 {
1730 byte instr[6];
1731 if (reg1==EAX) {
1732 instr[0] = 0xa3;
1733 *((uint32 *)&instr[1]) = (uint32)disp;
1734 jitcEmit(instr, 5);
1735 } else {
1736 instr[0] = 0x89;
1737 instr[1] = 0x05 | (reg1 << 3);
1738 *((uint32 *)&instr[2]) = (uint32)disp;
1739 jitcEmit(instr, 6);
1740 }
1741 }
1742 void FASTCALL asmMOVDMemReg(uint32 disp, NativeReg reg1)
1743 {
1744 asmMOV((const void *)disp, reg1);
1745 }
1746
1747 void FASTCALL asmMOV(const void *disp, NativeReg16 reg1)
1748 {
1749 byte instr[7];
1750 instr[0] = 0x66;
1751 if (reg1==AX) {
1752 instr[1] = 0xa3;
1753 *((uint32 *)&instr[2]) = (uint32)disp;
1754 jitcEmit(instr, 6);
1755 } else {
1756 instr[1] = 0x89;
1757 instr[2] = 0x05 | (reg1 << 3);
1758 *((uint32 *)&instr[3]) = (uint32)disp;
1759 jitcEmit(instr, 7);
1760 }
1761 }
1762 void FASTCALL asmMOVDMemReg16(uint32 disp, NativeReg reg1)
1763 {
1764 asmMOV((const void *)disp, (NativeReg16)reg1);
1765 }
1766
1767 void FASTCALL asmMOV(NativeReg reg1, const void *disp)
1768 {
1769 byte instr[6];
1770 if (reg1==EAX) {
1771 instr[0] = 0xa1;
1772 *((uint32 *)&instr[1]) = (uint32)disp;
1773 jitcEmit(instr, 5);
1774 } else {
1775 instr[0] = 0x8b;
1776 instr[1] = 0x05 | (reg1 << 3);
1777 *((uint32 *)&instr[2]) = (uint32)disp;
1778 jitcEmit(instr, 6);
1779 }
1780 }
1781 void FASTCALL asmMOVRegDMem(NativeReg reg1, uint32 disp)
1782 {
1783 asmMOV(reg1, (const void *)disp);
1784 }
1785
1786 void FASTCALL asmMOV(NativeReg16 reg1, const void *disp)
1787 {
1788 byte instr[7];
1789 instr[0] = 0x66;
1790 if (reg1==AX) {
1791 instr[1] = 0xa1;
1792 *((uint32 *)&instr[2]) = (uint32)disp;
1793 jitcEmit(instr, 6);
1794 } else {
1795 instr[1] = 0x8b;
1796 instr[2] = 0x05 | (reg1 << 3);
1797 *((uint32 *)&instr[3]) = (uint32)disp;
1798 jitcEmit(instr, 7);
1799 }
1800 }
1801 void FASTCALL asmMOVRegDMem16(NativeReg reg1, uint32 disp)
1802 {
1803 asmMOV((NativeReg16)reg1, (const void *)disp);
1804 }
1805
1806 void FASTCALL asmTEST(const void *disp, uint32 imm)
1807 {
1808 byte instr[15];
1809 instr[1] = 0x05;
1810 if (!(imm & 0xffffff00)) {
1811 instr[0] = 0xf6;
1812 *((uint32 *)&instr[2]) = (uint32)disp;
1813 instr[6] = imm;
1814 } else if (!(imm & 0xffff00ff)) {
1815 instr[0] = 0xf6;
1816 *((uint32 *)&instr[2]) = (uint32)disp+1;
1817 instr[6] = imm >> 8;
1818 } else if (!(imm & 0xff00ffff)) {
1819 instr[0] = 0xf6;
1820 *((uint32 *)&instr[2]) = (uint32)disp+2;
1821 instr[6] = imm >> 16;
1822 } else if (!(imm & 0x00ffffff)) {
1823 instr[0] = 0xf6;
1824 *((uint32 *)&instr[2]) = (uint32)disp+3;
1825 instr[6] = imm >> 24;
1826 } else {
1827 instr[0] = 0xf7;
1828 *((uint32 *)&instr[2]) = (uint32)disp;
1829 *((uint32 *)&instr[6]) = imm;
1830 jitcEmit(instr, 10);
1831 return;
1832 }
1833 jitcEmit(instr, 7);
1834 }
1835 void FASTCALL asmTESTDMemImm(uint32 disp, uint32 imm)
1836 {
1837 asmTEST((const void *)disp, imm);
1838 }
1839
1840 void FASTCALL asmAND(const void *disp, uint32 imm)
1841 {
1842 byte instr[15];
1843 instr[1] = 0x25;
1844 if ((imm & 0xffffff00)==0xffffff00) {
1845 instr[0] = 0x80;
1846 *((uint32 *)&instr[2]) = (uint32)disp;
1847 instr[6] = imm;
1848 } else if ((imm & 0xffff00ff)==0xffff00ff) {
1849 instr[0] = 0x80;
1850 *((uint32 *)&instr[2]) = (uint32)disp+1;
1851 instr[6] = imm >> 8;
1852 } else if ((imm & 0xff00ffff)==0xff00ffff) {
1853 instr[0] = 0x80;
1854 *((uint32 *)&instr[2]) = (uint32)disp+2;
1855 instr[6] = imm >> 16;
1856 } else if ((imm & 0x00ffffff)==0x00ffffff) {
1857 instr[0] = 0x80;
1858 *((uint32 *)&instr[2]) = (uint32)disp+3;
1859 instr[6] = imm >> 24;
1860 } else {
1861 instr[0] = 0x81;
1862 *((uint32 *)&instr[2]) = (uint32)disp;
1863 *((uint32 *)&instr[6]) = imm;
1864 jitcEmit(instr, 10);
1865 return;
1866 }
1867 jitcEmit(instr, 7);
1868 }
1869 void FASTCALL asmANDDMemImm(uint32 disp, uint32 imm)
1870 {
1871 asmAND((const void *)disp, imm);
1872 }
1873
1874 void FASTCALL asmOR(const void *disp, uint32 imm)
1875 {
1876 byte instr[15];
1877 instr[1] = 0x0d;
1878 if (!(imm & 0xffffff00)) {
1879 instr[0] = 0x80;
1880 *((uint32 *)&instr[2]) = (uint32)disp;
1881 instr[6] = imm;
1882 } else if (!(imm & 0xffff00ff)) {
1883 instr[0] = 0x80;
1884 *((uint32 *)&instr[2]) = (uint32)disp+1;
1885 instr[6] = imm >> 8;
1886 } else if (!(imm & 0xff00ffff)) {
1887 instr[0] = 0x80;
1888 *((uint32 *)&instr[2]) = (uint32)disp+2;
1889 instr[6] = imm >> 16;
1890 } else if (!(imm & 0x00ffffff)) {
1891 instr[0] = 0x80;
1892 *((uint32 *)&instr[2]) = (uint32)disp+3;
1893 instr[6] = imm >> 24;
1894 } else {
1895 instr[0] = 0x81;
1896 *((uint32 *)&instr[2]) = (uint32)disp;
1897 *((uint32 *)&instr[6]) = imm;
1898 jitcEmit(instr, 10);
1899 return;
1900 }
1901 jitcEmit(instr, 7);
1902 }
1903 void FASTCALL asmORDMemImm(uint32 disp, uint32 imm)
1904 {
1905 asmOR((const void *)disp, imm);
1906 }
1907
1908
1909 void FASTCALL asmMOVxx(X86MOVxx opc, NativeReg reg1, NativeReg8 reg2)
1910 {
1911 byte instr[3] = {0x0f, opc, 0xc0+(reg1<<3)+reg2};
1912 jitcEmit(instr, sizeof(instr));
1913 }
1914 void FASTCALL asmMOVxxRegReg8(X86MOVxx opc, NativeReg reg1, NativeReg8 reg2)
1915 {
1916 asmMOVxx(opc, reg1, reg2);
1917 }
1918
1919 void FASTCALL asmMOVxx(X86MOVxx opc, NativeReg reg1, NativeReg16 reg2)
1920 {
1921 byte instr[3] = {0x0f, opc+1, 0xc0+(reg1<<3)+reg2};
1922 jitcEmit(instr, sizeof(instr));
1923 }
1924 void FASTCALL asmMOVxxRegReg16(X86MOVxx opc, NativeReg reg1, NativeReg reg2)
1925 {
1926 asmMOVxx(opc, reg1, (NativeReg16)reg2);
1927 }
1928
1929 void FASTCALL asmMOVxxRegMem8(X86MOVxx opc, NativeReg reg1, byte *modrm, int len)
1930 {
1931 byte instr[16] = { 0x0f };
1932
1933 instr[1] = opc;
1934 memcpy(&instr[2], modrm, len);
1935 instr[2] |= (reg1 << 3);
1936
1937 jitcEmit(instr, len+2);
1938 }
1939 void FASTCALL asmMOVxx_B(X86MOVxx opc, NativeReg reg1, modrm_p modrm)
1940 {
1941 int len = modrm++[0];
1942
1943 asmMOVxxRegMem8(opc, reg1, modrm, len);
1944 }
1945
1946 void FASTCALL asmMOVxxRegMem16(X86MOVxx opc, NativeReg reg1, byte *modrm, int len)
1947 {
1948 byte instr[16] = { 0x0f };
1949
1950 instr[1] = opc+1;
1951 memcpy(&instr[2], modrm, len);
1952 instr[2] |= (reg1 << 3);
1953
1954 jitcEmit(instr, len+2);
1955 }
1956 void FASTCALL asmMOVxx_W(X86MOVxx opc, NativeReg reg1, modrm_p modrm)
1957 {
1958 int len = modrm++[0];
1959
1960 asmMOVxxRegMem16(opc, reg1, modrm, len);
1961 }
1962
1963 void FASTCALL asmSET(X86FlagTest flags, NativeReg8 reg1)
1964 {
1965 byte instr[3] = {0x0f, 0x90+flags, 0xc0+reg1};
1966 jitcEmit(instr, sizeof(instr));
1967 }
1968 void FASTCALL asmSETReg8(X86FlagTest flags, NativeReg8 reg1)
1969 {
1970 asmSET(flags, reg1);
1971 }
1972
1973 void FASTCALL asmSETMem(X86FlagTest flags, byte *modrm, int len)
1974 {
1975 byte instr[15];
1976 instr[0] = 0x0f;
1977 instr[1] = 0x90+flags;
1978 memcpy(instr+2, modrm, len);
1979 jitcEmit(instr, len+2);
1980 }
1981 void FASTCALL asmSET(X86FlagTest flags, modrm_p modrm)
1982 {
1983 int len = modrm++[0];
1984
1985 asmSETMem(flags, modrm, len);
1986 }
1987
1988 void FASTCALL asmCMOV(X86FlagTest flags, NativeReg reg1, NativeReg reg2)
1989 {
1990 if (gJITC.hostCPUCaps.cmov) {
1991 byte instr[3] = {0x0f, 0x40+flags, 0xc0+(reg1<<3)+reg2};
1992 jitcEmit(instr, sizeof(instr));
1993 } else {
1994 byte instr[4] = {
1995 0x70+(flags ^ 1), 0x02, // jnCC $+2
1996 0x8b, 0xc0+(reg1<<3)+reg2, // mov reg1, reg2
1997 };
1998 jitcEmit(instr, sizeof instr);
1999 }
2000 }
2001 void FASTCALL asmCMOVRegReg(X86FlagTest flags, NativeReg reg1, NativeReg reg2)
2002 {
2003 asmCMOV(flags, reg1, reg2);
2004 }
2005
2006 void FASTCALL asmCMOVRegMem(X86FlagTest flags, NativeReg reg1, byte *modrm, int len)
2007 {
2008 if (gJITC.hostCPUCaps.cmov) {
2009 byte instr[16] = {0x0f, 0x40+flags };
2010 memcpy(&instr[2], modrm, len);
2011 instr[2] |= (reg1<<3);
2012 jitcEmit(instr, len+2);
2013 } else {
2014 byte instr[17] = {
2015 0x70+(flags ^ 1), 1 + len, // jnCC $+2
2016 0x8b, // mov reg1, *
2017 };
2018 memcpy(&instr[3], modrm, len);
2019 instr[3] |= (reg1<<3);
2020 jitcEmit(instr, len+3);
2021 }
2022 }
2023 void FASTCALL asmCMOV(X86FlagTest flags, NativeReg reg1, modrm_p modrm)
2024 {
2025 int len = modrm++[0];
2026
2027 asmCMOVRegMem(flags, reg1, modrm, len);
2028 }
2029
2030 void FASTCALL asmShift(X86ShiftOpc opc, NativeReg reg1, uint32 imm)
2031 {
2032 if (imm == 1) {
2033 byte instr[2] = {0xd1, 0xc0+opc+reg1};
2034 jitcEmit(instr, sizeof(instr));
2035 } else {
2036 byte instr[3] = {0xc1, 0xc0+opc+reg1, imm};
2037 jitcEmit(instr, sizeof(instr));
2038 }
2039 }
2040 void FASTCALL asmShiftRegImm(X86ShiftOpc opc, NativeReg reg1, uint32 imm)
2041 {
2042 asmShift(opc, reg1, imm);
2043 }
2044
2045 void FASTCALL asmShift_CL(X86ShiftOpc opc, NativeReg reg1)
2046 {
2047 // 0xd3 [ModR/M]
2048 byte instr[2] = {0xd3, 0xc0+opc+reg1};
2049 jitcEmit(instr, sizeof(instr));
2050 }
2051 void FASTCALL asmShiftRegCL(X86ShiftOpc opc, NativeReg reg1)
2052 {
2053 asmShift_CL(opc, reg1);
2054 }
2055
2056 void FASTCALL asmShift(X86ShiftOpc opc, NativeReg16 reg1, uint32 imm)
2057 {
2058 if (imm == 1) {
2059 byte instr[3] = {0x66, 0xd1, 0xc0+opc+reg1};
2060 jitcEmit(instr, sizeof(instr));
2061 } else {
2062 byte instr[4] = {0x66, 0xc1, 0xc0+opc+reg1, imm};
2063 jitcEmit(instr, sizeof(instr));
2064 }
2065 }
2066 void FASTCALL asmShiftReg16Imm(X86ShiftOpc opc, NativeReg reg1, uint32 imm)
2067 {
2068 asmShift(opc, (NativeReg16)reg1, imm);
2069 }
2070
2071 void FASTCALL asmShift_CL(X86ShiftOpc opc, NativeReg16 reg1)
2072 {
2073 // 0xd3 [ModR/M]
2074 byte instr[3] = {0x66, 0xd3, 0xc0+opc+reg1};
2075 jitcEmit(instr, sizeof(instr));
2076 }
2077 void FASTCALL asmShiftReg16CL(X86ShiftOpc opc, NativeReg reg1)
2078 {
2079 asmShift_CL(opc, (NativeReg16)reg1);
2080 }
2081
2082 void FASTCALL asmShift(X86ShiftOpc opc, NativeReg8 reg1, uint32 imm)
2083 {
2084 if (imm == 1) {
2085 byte instr[2] = {0xd0, 0xc0+opc+reg1};
2086 jitcEmit(instr, sizeof(instr));
2087 } else {
2088 byte instr[3] = {0xc0, 0xc0+opc+reg1, imm};
2089 jitcEmit(instr, sizeof(instr));
2090 }
2091 }
2092 void FASTCALL asmShiftReg8Imm(X86ShiftOpc opc, NativeReg8 reg1, uint32 imm)
2093 {
2094 asmShift(opc, reg1, imm);
2095 }
2096
2097 void FASTCALL asmShift_CL(X86ShiftOpc opc, NativeReg8 reg1)
2098 {
2099 // 0xd3 [ModR/M]
2100 byte instr[2] = {0xd2, 0xc0+opc+reg1};
2101 jitcEmit(instr, sizeof(instr));
2102 }
2103 void FASTCALL asmShiftReg8CL(X86ShiftOpc opc, NativeReg8 reg1)
2104 {
2105 asmShift_CL(opc, reg1);
2106 }
2107
2108 void FASTCALL asmIMUL(NativeReg reg1, NativeReg reg2, uint32 imm)
2109 {
2110 if (imm <= 0x7f || imm >= 0xffffff80) {
2111 byte instr[3] = {0x6b, 0xc0+(reg1<<3)+reg2, imm};
2112 jitcEmit(instr, sizeof(instr));
2113 } else {
2114 byte instr[6] = {0x69, 0xc0+(reg1<<3)+reg2};
2115 *((uint32*)(&instr[2])) = imm;
2116 jitcEmit(instr, sizeof(instr));
2117 }
2118 }
2119
2120 void FASTCALL asmIMUL(NativeReg reg1, NativeReg reg2)
2121 {
2122 byte instr[3] = {0x0f, 0xaf, 0xc0+(reg1<<3)+reg2};
2123 jitcEmit(instr, sizeof(instr));
2124 }
2125
2126 void FASTCALL asmIMULRegRegImm(NativeReg reg1, NativeReg reg2, uint32 imm)
2127 {
2128 asmIMUL(reg1, reg2, imm);
2129 }
2130
2131 void FASTCALL asmIMULRegReg(NativeReg reg1, NativeReg reg2)
2132 {
2133 asmIMUL(reg1, reg2);
2134 }
2135
2136 void FASTCALL asmINC(NativeReg reg1)
2137 {
2138 jitcEmit1(0x40+reg1);
2139 }
2140 void FASTCALL asmINCReg(NativeReg reg1)
2141 {
2142 asmINC(reg1);
2143 }
2144
2145 void FASTCALL asmDECReg(NativeReg reg1)
2146 {
2147 jitcEmit1(0x48+reg1);
2148 }
2149 void FASTCALL asmDEC(NativeReg reg1)
2150 {
2151 asmDEC(reg1);
2152 }
2153
2154 void FASTCALL asmLEA(NativeReg reg1, byte *modrm, int len)
2155 {
2156 byte instr[15];
2157 instr[0] = 0x8d;
2158 memcpy(instr+1, modrm, len);
2159 instr[1] |= reg1<<3;
2160 jitcEmit(instr, len+1);
2161 }
2162 void FASTCALL asmLEA(NativeReg reg1, modrm_p modrm)
2163 {
2164 int len = modrm++[0];
2165
2166 asmLEA(reg1, modrm, len);
2167 }
2168
2169 void FASTCALL asmBTx(X86BitTest opc, NativeReg reg1, int value)
2170 {
2171 byte instr[4] = {0x0f, 0xba, 0xc0+(opc<<3)+reg1, value};
2172 jitcEmit(instr, sizeof instr);
2173 }
2174 void FASTCALL asmBTxRegImm(X86BitTest opc, NativeReg reg1, int value)
2175 {
2176 asmBTx(opc, reg1, value);
2177 }
2178
2179 void FASTCALL asmBTxMemImm(X86BitTest opc, byte *modrm, int len, int value)
2180 {
2181 byte instr[15];
2182 instr[0] = 0x0f;
2183 instr[1] = 0xba;
2184 memcpy(instr+2, modrm, len);
2185 instr[2] |= opc<<3;
2186 instr[len+2] = value;
2187 jitcEmit(instr, len+3);
2188 }
2189 void FASTCALL asmBTx(X86BitTest opc, modrm_p modrm, int value)
2190 {
2191 int len = modrm++[0];
2192
2193 asmBTxMemImm(opc, modrm, len, value);
2194 }
2195
2196 void FASTCALL asmBSx(X86BitSearch opc, NativeReg reg1, NativeReg reg2)
2197 {
2198 byte instr[3] = {0x0f, opc, 0xc0+(reg1<<3)+reg2};
2199 jitcEmit(instr, sizeof(instr));
2200 }
2201 void FASTCALL asmBSxRegReg(X86BitSearch opc, NativeReg reg1, NativeReg reg2)
2202 {
2203 asmBSx(opc, reg1, reg2);
2204 }
2205
2206 void FASTCALL asmBSWAP(NativeReg reg)
2207 {
2208 byte instr[2];
2209 instr[0] = 0x0f;
2210 instr[1] = 0xc8+reg;
2211 jitcEmit(instr, sizeof(instr));
2212 }
2213
2214 void FASTCALL asmJMP(NativeAddress to)
2215 {
2216 /*
2217 * We use jitcEmitAssure here, since
2218 * we have to know the exact address of the jump
2219 * instruction (since it is relative)
2220 */
2221 restart:
2222 byte instr[5];
2223 uint32 rel = (uint32)(to - (gJITC.currentPage->tcp+2));
2224 if (rel <= 0x7f || rel >= 0xffffff80) {
2225 if (!jitcEmitAssure(2)) goto restart;
2226 instr[0] = 0xeb;
2227 instr[1] = rel;
2228 jitcEmit(instr, 2);
2229 } else {
2230 if (!jitcEmitAssure(5)) goto restart;
2231 instr[0] = 0xe9;
2232 *((uint32 *)&instr[1]) = (uint32)(to - (gJITC.currentPage->tcp+5));
2233 // *((uint32 *)&instr[1]) = rel - 3;
2234 jitcEmit(instr, 5);
2235 }
2236 }
2237
2238 void FASTCALL asmJxx(X86FlagTest flags, NativeAddress to)
2239 {
2240 restart:
2241 byte instr[6];
2242 uint32 rel = (uint32)(to - (gJITC.currentPage->tcp+2));
2243 if (rel <= 0x7f || rel >= 0xffffff80) {
2244 if (!jitcEmitAssure(2)) goto restart;
2245 instr[0] = 0x70+flags;
2246 instr[1] = rel;
2247 jitcEmit(instr, 2);
2248 } else {
2249 if (!jitcEmitAssure(6)) goto restart;
2250 instr[0] = 0x0f;
2251 instr[1] = 0x80+flags;
2252 *((uint32 *)&instr[2]) = (uint32)(to - (gJITC.currentPage->tcp+6));
2253 // *((uint32 *)&instr[2]) = rel - 3;
2254 jitcEmit(instr, 6);
2255 }
2256 }
2257
2258 NativeAddress FASTCALL asmJMPFixup()
2259 {
2260 byte instr[5];
2261 instr[0] = 0xe9;
2262 jitcEmit(instr, 5);
2263 return gJITC.currentPage->tcp - 4;
2264 }
2265
2266 NativeAddress FASTCALL asmJxxFixup(X86FlagTest flags)
2267 {
2268 byte instr[6];
2269 instr[0] = 0x0f;
2270 instr[1] = 0x80+flags;
2271 jitcEmit(instr, 6);
2272 return gJITC.currentPage->tcp - 4;
2273 }
2274
2275 void FASTCALL asmResolveFixup(NativeAddress at, NativeAddress to)
2276 {
2277 /*
2278 * yes, I also didn't believe this could be real code until
2279 * I had written it. -Sebastian
2280 */
2281 if (to == 0) {
2282 to = gJITC.currentPage->tcp;
2283 }
2284 *((uint32 *)at) = (uint32)(to - ((uint32)at+4));
2285 }
2286
2287 void FASTCALL asmCALL(NativeAddress to)
2288 {
2289 jitcEmitAssure(5);
2290 byte instr[5];
2291 instr[0] = 0xe8;
2292 *((uint32 *)&instr[1]) = (uint32)(to - (gJITC.currentPage->tcp+5));
2293 jitcEmit(instr, 5);
2294 }
2295
2296 void FASTCALL asmSimple(X86SimpleOpc simple)
2297 {
2298 if (simple > 0xff) {
2299 jitcEmit((byte*)&simple, 2);
2300 } else {
2301 jitcEmit1(simple);
2302 }
2303 }
2304
2305 void FASTCALL asmFComp(X86FloatCompOp op, NativeFloatReg sti)
2306 {
2307 byte instr[2];
2308
2309 memcpy(instr, &op, 2);
2310 instr[1] += sti;
2311
2312 jitcEmit(instr, 2);
2313 }
2314 void FASTCALL asmFCompSTi(X86FloatCompOp op, NativeFloatReg sti)
2315 {
2316 asmFComp(op, sti);
2317 }
2318
2319 void FASTCALL asmFICompMem(X86FloatICompOp op, byte *modrm, int len)
2320 {
2321 byte instr[16];
2322
2323 instr[0] = op;
2324 memcpy(&instr[1], modrm, len);
2325 instr[1] |= 2<<3;
2326 jitcEmit(instr, len+1);
2327 }
2328 void FASTCALL asmFIComp(X86FloatICompOp op, modrm_p modrm)
2329 {
2330 int len = modrm++[0];
2331
2332 asmFICompMem(op, modrm, len);
2333 }
2334
2335 void FASTCALL asmFICompPMem(X86FloatICompOp op, byte *modrm, int len)
2336 {
2337 byte instr[16];
2338
2339 instr[0] = op;
2340 memcpy(&instr[1], modrm, len);
2341 instr[1] |= 3<<3;
2342 jitcEmit(instr, len+1);
2343 }
2344 void FASTCALL asmFICompP(X86FloatICompOp op, modrm_p modrm)
2345 {
2346 int len = modrm++[0];
2347
2348 asmFICompPMem(op, modrm, len);
2349 }
2350
2351 void FASTCALL asmFArithMem(X86FloatArithOp op, byte *modrm, int len)
2352 {
2353 int mod = 0;
2354 switch (op) {
2355 case X86_FADD:
2356 mod = 0;
2357 break;
2358 case X86_FMUL:
2359 mod = 1;
2360 break;
2361 case X86_FDIV:
2362 mod = 6;
2363 break;
2364 case X86_FDIVR:
2365 mod = 7;
2366 break;
2367 case X86_FSUB:
2368 mod = 4;
2369 break;
2370 case X86_FSUBR:
2371 mod = 5;
2372 break;
2373 }
2374 byte instr[15];
2375 instr[0] = 0xdc;
2376 memcpy(instr+1, modrm, len);
2377 instr[1] |= mod<<3;
2378 jitcEmit(instr, len+1);
2379 }
2380 void FASTCALL asmFArith(X86FloatArithOp op, modrm_p modrm)
2381 {
2382 int len = modrm++[0];
2383
2384 asmFArithMem(op, modrm, len);
2385 }
2386
2387 void FASTCALL asmFArith_ST0(X86FloatArithOp op, NativeFloatReg sti)
2388 {
2389 byte instr[2] = {0xd8, op+sti};
2390 jitcEmit(instr, sizeof instr);
2391 }
2392 void FASTCALL asmFArithST0(X86FloatArithOp op, NativeFloatReg sti)
2393 {
2394 asmFArith_ST0(op, sti);
2395 }
2396
2397 void FASTCALL asmFArith_STi(X86FloatArithOp op, NativeFloatReg sti)
2398 {
2399 byte instr[2] = {0xdc, op+sti};
2400 jitcEmit(instr, sizeof instr);
2401 }
2402 void FASTCALL asmFArithSTi(X86FloatArithOp op, NativeFloatReg sti)
2403 {
2404 asmFArith_STi(op, sti);
2405 }
2406
2407 void FASTCALL asmFArithP_STi(X86FloatArithOp op, NativeFloatReg sti)
2408 {
2409 byte instr[2] = {0xde, op+sti};
2410 jitcEmit(instr, sizeof instr);
2411 }
2412 void FASTCALL asmFArithSTiP(X86FloatArithOp op, NativeFloatReg sti)
2413 {
2414 asmFArithP_STi(op, sti);
2415 }
2416
2417 void FASTCALL asmFXCH(NativeFloatReg sti)
2418 {
2419 byte instr[2] = {0xd9, 0xc8+sti};
2420 jitcEmit(instr, sizeof instr);
2421 }
2422 void FASTCALL asmFXCHSTi(NativeFloatReg sti)
2423 {
2424 asmFXCH(sti);
2425 }
2426
2427 void FASTCALL asmFFREE(NativeFloatReg sti)
2428 {
2429 byte instr[2] = {0xdd, 0xc0+sti};
2430 jitcEmit(instr, sizeof instr);
2431 }
2432 void FASTCALL asmFFREESTi(NativeFloatReg sti)
2433 {
2434 asmFFREE(sti);
2435 }
2436
2437 void FASTCALL asmFFREEP(NativeFloatReg sti)
2438 {
2439 /*
2440 * AMD says:
2441 * "Note that the FREEP instructions, although insufficiently
2442 * documented in the past, is supported by all 32-bit x86 processors."
2443 */
2444 byte instr[2] = {0xdf, 0xc0+sti};
2445 jitcEmit(instr, sizeof instr);
2446 }
2447 void FASTCALL asmFFREEPSTi(NativeFloatReg sti)
2448 {
2449 asmFFREEP(sti);
2450 }
2451
2452 void FASTCALL asmFSimple(X86FloatOp op)
2453 {
2454 jitcEmit((byte*)&op, 2);
2455 }
2456 void FASTCALL asmFSimpleST0(X86FloatOp op)
2457 {
2458 asmFSimple(op);
2459 }
2460
2461 void FASTCALL asmFLDSingleMem(byte *modrm, int len)
2462 {
2463 byte instr[15];
2464 instr[0] = 0xd9;
2465 memcpy(instr+1, modrm, len);
2466 jitcEmit(instr, len+1);
2467 }
2468 void FASTCALL asmFLD_Single(modrm_p modrm)
2469 {
2470 int len = modrm++[0];
2471
2472 asmFLDSingleMem(modrm, len);
2473 }
2474
2475 void FASTCALL asmFLDDoubleMem(byte *modrm, int len)
2476 {
2477 byte instr[15];
2478 instr[0] = 0xdd;
2479 memcpy(instr+1, modrm, len);
2480 jitcEmit(instr, len+1);
2481 }
2482 void FASTCALL asmFLD_Double(modrm_p modrm)
2483 {
2484 int len = modrm++[0];
2485
2486 asmFLDDoubleMem(modrm, len);
2487 }
2488
2489 void FASTCALL asmFLD(NativeFloatReg sti)
2490 {
2491 byte instr[2] = {0xd9, 0xc0+sti};
2492 jitcEmit(instr, sizeof instr);
2493 }
2494 void FASTCALL asmFLDSTi(NativeFloatReg sti)
2495 {
2496 asmFLD(sti);
2497 }
2498
2499 void FASTCALL asmFILD16(byte *modrm, int len)
2500 {
2501 byte instr[15];
2502 instr[0] = 0xdf;
2503 memcpy(instr+1, modrm, len);
2504 jitcEmit(instr, len+1);
2505 }
2506 void FASTCALL asmFILD_W(modrm_p modrm)
2507 {
2508 int len = modrm++[0];
2509
2510 asmFILD16(modrm, len);
2511 }
2512
2513 void FASTCALL asmFILD(byte *modrm, int len)
2514 {
2515 byte instr[15];
2516 instr[0] = 0xdb;
2517 memcpy(instr+1, modrm, len);
2518 jitcEmit(instr, len+1);
2519 }
2520 void FASTCALL asmFILD_D(modrm_p modrm)
2521 {
2522 int len = modrm++[0];
2523
2524 asmFILD(modrm, len);
2525 }
2526
2527 void FASTCALL asmFILD_Q(modrm_p modrm)
2528 {
2529 byte instr[15];
2530 instr[0] = 0xdf;
2531 memcpy(instr+1, modrm+1, modrm[0]);
2532 instr[1] |= 5<<3;
2533 jitcEmit(instr, modrm[0]+1);
2534 }
2535
2536 void FASTCALL asmFSTSingleMem(byte *modrm, int len)
2537 {
2538 byte instr[15];
2539 instr[0] = 0xd9;
2540 memcpy(instr+1, modrm, len);
2541 instr[1] |= 2<<3;
2542 jitcEmit(instr, len+1);
2543 }
2544 void FASTCALL asmFST_Single(modrm_p modrm)
2545 {
2546 int len = modrm++[0];
2547
2548 asmFSTSingleMem(modrm, len);
2549 }
2550
2551 void FASTCALL asmFSTPSingleMem(byte *modrm, int len)
2552 {
2553 byte instr[15];
2554 instr[0] = 0xd9;
2555 memcpy(instr+1, modrm, len);
2556 instr[1] |= 3<<3;
2557 jitcEmit(instr, len+1);
2558 }
2559 void FASTCALL asmFSTP_Single(modrm_p modrm)
2560 {
2561 int len = modrm++[0];
2562
2563 asmFSTPSingleMem(modrm, len);
2564 }
2565
2566 void FASTCALL asmFSTDoubleMem(byte *modrm, int len)
2567 {
2568 byte instr[15];
2569 instr[0] = 0xdd;
2570 memcpy(instr+1, modrm, len);
2571 instr[1] |= 2<<3;
2572 jitcEmit(instr, len+1);
2573 }
2574 void FASTCALL asmFST_Double(modrm_p modrm)
2575 {
2576 int len = modrm++[0];
2577
2578 asmFSTDoubleMem(modrm, len);
2579 }
2580
2581 void FASTCALL asmFSTPDoubleMem(byte *modrm, int len)
2582 {
2583 byte instr[15];
2584 instr[0] = 0xdd;
2585 memcpy(instr+1, modrm, len);
2586 instr[1] |= 3<<3;
2587 jitcEmit(instr, len+1);
2588 }
2589 void FASTCALL asmFSTP_Double(modrm_p modrm)
2590 {
2591 int len = modrm++[0];
2592
2593 asmFSTPDoubleMem(modrm, len);
2594 }
2595
2596 void FASTCALL asmFST(NativeFloatReg sti)
2597 {
2598 byte instr[2] = {0xdd, 0xd0+sti};
2599 jitcEmit(instr, sizeof instr);
2600 }
2601 void FASTCALL asmFSTDSTi(NativeFloatReg sti)
2602 {
2603 asmFST(sti);
2604 }
2605
2606 void FASTCALL asmFSTP(NativeFloatReg sti)
2607 {
2608 byte instr[2] = {0xdd, 0xd8+sti};
2609 jitcEmit(instr, sizeof instr);
2610 }
2611 void FASTCALL asmFSTDPSTi(NativeFloatReg sti)
2612 {
2613 asmFSTP(sti);
2614 }
2615
2616 void FASTCALL asmFISTP_W(modrm_p modrm)
2617 {
2618 byte instr[15];
2619 instr[0] = 0xdf;
2620 memcpy(instr+1, modrm+1, modrm[0]);
2621 instr[1] |= 3<<3;
2622 jitcEmit(instr, modrm[0]+1);
2623 }
2624
2625 void FASTCALL asmFISTPMem(byte *modrm, int len)
2626 {
2627 byte instr[15];
2628 instr[0] = 0xdb;
2629 memcpy(instr+1, modrm, len);
2630 instr[1] |= 3<<3;
2631 jitcEmit(instr, len+1);
2632 }
2633 void FASTCALL asmFISTP_D(modrm_p modrm)
2634 {
2635 int len = modrm++[0];
2636
2637 asmFISTPMem(modrm, len);
2638 }
2639
2640 void FASTCALL asmFISTPMem64(byte *modrm, int len)
2641 {
2642 byte instr[15];
2643 instr[0] = 0xdf;
2644 memcpy(instr+1, modrm, len);
2645 instr[1] |= 7<<3;
2646 jitcEmit(instr, len+1);
2647 }
2648 void FASTCALL asmFISTP_Q(modrm_p modrm)
2649 {
2650 int len = modrm++[0];
2651
2652 asmFISTPMem64(modrm, len);
2653 }
2654
2655 void FASTCALL asmFISTTPMem(byte *modrm, int len)
2656 {
2657 byte instr[15];
2658 instr[0] = 0xdb;
2659 memcpy(instr+1, modrm, len);
2660 instr[1] |= 1<<3;
2661 jitcEmit(instr, len+1);
2662 }
2663 void FASTCALL asmFISTTP(modrm_p modrm)
2664 {
2665 int len = modrm++[0];
2666
2667 asmFISTTPMem(modrm, len);
2668 }
2669
2670 void FASTCALL asmFLDCWMem(byte *modrm, int len)
2671 {
2672 byte instr[15];
2673 instr[0] = 0xd9;
2674 memcpy(instr+1, modrm, len);
2675 instr[1] |= 5<<3;
2676 jitcEmit(instr, len+1);
2677 }
2678 void FASTCALL asmFLDCW(modrm_p modrm)
2679 {
2680 int len = modrm++[0];
2681
2682 asmFLDCWMem(modrm, len);
2683 }
2684
2685 void FASTCALL asmFSTCWMem(byte *modrm, int len)
2686 {
2687 byte instr[15];
2688 instr[0] = 0xd9;
2689 memcpy(instr+1, modrm, len);
2690 instr[1] |= 7<<3;
2691 jitcEmit(instr, len+1);
2692 }
2693 void FASTCALL asmFSTCW(modrm_p modrm)
2694 {
2695 int len = modrm++[0];
2696
2697 asmFSTCWMem(modrm, len);
2698 }
2699
2700 void FASTCALL asmFSTSWMem(byte *modrm, int len)
2701 {
2702 byte instr[15];
2703 instr[0] = 0xdd;
2704 memcpy(instr+1, modrm, len);
2705 instr[1] |= 7<<3;
2706 jitcEmit(instr, len+1);
2707 }
2708 void FASTCALL asmFSTSW(modrm_p modrm)
2709 {
2710 int len = modrm++[0];
2711
2712 asmFSTSWMem(modrm, len);
2713 }
2714
2715 void FASTCALL asmFSTSW_EAX(void)
2716 {
2717 byte instr[15] = { 0xdf, 0xe0 };
2718 jitcEmit(instr, 2);
2719 }
2720
2721 /*
2722 * Maps one client vector register to one native vector register
2723 * Will never emit any code.
2724 */
2725 static inline void FASTCALL jitcMapVectorRegister(NativeVectorReg nreg, JitcVectorReg creg)
2726 {
2727 //printf("*** map: XMM%u (vr%u)\n", nreg, creg);
2728 gJITC.n2cVectorReg[nreg] = creg;
2729 gJITC.c2nVectorReg[creg] = nreg;
2730
2731 gJITC.nativeVectorRegState[nreg] = rsMapped;
2732 }
2733
2734 /*
2735 * Unmaps the native vector register from any client vector register
2736 * Will never emit any code.
2737 */
2738 static inline void FASTCALL jitcUnmapVectorRegister(NativeVectorReg nreg)
2739 {
2740 JitcVectorReg creg = gJITC.n2cVectorReg[nreg];
2741
2742 if (nreg != VECTREG_NO && creg != PPC_VECTREG_NO) {
2743 //printf("*** unmap: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]);
2744
2745 gJITC.n2cVectorReg[nreg] = PPC_VECTREG_NO;
2746 gJITC.c2nVectorReg[creg] = VECTREG_NO;
2747
2748 gJITC.nativeVectorRegState[nreg] = rsUnused;
2749 }
2750 }
2751
2752 /*
2753 * Marks the native vector register as dirty.
2754 * Does *not* touch native vector register.
2755 * Will not produce code.
2756 */
2757 void FASTCALL jitcDirtyVectorRegister(NativeVectorReg nreg)
2758 {
2759 JitcVectorReg creg = gJITC.n2cVectorReg[nreg];
2760
2761 //printf("*** dirty(%u) with creg = %u\n", nreg, creg);
2762
2763 if (creg == JITC_VECTOR_NEG1 || creg == PPC_VECTREG_NO) {
2764 //printf("*** dirty: %u = %u or %u\n", creg, JITC_VECTOR_NEG1, PPC_REG_NO);
2765 return;
2766 }
2767
2768 if (gJITC.nativeVectorRegState[nreg] == rsUnused) {
2769 printf("!!! Attemped dirty of an anonymous vector register!\n");
2770 return;
2771 }
2772
2773 if (creg == gJITC.nativeVectorReg) {
2774 gJITC.nativeVectorReg = VECTREG_NO;
2775 }
2776
2777 gJITC.nativeVectorRegState[nreg] = rsDirty;
2778 }
2779
2780 /*
2781 * Marks the native vector register as non-dirty.
2782 * Does *not* flush native vector register.
2783 * Will not produce code.
2784 */
2785 static inline void FASTCALL jitcUndirtyVectorRegister(NativeVectorReg nreg)
2786 {
2787 if (gJITC.nativeVectorRegState[nreg] > rsMapped) {
2788 //printf("*** undirty: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]);
2789
2790 gJITC.nativeVectorRegState[nreg] = rsMapped;
2791 }
2792 }
2793
2794 /*
2795 * Loads a native vector register with its mapped value.
2796 * Does not alter the native vector register's markings.
2797 * Will always emit an load.
2798 */
2799 static inline void FASTCALL jitcLoadVectorRegister(NativeVectorReg nreg)
2800 {
2801 JitcVectorReg creg = gJITC.n2cVectorReg[nreg];
2802
2803 if (creg == JITC_VECTOR_NEG1 && gJITC.hostCPUCaps.sse2) {
2804 //printf("*** load neg1: XMM%u\n", nreg);
2805
2806 /* On a P4, we can load -1 far faster with logic */
2807 asmPALU(PALUD(X86_PCMPEQ), nreg, nreg);
2808 return;
2809 }
2810
2811 //printf("*** load: XMM%u (vr%u)\n", nreg, creg);
2812 asmMOVAPS(nreg, &gCPU.vr[creg]);
2813 }
2814
2815 /*
2816 * Stores a native vector register to its mapped client vector register.
2817 * Does not alter the native vector register's markings.
2818 * Will always emit a store.
2819 */
2820 static inline void FASTCALL jitcStoreVectorRegister(NativeVectorReg nreg)
2821 {
2822 JitcVectorReg creg = gJITC.n2cVectorReg[nreg];
2823
2824 if (creg == JITC_VECTOR_NEG1 || creg == PPC_VECTREG_NO)
2825 return;
2826
2827 //printf("*** store: XMM%u (vr%u)\n", nreg, creg);
2828
2829 asmMOVAPS(&gCPU.vr[creg], nreg);
2830 }
2831
2832 /*
2833 * Returns the native vector register that is mapped to the client
2834 * vector register.
2835 * Will never emit any code.
2836 */
2837 NativeVectorReg FASTCALL jitcGetClientVectorRegisterMapping(JitcVectorReg creg)
2838 {
2839 return gJITC.c2nVectorReg[creg];
2840 }
2841
2842 /*
2843 * Makes the vector register the least recently used vector register.
2844 * Will never emit any code.
2845 */
2846 static inline void FASTCALL jitcDiscardVectorRegister(NativeVectorReg nreg)
2847 {
2848 NativeVectorReg lreg, mreg;
2849
2850 mreg = gJITC.MRUvregs[nreg];
2851 lreg = gJITC.LRUvregs[nreg];
2852
2853 // remove from the list
2854 gJITC.MRUvregs[lreg] = mreg;
2855 gJITC.LRUvregs[mreg] = lreg;
2856
2857 mreg = gJITC.MRUvregs[XMM_SENTINEL];
2858
2859 // insert into the list in the LRU spot
2860 gJITC.LRUvregs[nreg] = XMM_SENTINEL;
2861 gJITC.MRUvregs[nreg] = mreg;
2862
2863 gJITC.LRUvregs[mreg] = nreg;
2864 gJITC.MRUvregs[XMM_SENTINEL] = nreg;
2865 }
2866
2867 /*
2868 * Makes the vector register the most recently used vector register.
2869 * Will never emit any code.
2870 */
2871 void FASTCALL jitcTouchVectorRegister(NativeVectorReg nreg)
2872 {
2873 NativeVectorReg lreg, mreg;
2874
2875 mreg = gJITC.MRUvregs[nreg];
2876 lreg = gJITC.LRUvregs[nreg];
2877
2878 // remove from the list
2879 gJITC.MRUvregs[lreg] = mreg;
2880 gJITC.LRUvregs[mreg] = lreg;
2881
2882 lreg = gJITC.LRUvregs[XMM_SENTINEL];
2883
2884 // insert into the list in the LRU spot
2885 gJITC.MRUvregs[nreg] = XMM_SENTINEL;
2886 gJITC.LRUvregs[nreg] = lreg;
2887
2888 gJITC.MRUvregs[lreg] = nreg;
2889 gJITC.LRUvregs[XMM_SENTINEL] = nreg;
2890 }
2891
2892 /*
2893 * Unmaps a native vector register, and marks it least recently used.
2894 * Will not emit any code.
2895 */
2896 void FASTCALL jitcDropSingleVectorRegister(NativeVectorReg nreg)
2897 {
2898 jitcDiscardVectorRegister(nreg);
2899 jitcUnmapVectorRegister(nreg);
2900 }
2901
2902 int FASTCALL jitcAssertFlushedVectorRegister(JitcVectorReg creg)
2903 {
2904 NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
2905
2906 if (nreg != VECTREG_NO && gJITC.nativeVectorRegState[nreg] == rsDirty) {
2907 printf("!!! Unflushed vector XMM%u (vr%u)!\n", nreg, creg);
2908 return 1;
2909 }
2910 return 0;
2911 }
2912 int FASTCALL jitcAssertFlushedVectorRegisters()
2913 {
2914 int ret = 0;
2915
2916 for (JitcVectorReg i=0; i<32; i++)
2917 ret |= jitcAssertFlushedVectorRegister(i);
2918
2919 return ret;
2920 }
2921
2922 void FASTCALL jitcShowVectorRegisterStatus(JitcVectorReg creg)
2923 {
2924 NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
2925
2926 if (nreg != VECTREG_NO) {
2927 int status = gJITC.nativeVectorRegState[nreg];
2928 char *text;
2929
2930 if (status == rsUnused)
2931 text = "unused";
2932 else if (status == rsMapped)
2933 text = "mapped";
2934 else if (status == rsDirty)
2935 text = "dirty";
2936 else
2937 text = "unknown";
2938
2939 //printf("*** vr%u => XMM%u (%s)\n", creg, nreg, text);
2940 } else {
2941 //printf("*** vr%u => memory\n", creg);
2942 }
2943 }
2944
2945 /*
2946 * If the native vector register is marked dirty, then it writes that
2947 * value out to the client vector register store.
2948 * Will produce a store, if the native vector register is dirty.
2949 */
2950 static inline void FASTCALL jitcFlushSingleVectorRegister(NativeVectorReg nreg)
2951 {
2952 if (gJITC.nativeVectorRegState[nreg] == rsDirty) {
2953 //printf("*** flush: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]);
2954 jitcStoreVectorRegister(nreg);
2955 }
2956 }
2957
2958 /*
2959 * Flushes the register, frees it, and makes it least recently used.
2960 * Will produce a store, if the native vector register was dirty.
2961 */
2962 static inline void FASTCALL jitcTrashSingleVectorRegister(NativeVectorReg nreg)
2963 {
2964 if (gJITC.nativeVectorRegState[nreg] > rsUnused) {
2965 //printf("*** trash: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]);
2966 }
2967
2968 jitcFlushSingleVectorRegister(nreg);
2969 jitcDropSingleVectorRegister(nreg);
2970 }
2971
2972 /*
2973 * Flushes the register, frees it, and makes it most recently used.
2974 * Will produce a store, if the native vector register was dirty.
2975 */
2976 static inline void FASTCALL jitcClobberSingleVectorRegister(NativeVectorReg nreg)
2977 {
2978 if (gJITC.nativeVectorRegState[nreg] > rsUnused) {
2979 //printf("*** clobber: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]);
2980 }
2981
2982 jitcFlushSingleVectorRegister(nreg);
2983 jitcTouchVectorRegister(nreg);
2984 jitcUnmapVectorRegister(nreg);
2985 }
2986
2987 /*
2988 * Allocates a native vector register.
2989 * If hint is non-zero, then it indicates that the value is unlikely
2990 * to be re-used soon, so to keep it at the end of the LRU.
2991 * To use hints, pass hint == the number of temporary registers
2992 * May produce a store, if no native vector registers are available.
2993 */
2994 NativeVectorReg FASTCALL jitcAllocVectorRegister(int hint)
2995 {
2996 NativeVectorReg nreg = gJITC.MRUvregs[XMM_SENTINEL];
2997
2998 if (hint >= XMM_SENTINEL) {
2999 nreg = gJITC.LRUvregs[nreg];
3000
3001 jitcTrashSingleVectorRegister(nreg);
3002 } else if (hint) {
3003 for (int i=1; i<hint; i++) {
3004 nreg = gJITC.MRUvregs[nreg];
3005 }
3006
3007 jitcTrashSingleVectorRegister(nreg);
3008 } else {
3009 jitcClobberSingleVectorRegister(nreg);
3010 }
3011
3012 return nreg;
3013 }
3014
3015 /*
3016 * Returns native vector register that contains value of client
3017 * register or allocates new vector register which maps to
3018 * the client register.
3019 * Marks the register dirty.
3020 *
3021 * May produce a store, if no registers are available.
3022 * Will never produce a load.
3023 */
3024 NativeVectorReg FASTCALL jitcMapClientVectorRegisterDirty(JitcVectorReg creg, int hint)
3025 {
3026 NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
3027
3028 if (nreg == VECTREG_NO) {
3029 nreg = jitcAllocVectorRegister(hint);
3030
3031 jitcMapVectorRegister(nreg, creg);
3032 } else if (hint) {
3033 jitcDiscardVectorRegister(nreg);
3034 } else {
3035 jitcTouchVectorRegister(nreg);
3036 }
3037
3038 jitcDirtyVectorRegister(nreg);
3039
3040 return nreg;
3041 }
3042
3043 /*
3044 * Returns native vector register that contains the value of the
3045 * client vector register, or allocates new register, and
3046 * loads this value into it.
3047 *
3048 * May produce a store, if no register are available.
3049 * May produce a load, if client vector register isn't mapped.
3050 */
3051 NativeVectorReg FASTCALL jitcGetClientVectorRegister(JitcVectorReg creg, int hint)
3052 {
3053 NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
3054
3055 if (nreg == VECTREG_NO) {
3056 nreg = jitcAllocVectorRegister(hint);
3057 jitcMapVectorRegister(nreg, creg);
3058
3059 jitcLoadVectorRegister(nreg);
3060 } else if (hint) {
3061 jitcDiscardVectorRegister(nreg);
3062 } else {
3063 jitcTouchVectorRegister(nreg);
3064 }
3065
3066 return nreg;
3067 }
3068
3069 /*
3070 * Returns native vector register that contains the value of the
3071 * client vector register, or allocates new register, and
3072 * loads this value into it.
3073 * Will mark the native vector register as dirty.
3074 *
3075 * May produce a store, if no register are available.
3076 * May produce a load, if client vector register isn't mapped.
3077 */
3078 NativeVectorReg FASTCALL jitcGetClientVectorRegisterDirty(JitcVectorReg creg, int hint)
3079 {
3080 NativeVectorReg nreg = jitcGetClientVectorRegister(creg, hint);
3081
3082 jitcDirtyVectorRegister(nreg);
3083
3084 return nreg;
3085 }
3086
3087 /*
3088 * Flushes native vector register(s).
3089 * Resets dirty flags.
3090 * Will produce stores, if vector registers are dirty.
3091 */
3092 void FASTCALL jitcFlushVectorRegister(int options)
3093 {
3094 if (options == JITC_VECTOR_REGS_ALL) {
3095 for (unsigned int i = XMM0; i <= XMM7; i++) {
3096 jitcFlushSingleVectorRegister((NativeVectorReg)i);
3097 jitcUndirtyVectorRegister((NativeVectorReg)i);
3098 }
3099 } else if (options & NATIVE_REG) {
3100 NativeVectorReg nreg = (NativeVectorReg)(options & 0xf);
3101
3102 jitcFlushSingleVectorRegister(nreg);
3103 jitcUndirtyVectorRegister(nreg);
3104 }
3105 }
3106
3107 /*
3108 * Flushes native vector register(s).
3109 * Doesn't reset dirty flags.
3110 * Will produce stores, if vector registers are dirty.
3111 */
3112 void FASTCALL jitcFlushVectorRegisterDirty(int options)
3113 {
3114 if (options == JITC_VECTOR_REGS_ALL) {
3115 for (unsigned int i = XMM0; i <= XMM7; i++) {
3116 jitcFlushSingleVectorRegister((NativeVectorReg)i);
3117 }
3118 } else if (options & NATIVE_REG) {
3119 NativeVectorReg nreg = (NativeVectorReg)(options & 0xf);
3120
3121 jitcFlushSingleVectorRegister(nreg);
3122 }
3123 }
3124
3125 /*
3126 * Clobbers native vector register(s).
3127 * Will produce stores, if vector registers are dirty.
3128 */
3129 void FASTCALL jitcClobberVectorRegister(int options)
3130 {
3131 if (options == JITC_VECTOR_REGS_ALL) {
3132 for (unsigned int i = XMM0; i <= XMM7; i++) {
3133 jitcClobberSingleVectorRegister((NativeVectorReg)i);
3134 }
3135 } else if (options & NATIVE_REG) {
3136 NativeVectorReg nreg = (NativeVectorReg)(options & 0xf);
3137
3138 jitcClobberSingleVectorRegister(nreg);
3139 }
3140 }
3141
3142 /*
3143 * Trashes native vector register(s).
3144 * Will produce stores, if vector registers are dirty.
3145 */
3146 void FASTCALL jitcTrashVectorRegister(int options)
3147 {
3148 if (options == JITC_VECTOR_REGS_ALL) {
3149 for (unsigned int i = XMM0; i <= XMM7; i++) {
3150 jitcTrashSingleVectorRegister((NativeVectorReg)i);
3151 }
3152 } else if (options & NATIVE_REG) {
3153 NativeVectorReg nreg = (NativeVectorReg)(options & 0xf);
3154
3155 jitcTrashSingleVectorRegister(nreg);
3156 }
3157 }
3158
3159 /*
3160 * Drops native vector register(s).
3161 * Will not produce any code.
3162 */
3163 void FASTCALL jitcDropVectorRegister(int options)
3164 {
3165 if (options == JITC_VECTOR_REGS_ALL) {
3166 for (unsigned int i = XMM0; i <= XMM7; i++) {
3167 jitcDropSingleVectorRegister((NativeVectorReg)i);
3168 }
3169 } else if (options & NATIVE_REG) {
3170 NativeVectorReg nreg = (NativeVectorReg)(options & 0xf);
3171
3172 jitcDropSingleVectorRegister(nreg);
3173 }
3174 }
3175
3176 void FASTCALL jitcFlushClientVectorRegister(JitcVectorReg creg)
3177 {
3178 NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
3179
3180 if (nreg != VECTREG_NO) {
3181 jitcFlushSingleVectorRegister(nreg);
3182 jitcUndirtyVectorRegister(nreg);
3183 }
3184 }
3185
3186 void FASTCALL jitcTrashClientVectorRegister