/[pearpc]/src/cpu/cpu_jitc_x86/x86asm.cc
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /src/cpu/cpu_jitc_x86/x86asm.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Wed Sep 5 17:11:21 2007 UTC (16 years, 7 months ago) by dpavlin
File size: 76860 byte(s)
import upstream CVS
1 dpavlin 1 /*
2     * PearPC
3     * x86asm.cc
4     *
5     * Copyright (C) 2004 Sebastian Biallas (sb@biallas.net)
6     *
7     * This program is free software; you can redistribute it and/or modify
8     * it under the terms of the GNU General Public License version 2 as
9     * published by the Free Software Foundation.
10     *
11     * This program is distributed in the hope that it will be useful,
12     * but WITHOUT ANY WARRANTY; without even the implied warranty of
13     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14     * GNU General Public License for more details.
15     *
16     * You should have received a copy of the GNU General Public License
17     * along with this program; if not, write to the Free Software
18     * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19     */
20    
21     #include <cstring>
22     #include <cstdlib>
23    
24     #include "tools/debug.h"
25     #include "tools/snprintf.h"
26     #include "jitc.h"
27     #include "jitc_asm.h"
28     #include "jitc_debug.h"
29     #include "x86asm.h"
30    
31     void x86GetCaps(X86CPUCaps &caps)
32     {
33     memset(&caps, 0, sizeof caps);
34    
35     caps.loop_align = 8;
36    
37     struct {
38     uint32 level, c, d, b;
39     } id;
40    
41     if (!ppc_cpuid_asm(0, &id)) {
42     ht_snprintf(caps.vendor, sizeof caps.vendor, "unknown");
43     return;
44     }
45    
46     *((uint32 *)caps.vendor) = id.b;
47     *((uint32 *)(caps.vendor+4)) = id.d;
48     *((uint32 *)(caps.vendor+8)) = id.c;
49     caps.vendor[12] = 0;
50     ht_printf("%s\n", caps.vendor);
51     if (id.level == 0) return;
52    
53     struct {
54     uint32 model, features2, features, b;
55     } id2;
56    
57     ppc_cpuid_asm(1, &id2);
58     caps.rdtsc = id2.features & (1<<4);
59     caps.cmov = id2.features & (1<<15);
60     caps.mmx = id2.features & (1<<23);
61     caps._3dnow = id2.features & (1<<31);
62     caps._3dnow2 = id2.features & (1<<30);
63     caps.sse = id2.features & (1<<25);
64     caps.sse2 = id2.features & (1<<26);
65     caps.sse3 = id2.features2 & (1<<0);
66    
67     ppc_cpuid_asm(0x80000000, &id);
68     if (id.level >= 0x80000001) {
69     // processor supports extended functions
70     // now test for 3dnow
71     ppc_cpuid_asm(0x80000001, &id2);
72    
73     caps._3dnow = id2.features & (1<<31);
74     caps._3dnow2 = id2.features & (1<<30);
75     }
76    
77     ht_printf("%s%s%s%s%s%s%s\n",
78     caps.cmov?" CMOV":"",
79     caps.mmx?" MMX":"",
80     caps._3dnow?" 3DNOW":"",
81     caps._3dnow2?" 3DNOW+":"",
82     caps.sse?" SSE":"",
83     caps.sse2?" SSE2":"",
84     caps.sse3?" SSE3":"");
85     }
86    
87     /*
88     * internal functions
89     */
90    
91     static inline void FASTCALL jitcMapRegister(NativeReg nreg, PPC_Register creg)
92     {
93     gJITC.nativeReg[nreg] = creg;
94     gJITC.clientReg[creg] = nreg;
95     }
96    
97     static inline void FASTCALL jitcUnmapRegister(NativeReg reg)
98     {
99     gJITC.clientReg[gJITC.nativeReg[reg]] = REG_NO;
100     gJITC.nativeReg[reg] = PPC_REG_NO;
101     }
102    
103     static inline void FASTCALL jitcLoadRegister(NativeReg nreg, PPC_Register creg)
104     {
105     asmMOVRegDMem(nreg, (uint32)&gCPU+creg);
106     jitcMapRegister(nreg, creg);
107     gJITC.nativeRegState[nreg] = rsMapped;
108     }
109    
110     static inline void FASTCALL jitcStoreRegister(NativeReg nreg, PPC_Register creg)
111     {
112     asmMOVDMemReg((uint32)&gCPU+creg, nreg);
113     }
114    
115     static inline void FASTCALL jitcStoreRegisterUndirty(NativeReg nreg, PPC_Register creg)
116     {
117     jitcStoreRegister(nreg, creg);
118     gJITC.nativeRegState[nreg] = rsMapped; // no longer dirty
119     }
120    
121     static inline PPC_Register FASTCALL jitcGetRegisterMapping(NativeReg reg)
122     {
123     return gJITC.nativeReg[reg];
124     }
125    
126     NativeReg FASTCALL jitcGetClientRegisterMapping(PPC_Register creg)
127     {
128     return gJITC.clientReg[creg];
129     }
130    
131     static inline void FASTCALL jitcDiscardRegister(NativeReg r)
132     {
133     // FIXME: move to front of the LRU list
134     gJITC.nativeRegState[r] = rsUnused;
135     }
136    
137     /*
138     * Puts native register to the end of the LRU list
139     */
140     void FASTCALL jitcTouchRegister(NativeReg r)
141     {
142     NativeRegType *reg = gJITC.nativeRegsList[r];
143     if (reg->moreRU) {
144     // there's a more recently used register
145     if (reg->lessRU) {
146     reg->lessRU->moreRU = reg->moreRU;
147     reg->moreRU->lessRU = reg->lessRU;
148     } else {
149     // reg was LRUreg
150     gJITC.LRUreg = reg->moreRU;
151     reg->moreRU->lessRU = NULL;
152     }
153     reg->moreRU = NULL;
154     reg->lessRU = gJITC.MRUreg;
155     gJITC.MRUreg->moreRU = reg;
156     gJITC.MRUreg = reg;
157     }
158     }
159    
160     /*
161     * clobbers and moves to end of LRU list
162     */
163     static inline void FASTCALL jitcClobberAndTouchRegister(NativeReg reg)
164     {
165     switch (gJITC.nativeRegState[reg]) {
166     case rsDirty:
167     jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
168     // fall throu
169     case rsMapped:
170     jitcUnmapRegister(reg);
171     gJITC.nativeRegState[reg] = rsUnused;
172     break;
173     case rsUnused:;
174     }
175     jitcTouchRegister(reg);
176     }
177    
178     /*
179     * clobbers and moves to front of LRU list
180     */
181     static inline void FASTCALL jitcClobberAndDiscardRegister(NativeReg reg)
182     {
183     switch (gJITC.nativeRegState[reg]) {
184     case rsDirty:
185     jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
186     // fall throu
187     case rsMapped:
188     jitcUnmapRegister(reg);
189     jitcDiscardRegister(reg);
190     break;
191     case rsUnused:;
192     /*
193     * Note: it makes no sense to move this register to
194     * the front of the LRU list here, since only
195     * other unused register can be before it in the list
196     *
197     * Note2: it would even be an error to move it here,
198     * since ESP isn't in the nativeRegsList
199     */
200     }
201     }
202    
203     void FASTCALL jitcClobberSingleRegister(NativeReg reg)
204     {
205     switch (gJITC.nativeRegState[reg]) {
206     case rsDirty:
207     jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
208     // fall throu
209     case rsMapped:
210     jitcUnmapRegister(reg);
211     gJITC.nativeRegState[reg] = rsUnused;
212     break;
213     case rsUnused:;
214     }
215     }
216    
217     /*
218     * Dirty register.
219     * Does *not* touch register
220     * Will not produce code.
221     */
222     NativeReg FASTCALL jitcDirtyRegister(NativeReg r)
223     {
224     gJITC.nativeRegState[r] = rsDirty;
225     return r;
226     }
227    
228     NativeReg FASTCALL jitcAllocFixedRegister(NativeReg reg)
229     {
230     jitcClobberAndTouchRegister(reg);
231     return reg;
232     }
233    
234     /*
235     * Allocates a native register
236     * May produce a store if no registers are avaiable
237     */
238     NativeReg FASTCALL jitcAllocRegister(int options)
239     {
240     NativeReg reg;
241     if (options & NATIVE_REG) {
242     // allocate fixed register
243     reg = (NativeReg)(options & 0xf);
244     } else if (options & NATIVE_REG_8) {
245     // allocate eax, ecx, edx or ebx
246     NativeRegType *rt = gJITC.LRUreg;
247     while (rt->reg > EBX) rt = rt->moreRU;
248     reg = rt->reg;
249     } else {
250     // allocate random register
251     reg = gJITC.LRUreg->reg;
252     }
253     return jitcAllocFixedRegister(reg);
254     }
255    
256     /*
257     * Returns native registers that contains value of
258     * client register or allocates new register which
259     * maps to the client register.
260     * Dirties register.
261     *
262     * May produce a store if no registers are avaiable
263     * May produce a MOV/XCHG to satisfy mapping
264     * Will never produce a load
265     */
266     NativeReg FASTCALL jitcMapClientRegisterDirty(PPC_Register creg, int options)
267     {
268     if (options & NATIVE_REG_8) {
269     // nyi
270     ht_printf("unimpl x86asm:%d\n", __LINE__);
271     exit(-1);
272     }
273     if (options & NATIVE_REG) {
274     NativeReg want_reg = (NativeReg)(options & 0xf);
275     PPC_Register have_mapping = jitcGetRegisterMapping(want_reg);
276    
277     if (have_mapping != PPC_REG_NO) {
278     // test if we're lucky
279     if (have_mapping == creg) {
280     jitcDirtyRegister(want_reg);
281     jitcTouchRegister(want_reg);
282     return want_reg;
283     }
284    
285     // we're not lucky, get a new register for the old mapping
286     NativeReg temp_reg = jitcAllocRegister();
287     // note that AllocRegister also touches temp_reg
288    
289     // make new mapping
290     jitcMapRegister(want_reg, creg);
291    
292     gJITC.nativeRegState[temp_reg] = gJITC.nativeRegState[want_reg];
293     // now we can mess with want_reg
294     jitcDirtyRegister(want_reg);
295    
296     // maybe the old mapping was discarded and we're done
297     if (temp_reg == want_reg) return want_reg;
298    
299     // ok, restore old mapping
300     if (temp_reg == EAX || want_reg == EAX) {
301     asmALURegReg(X86_XCHG, temp_reg, want_reg);
302     } else {
303     asmALURegReg(X86_MOV, temp_reg, want_reg);
304     }
305     jitcMapRegister(temp_reg, have_mapping);
306     } else {
307     // want_reg is free
308     // unmap creg if needed
309     NativeReg reg = jitcGetClientRegisterMapping(creg);
310     if (reg != REG_NO) {
311     jitcUnmapRegister(reg);
312     jitcDiscardRegister(reg);
313     }
314     jitcMapRegister(want_reg, creg);
315     jitcDirtyRegister(want_reg);
316     }
317     jitcTouchRegister(want_reg);
318     return want_reg;
319     } else {
320     NativeReg reg = jitcGetClientRegisterMapping(creg);
321     if (reg == REG_NO) {
322     reg = jitcAllocRegister();
323     jitcMapRegister(reg, creg);
324     } else {
325     jitcTouchRegister(reg);
326     }
327     return jitcDirtyRegister(reg);
328     }
329     }
330    
331    
332     /*
333     * Returns native registers that contains value of
334     * client register or allocates new register with
335     * this content.
336     *
337     * May produce a store if no registers are avaiable
338     * May produce a load if client registers isn't mapped
339     * May produce a MOV/XCHG to satisfy mapping
340     */
341     NativeReg FASTCALL jitcGetClientRegister(PPC_Register creg, int options)
342     {
343     if (options & NATIVE_REG_8) {
344     NativeReg client_reg_maps_to = jitcGetClientRegisterMapping(creg);
345     if (client_reg_maps_to == REG_NO) {
346     NativeReg reg = jitcAllocRegister(NATIVE_REG_8);
347     jitcLoadRegister(reg, creg);
348     return reg;
349     } else {
350     if (client_reg_maps_to <= EBX) {
351     jitcTouchRegister(client_reg_maps_to);
352     return client_reg_maps_to;
353     }
354     NativeReg want_reg = jitcAllocRegister(NATIVE_REG_8);
355     asmALURegReg(X86_MOV, want_reg, client_reg_maps_to);
356     jitcUnmapRegister(client_reg_maps_to);
357     jitcMapRegister(want_reg, creg);
358     gJITC.nativeRegState[want_reg] = gJITC.nativeRegState[client_reg_maps_to];
359     gJITC.nativeRegState[client_reg_maps_to] = rsUnused;
360     return want_reg;
361     }
362     }
363     if (options & NATIVE_REG) {
364     NativeReg want_reg = (NativeReg)(options & 0xf);
365     PPC_Register native_reg_maps_to = jitcGetRegisterMapping(want_reg);
366     NativeReg client_reg_maps_to = jitcGetClientRegisterMapping(creg);
367     if (native_reg_maps_to != PPC_REG_NO) {
368     // test if we're lucky
369     if (native_reg_maps_to == creg) {
370     jitcTouchRegister(want_reg);
371     } else {
372     // we need to satisfy mapping
373     if (client_reg_maps_to != REG_NO) {
374     asmALURegReg(X86_XCHG, want_reg, client_reg_maps_to);
375     RegisterState rs = gJITC.nativeRegState[want_reg];
376     gJITC.nativeRegState[want_reg] = gJITC.nativeRegState[client_reg_maps_to];
377     gJITC.nativeRegState[client_reg_maps_to] = rs;
378     jitcMapRegister(want_reg, creg);
379     jitcMapRegister(client_reg_maps_to, native_reg_maps_to);
380     jitcTouchRegister(want_reg);
381     } else {
382     // client register isn't mapped
383     jitcAllocFixedRegister(want_reg);
384     jitcLoadRegister(want_reg, creg);
385     }
386     }
387     return want_reg;
388     } else {
389     // want_reg is free
390     jitcTouchRegister(want_reg);
391     if (client_reg_maps_to != REG_NO) {
392     asmALURegReg(X86_MOV, want_reg, client_reg_maps_to);
393     gJITC.nativeRegState[want_reg] = gJITC.nativeRegState[client_reg_maps_to];
394     jitcUnmapRegister(client_reg_maps_to);
395     jitcDiscardRegister(client_reg_maps_to);
396     jitcMapRegister(want_reg, creg);
397     } else {
398     jitcLoadRegister(want_reg, creg);
399     }
400     return want_reg;
401     }
402     } else {
403     NativeReg client_reg_maps_to = jitcGetClientRegisterMapping(creg);
404     if (client_reg_maps_to != REG_NO) {
405     jitcTouchRegister(client_reg_maps_to);
406     return client_reg_maps_to;
407     } else {
408     NativeReg reg = jitcAllocRegister();
409     jitcLoadRegister(reg, creg);
410     return reg;
411     }
412     }
413     }
414    
415     /*
416     * Same as jitcGetClientRegister() but also dirties result
417     */
418     NativeReg FASTCALL jitcGetClientRegisterDirty(PPC_Register creg, int options)
419     {
420     return jitcDirtyRegister(jitcGetClientRegister(creg, options));
421     }
422    
423     static inline void FASTCALL jitcFlushSingleRegister(NativeReg reg)
424     {
425     if (gJITC.nativeRegState[reg] == rsDirty) {
426     jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
427     }
428     }
429    
430     static inline void FASTCALL jitcFlushSingleRegisterDirty(NativeReg reg)
431     {
432     if (gJITC.nativeRegState[reg] == rsDirty) {
433     jitcStoreRegister(reg, jitcGetRegisterMapping(reg));
434     }
435     }
436    
437     /*
438     * Flushes native register(s).
439     * Resets dirty flags.
440     * Will produce a store if register is dirty.
441     */
442     void FASTCALL jitcFlushRegister(int options)
443     {
444     if (options == NATIVE_REGS_ALL) {
445     for (NativeReg i = EAX; i <= EDI; i = (NativeReg)(i+1)) jitcFlushSingleRegister(i);
446     } else if (options & NATIVE_REG) {
447     NativeReg reg = (NativeReg)(options & 0xf);
448     jitcFlushSingleRegister(reg);
449     }
450     }
451    
452     /*
453     * Flushes native register(s).
454     * Doesnt reset dirty flags.
455     * Will produce a store if register is dirty.
456     */
457     void FASTCALL jitcFlushRegisterDirty(int options)
458     {
459     if (options == NATIVE_REGS_ALL) {
460     for (NativeReg i = EAX; i <= EDI; i = (NativeReg)(i+1)) jitcFlushSingleRegisterDirty(i);
461     } else if (options & NATIVE_REG) {
462     NativeReg reg = (NativeReg)(options & 0xf);
463     jitcFlushSingleRegisterDirty(reg);
464     }
465     }
466     /*
467     * Clobbers native register(s).
468     * Register is unused afterwards.
469     * Will produce a store if register was dirty.
470     */
471     void FASTCALL jitcClobberRegister(int options)
472     {
473     if (options == NATIVE_REGS_ALL) {
474     /*
475     * We dont use clobberAndDiscard here
476     * since it make no sense to move one register
477     * if we clobber all
478     */
479     for (NativeReg i = EAX; i <= EDI; i=(NativeReg)(i+1)) jitcClobberSingleRegister(i);
480     } else if (options & NATIVE_REG) {
481     NativeReg reg = (NativeReg)(options & 0xf);
482     jitcClobberAndDiscardRegister(reg);
483     }
484     }
485    
486     /*
487     *
488     */
489     void FASTCALL jitcFlushAll()
490     {
491     jitcClobberCarryAndFlags();
492     jitcFlushRegister();
493     jitcFlushVectorRegister();
494     }
495    
496     /*
497     *
498     */
499     void FASTCALL jitcClobberAll()
500     {
501     jitcClobberCarryAndFlags();
502     jitcClobberRegister();
503     jitcFloatRegisterClobberAll();
504     jitcTrashVectorRegister();
505     }
506    
507     /*
508     * Invalidates all mappings
509     *
510     * Will never produce code
511     */
512     void FASTCALL jitcInvalidateAll()
513     {
514     #if 0
515     for (int i=EAX; i<=EDI; i++) {
516     if(gJITC.nativeRegState[i] != rsDirty) {
517     printf("!!! Unflushed register invalidated!\n");
518     }
519     }
520     #endif
521    
522     memset(gJITC.nativeReg, PPC_REG_NO, sizeof gJITC.nativeReg);
523     memset(gJITC.nativeRegState, rsUnused, sizeof gJITC.nativeRegState);
524     memset(gJITC.clientReg, REG_NO, sizeof gJITC.clientReg);
525     gJITC.nativeCarryState = gJITC.nativeFlagsState = rsUnused;
526    
527     for (unsigned int i=XMM0; i<=XMM7; i++) {
528     if(gJITC.nativeVectorRegState[i] == rsDirty) {
529     printf("!!! Unflushed vector register invalidated! (XMM%u)\n", i);
530     }
531     }
532    
533     memset(gJITC.n2cVectorReg, PPC_VECTREG_NO, sizeof gJITC.n2cVectorReg);
534     memset(gJITC.c2nVectorReg, VECTREG_NO, sizeof gJITC.c2nVectorReg);
535     memset(gJITC.nativeVectorRegState, rsUnused, sizeof gJITC.nativeVectorRegState);
536    
537     gJITC.nativeVectorReg = VECTREG_NO;
538     }
539    
540     /*
541     * Gets the client carry flags into the native carry flag
542     *
543     *
544     */
545     void FASTCALL jitcGetClientCarry()
546     {
547     if (gJITC.nativeCarryState == rsUnused) {
548     jitcClobberFlags();
549    
550     #if 0
551     // bt [gCPU.xer], XER_CA
552     byte modrm[6];
553     asmBTxMemImm(X86_BT, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.xer), 29);
554     #else
555     // bt [gCPU.xer_ca], 0
556     byte modrm[6];
557     asmBTxMemImm(X86_BT, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.xer_ca), 0);
558     #endif
559     gJITC.nativeCarryState = rsMapped;
560     }
561     }
562    
563     void FASTCALL jitcMapFlagsDirty(PPC_CRx cr)
564     {
565     gJITC.nativeFlags = cr;
566     gJITC.nativeFlagsState = rsDirty;
567     }
568    
569     PPC_CRx FASTCALL jitcGetFlagsMapping()
570     {
571     return gJITC.nativeFlags;
572     }
573    
574     bool FASTCALL jitcFlagsMapped()
575     {
576     return gJITC.nativeFlagsState != rsUnused;
577     }
578    
579     bool FASTCALL jitcCarryMapped()
580     {
581     return gJITC.nativeCarryState != rsUnused;
582     }
583    
584     void FASTCALL jitcMapCarryDirty()
585     {
586     gJITC.nativeCarryState = rsDirty;
587     }
588    
589     static inline void FASTCALL jitcFlushCarry()
590     {
591     byte modrm[6];
592     asmSETMem(X86_C, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.xer_ca));
593     }
594    
595     #if 0
596    
597     static inline void FASTCALL jitcFlushFlags()
598     {
599     asmCALL((NativeAddress)ppc_flush_flags_asm);
600     }
601    
602     #else
603    
604     uint8 jitcFlagsMapping[257];
605     uint8 jitcFlagsMapping2[256];
606     uint8 jitcFlagsMappingCMP_U[257];
607     uint8 jitcFlagsMappingCMP_L[257];
608    
609     static inline void FASTCALL jitcFlushFlags()
610     {
611     #if 1
612     byte modrm[6];
613     NativeReg r = jitcAllocRegister(NATIVE_REG_8);
614     asmSETReg8(X86_S, (NativeReg8)r);
615     asmSETReg8(X86_Z, (NativeReg8)(r+4));
616     asmMOVxxRegReg16(X86_MOVZX, r, r);
617     asmALUMemImm8(X86_AND, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), 0x0f);
618     asmALURegMem8(X86_MOV, (NativeReg8)r, modrm, x86_mem(modrm, r, (uint32)&jitcFlagsMapping));
619     asmALUMemReg8(X86_OR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), (NativeReg8)r);
620     #else
621     byte modrm[6];
622     jitcAllocRegister(NATIVE_REG | EAX);
623     asmSimple(X86_LAHF);
624     asmMOVxxRegReg8(X86_MOVZX, EAX, AH);
625     asmALUMemImm8(X86_AND, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), 0x0f);
626     asmALURegMem8(X86_MOV, AL, modrm, x86_mem(modrm, EAX, (uint32)&jitcFlagsMapping2));
627     asmALUMemReg8(X86_OR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), AL);
628     #endif
629     }
630    
631     #endif
632    
633     static inline void jitcFlushFlagsAfterCMP(X86FlagTest t1, X86FlagTest t2, byte mask, int disp, uint32 map)
634     {
635     byte modrm[6];
636     NativeReg r = jitcAllocRegister(NATIVE_REG_8);
637     asmSETReg8(t1, (NativeReg8)r);
638     asmSETReg8(t2, (NativeReg8)(r+4));
639     asmMOVxxRegReg16(X86_MOVZX, r, r);
640     asmALUMemImm8(X86_AND, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+disp), mask);
641     asmALURegMem8(X86_MOV, (NativeReg8)r, modrm, x86_mem(modrm, r, map));
642     asmALUMemReg8(X86_OR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+disp), (NativeReg8)r);
643     }
644    
645     void FASTCALL jitcFlushFlagsAfterCMPL_U(int disp)
646     {
647     jitcFlushFlagsAfterCMP(X86_A, X86_B, 0x0f, disp, (uint32)&jitcFlagsMappingCMP_U);
648     }
649    
650     void FASTCALL jitcFlushFlagsAfterCMPL_L(int disp)
651     {
652     jitcFlushFlagsAfterCMP(X86_A, X86_B, 0xf0, disp, (uint32)&jitcFlagsMappingCMP_L);
653     }
654    
655     void FASTCALL jitcFlushFlagsAfterCMP_U(int disp)
656     {
657     jitcFlushFlagsAfterCMP(X86_G, X86_L, 0x0f, disp, (uint32)&jitcFlagsMappingCMP_U);
658     }
659    
660     void FASTCALL jitcFlushFlagsAfterCMP_L(int disp)
661     {
662     jitcFlushFlagsAfterCMP(X86_G, X86_L, 0xf0, disp, (uint32)&jitcFlagsMappingCMP_L);
663     }
664    
665     void FASTCALL jitcClobberFlags()
666     {
667     if (gJITC.nativeFlagsState == rsDirty) {
668     if (gJITC.nativeCarryState == rsDirty) {
669     jitcFlushCarry();
670     }
671     jitcFlushFlags();
672     gJITC.nativeCarryState = rsUnused;
673     }
674     gJITC.nativeFlagsState = rsUnused;
675     }
676    
677     void FASTCALL jitcClobberCarry()
678     {
679     if (gJITC.nativeCarryState == rsDirty) {
680     jitcFlushCarry();
681     }
682     gJITC.nativeCarryState = rsUnused;
683     }
684    
685     void FASTCALL jitcClobberCarryAndFlags()
686     {
687     if (gJITC.nativeCarryState == rsDirty) {
688     if (gJITC.nativeFlagsState == rsDirty) {
689     jitcFlushCarry();
690     jitcFlushFlags();
691     gJITC.nativeCarryState = gJITC.nativeFlagsState = rsUnused;
692     } else {
693     jitcClobberCarry();
694     }
695     } else {
696     jitcClobberFlags();
697     }
698     }
699    
700     /*
701     * ONLY FOR DEBUG! DON'T CALL (unless you know what you are doing)
702     */
703     void FASTCALL jitcFlushCarryAndFlagsDirty()
704     {
705     if (gJITC.nativeCarryState == rsDirty) {
706     jitcFlushCarry();
707     if (gJITC.nativeFlagsState == rsDirty) {
708     jitcFlushFlags();
709     }
710     } else {
711     if (gJITC.nativeFlagsState == rsDirty) {
712     jitcFlushFlags();
713     }
714     }
715     }
716    
717     /*
718     * jitcFloatRegisterToNative converts the stack-independent
719     * register r to a stack-dependent register ST(i)
720     */
721     NativeFloatReg FASTCALL jitcFloatRegisterToNative(JitcFloatReg r)
722     {
723     return X86_FLOAT_ST(gJITC.nativeFloatTOP-gJITC.floatRegPerm[r]);
724     }
725    
726     /*
727     * jitcFloatRegisterFromNative converts the stack-dependent
728     * register ST(r) to a stack-independent JitcFloatReg
729     */
730     JitcFloatReg FASTCALL jitcFloatRegisterFromNative(NativeFloatReg r)
731     {
732     ASSERT(gJITC.nativeFloatTOP > r);
733     return gJITC.floatRegPermInverse[gJITC.nativeFloatTOP-r];
734     }
735    
736     /*
737     * Returns true iff r is on top of the floating point register
738     * stack.
739     */
740     bool FASTCALL jitcFloatRegisterIsTOP(JitcFloatReg r)
741     {
742     ASSERT(r != JITC_FLOAT_REG_NONE);
743     return gJITC.floatRegPerm[r] == gJITC.nativeFloatTOP;
744     }
745    
746     /*
747     * Exchanges r to the front of the stack.
748     */
749     JitcFloatReg FASTCALL jitcFloatRegisterXCHGToFront(JitcFloatReg r)
750     {
751     ASSERT(r != JITC_FLOAT_REG_NONE);
752     if (jitcFloatRegisterIsTOP(r)) return r;
753    
754     asmFXCHSTi(jitcFloatRegisterToNative(r));
755     JitcFloatReg s = jitcFloatRegisterFromNative(Float_ST0);
756     ASSERT(s != r);
757     // set floatRegPerm := floatRegPerm * (s r)
758     int tmp = gJITC.floatRegPerm[r];
759     gJITC.floatRegPerm[r] = gJITC.floatRegPerm[s];
760     gJITC.floatRegPerm[s] = tmp;
761    
762     // set floatRegPermInverse := (s r) * floatRegPermInverse
763     r = gJITC.floatRegPerm[r];
764     s = gJITC.floatRegPerm[s];
765     tmp = gJITC.floatRegPermInverse[r];
766     gJITC.floatRegPermInverse[r] = gJITC.floatRegPermInverse[s];
767     gJITC.floatRegPermInverse[s] = tmp;
768    
769     return r;
770     }
771    
772     /*
773     * Dirties r
774     */
775     JitcFloatReg FASTCALL jitcFloatRegisterDirty(JitcFloatReg r)
776     {
777     gJITC.nativeFloatRegState[r] = rsDirty;
778     return r;
779     }
780    
781     void FASTCALL jitcFloatRegisterInvalidate(JitcFloatReg r)
782     {
783     jitcFloatRegisterXCHGToFront(r);
784     asmFFREEPSTi(Float_ST0);
785     int creg = gJITC.nativeFloatRegStack[r];
786     gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
787     gJITC.nativeFloatTOP--;
788     }
789    
790     void FASTCALL jitcPopFloatStack(JitcFloatReg hint1, JitcFloatReg hint2)
791     {
792     ASSERT(gJITC.nativeFloatTOP > 0);
793    
794     JitcFloatReg r;
795     for (int i=0; i<4; i++) {
796     r = jitcFloatRegisterFromNative(X86_FLOAT_ST(gJITC.nativeFloatTOP-i-1));
797     if (r != hint1 && r != hint2) break;
798     }
799    
800     // we can now free r
801     int creg = gJITC.nativeFloatRegStack[r];
802     jitcFloatRegisterXCHGToFront(r);
803     if (gJITC.nativeFloatRegState[r] == rsDirty) {
804     byte modrm[6];
805     asmFSTPDoubleMem(modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.fpr[creg]));
806     } else {
807     asmFFREEPSTi(Float_ST0);
808     }
809     gJITC.nativeFloatRegState[r] = rsUnused;
810     gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
811     gJITC.nativeFloatTOP--;
812     }
813    
814     static JitcFloatReg FASTCALL jitcPushFloatStack(int creg)
815     {
816     ASSERT(gJITC.nativeFloatTOP < 8);
817     gJITC.nativeFloatTOP++;
818     int r = gJITC.floatRegPermInverse[gJITC.nativeFloatTOP];
819     byte modrm[6];
820     asmFLDDoubleMem(modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.fpr[creg]));
821     return r;
822     }
823    
824     /*
825     * Creates a copy of r on the stack. If the stack is full, it will
826     * clobber an entry. It will not clobber r nor hint.
827     */
828     JitcFloatReg FASTCALL jitcFloatRegisterDup(JitcFloatReg freg, JitcFloatReg hint)
829     {
830     // ht_printf("dup %d\n", freg);
831     if (gJITC.nativeFloatTOP == 8) {
832     // stack is full
833     jitcPopFloatStack(freg, hint);
834     }
835     asmFLDSTi(jitcFloatRegisterToNative(freg));
836     gJITC.nativeFloatTOP++;
837     int r = gJITC.floatRegPermInverse[gJITC.nativeFloatTOP];
838     gJITC.nativeFloatRegState[r] = rsUnused; // not really mapped
839     return r;
840     }
841    
842     void FASTCALL jitcFloatRegisterClobberAll()
843     {
844     if (!gJITC.nativeFloatTOP) return;
845    
846     do {
847     JitcFloatReg r = jitcFloatRegisterFromNative(Float_ST0);
848     int creg = gJITC.nativeFloatRegStack[r];
849     switch (gJITC.nativeFloatRegState[r]) {
850     case rsDirty: {
851     byte modrm[6];
852     asmFSTPDoubleMem(modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.fpr[creg]));
853     gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
854     break;
855     }
856     case rsMapped:
857     asmFFREEPSTi(Float_ST0);
858     gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
859     break;
860     case rsUnused: {ASSERT(0);}
861     }
862     } while (--gJITC.nativeFloatTOP);
863     }
864    
865     void FASTCALL jitcFloatRegisterStoreAndPopTOP(JitcFloatReg r)
866     {
867     asmFSTDPSTi(jitcFloatRegisterToNative(r));
868     gJITC.nativeFloatTOP--;
869     }
870    
871     void FASTCALL jitcClobberClientRegisterForFloat(int creg)
872     {
873     NativeReg r = jitcGetClientRegisterMapping(PPC_FPR_U(creg));
874     if (r != REG_NO) jitcClobberRegister(r | NATIVE_REG);
875     r = jitcGetClientRegisterMapping(PPC_FPR_L(creg));
876     if (r != REG_NO) jitcClobberRegister(r | NATIVE_REG);
877     }
878    
879     void FASTCALL jitcInvalidateClientRegisterForFloat(int creg)
880     {
881     // FIXME: no need to clobber, invalidate would be enough
882     jitcClobberClientRegisterForFloat(creg);
883     }
884    
885     JitcFloatReg FASTCALL jitcGetClientFloatRegisterMapping(int creg)
886     {
887     return gJITC.clientFloatReg[creg];
888     }
889    
890     JitcFloatReg FASTCALL jitcGetClientFloatRegisterUnmapped(int creg, int hint1, int hint2)
891     {
892     JitcFloatReg r = jitcGetClientFloatRegisterMapping(creg);
893     if (r == JITC_FLOAT_REG_NONE) {
894     if (gJITC.nativeFloatTOP == 8) {
895     jitcPopFloatStack(hint1, hint2);
896     }
897     r = jitcPushFloatStack(creg);
898     gJITC.nativeFloatRegState[r] = rsUnused;
899     }
900     return r;
901     }
902    
903     JitcFloatReg FASTCALL jitcGetClientFloatRegister(int creg, int hint1, int hint2)
904     {
905     JitcFloatReg r = jitcGetClientFloatRegisterMapping(creg);
906     if (r == JITC_FLOAT_REG_NONE) {
907     if (gJITC.nativeFloatTOP == 8) {
908     jitcPopFloatStack(hint1, hint2);
909     }
910     r = jitcPushFloatStack(creg);
911     gJITC.clientFloatReg[creg] = r;
912     gJITC.nativeFloatRegStack[r] = creg;
913     gJITC.nativeFloatRegState[r] = rsMapped;
914     }
915     return r;
916     }
917    
918     JitcFloatReg FASTCALL jitcMapClientFloatRegisterDirty(int creg, JitcFloatReg freg)
919     {
920     if (freg == JITC_FLOAT_REG_NONE) {
921     freg = jitcFloatRegisterFromNative(Float_ST0);
922     }
923     gJITC.clientFloatReg[creg] = freg;
924     gJITC.nativeFloatRegStack[freg] = creg;
925     gJITC.nativeFloatRegState[freg] = rsDirty;
926     return freg;
927     }
928    
929     /*
930     *
931     */
932     NativeAddress FASTCALL asmHERE()
933     {
934     return gJITC.currentPage->tcp;
935     }
936    
937     void FASTCALL asmNOP(int n)
938     {
939     if (n <= 0) return;
940     byte instr[15];
941     for (int i=0; i < (n-1); i++) {
942     instr[i] = 0x66;
943     }
944     instr[n-1] = 0x90;
945     jitcEmit(instr, n);
946     }
947    
948     static void FASTCALL asmSimpleMODRM(uint8 opc, NativeReg reg1, NativeReg reg2)
949     {
950     byte instr[2] = {opc, 0xc0+(reg1<<3)+reg2};
951     jitcEmit(instr, sizeof(instr));
952     }
953    
954     static void FASTCALL asmSimpleMODRM(uint8 opc, NativeReg16 reg1, NativeReg16 reg2)
955     {
956     byte instr[3] = {0x66, opc, 0xc0+(reg1<<3)+reg2};
957     jitcEmit(instr, sizeof(instr));
958     }
959    
960     static void FASTCALL asmSimpleMODRM(uint8 opc, NativeReg8 reg1, NativeReg8 reg2)
961     {
962     byte instr[2] = {opc, 0xc0+(reg1<<3)+reg2};
963     jitcEmit(instr, sizeof(instr));
964     }
965    
966     static void FASTCALL asmTEST_D(NativeReg reg1, uint32 imm)
967     {
968     if (reg1 <= EBX) {
969     if (imm <= 0xff) {
970     // test al, 1
971     if (reg1 == EAX) {
972     byte instr[2] = {0xa8, imm};
973     jitcEmit(instr, sizeof(instr));
974     } else {
975     byte instr[3] = {0xf6, 0xc0+reg1, imm};
976     jitcEmit(instr, sizeof(instr));
977     }
978     return;
979     } else if (!(imm & 0xffff00ff)) {
980     // test ah, 1
981     byte instr[3] = {0xf6, 0xc4+reg1, (imm>>8)};
982     jitcEmit(instr, sizeof(instr));
983     return;
984     }
985     }
986     // test eax, 1001
987     if (reg1 == EAX) {
988     byte instr[5];
989     instr[0] = 0xa9;
990     *((uint32 *)&instr[1]) = imm;
991     jitcEmit(instr, sizeof(instr));
992     } else {
993     byte instr[6];
994     instr[0] = 0xf7;
995     instr[1] = 0xc0+reg1;
996     *((uint32 *)&instr[2]) = imm;
997     jitcEmit(instr, sizeof(instr));
998     }
999     }
1000    
1001     static void FASTCALL asmTEST_W(NativeReg16 reg1, uint16 imm)
1002     {
1003     if (reg1 <= BX) {
1004     if (imm <= 0xff) {
1005     // test al, 1
1006     if (reg1 == AX) {
1007     byte instr[2] = {0xa8, imm};
1008     jitcEmit(instr, sizeof(instr));
1009     } else {
1010     byte instr[3] = {0xf6, 0xc0+reg1, imm};
1011     jitcEmit(instr, sizeof(instr));
1012     }
1013     return;
1014     } else if (!(imm & 0xffff00ff)) {
1015     // test ah, 1
1016     byte instr[3] = {0xf6, 0xc4+reg1, (imm>>8)};
1017     jitcEmit(instr, sizeof(instr));
1018     return;
1019     }
1020     }
1021     // test eax, 1001
1022     if (reg1 == AX) {
1023     byte instr[4];
1024     instr[0] = 0x66;
1025     instr[1] = 0xa9;
1026     *((uint16 *)&instr[2]) = imm;
1027     jitcEmit(instr, sizeof(instr));
1028     } else {
1029     byte instr[5];
1030     instr[0] = 0x66;
1031     instr[1] = 0xf7;
1032     instr[2] = 0xc0+reg1;
1033     *((uint16 *)&instr[3]) = imm;
1034     jitcEmit(instr, sizeof(instr));
1035     }
1036     }
1037    
1038     static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg reg1, NativeReg reg2)
1039     {
1040     byte instr[2] = {0x03+(opc<<3), 0xc0+(reg1<<3)+reg2};
1041     jitcEmit(instr, sizeof(instr));
1042     }
1043    
1044     static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg16 reg1, NativeReg16 reg2)
1045     {
1046     byte instr[3] = {0x66, 0x03+(opc<<3), 0xc0+(reg1<<3)+reg2};
1047     jitcEmit(instr, sizeof(instr));
1048     }
1049    
1050     static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg8 reg1, NativeReg8 reg2)
1051     {
1052     byte instr[2] = {0x02+(opc<<3), 0xc0+(reg1<<3)+reg2};
1053     jitcEmit(instr, sizeof(instr));
1054     }
1055    
1056    
1057     void FASTCALL asmALU(X86ALUopc opc, NativeReg reg1, NativeReg reg2)
1058     {
1059     switch (opc) {
1060     case X86_MOV:
1061     asmSimpleMODRM(0x8b, reg1, reg2);
1062     break;
1063     case X86_TEST:
1064     asmSimpleMODRM(0x85, reg1, reg2);
1065     break;
1066     case X86_XCHG:
1067     if (reg1 == EAX) {
1068     jitcEmit1(0x90+reg2);
1069     } else if (reg2 == EAX) {
1070     jitcEmit1(0x90+reg1);
1071     } else {
1072     asmSimpleMODRM(0x87, reg1, reg2);
1073     }
1074     break;
1075     default:
1076     asmSimpleALU(opc, reg1, reg2);
1077     }
1078     }
1079     void FASTCALL asmALURegReg(X86ALUopc opc, NativeReg reg1, NativeReg reg2)
1080     {
1081     asmALU(opc, reg1, reg2);
1082     }
1083    
1084     void FASTCALL asmALU(X86ALUopc opc, NativeReg16 reg1, NativeReg16 reg2)
1085     {
1086     switch (opc) {
1087     case X86_MOV:
1088     asmSimpleMODRM(0x8b, reg1, reg2);
1089     break;
1090     case X86_TEST:
1091     asmSimpleMODRM(0x85, reg1, reg2);
1092     break;
1093     case X86_XCHG:
1094     if (reg1 == AX) {
1095     byte instr[2] = { 0x66, 0x90+reg2 };
1096     jitcEmit(instr, sizeof instr);
1097     } else if (reg2 == AX) {
1098     byte instr[2] = { 0x66, 0x90+reg1 };
1099     jitcEmit(instr, sizeof instr);
1100     } else {
1101     asmSimpleMODRM(0x87, reg1, reg2);
1102     }
1103     break;
1104     default:
1105     asmSimpleALU(opc, reg1, reg2);
1106     }
1107     }
1108     void FASTCALL asmALURegReg16(X86ALUopc opc, NativeReg reg1, NativeReg reg2)
1109     {
1110     asmALU(opc, (NativeReg16)reg1, (NativeReg16)reg2);
1111     }
1112    
1113     void FASTCALL asmALU(X86ALUopc opc, NativeReg8 reg1, NativeReg8 reg2)
1114     {
1115     switch (opc) {
1116     case X86_MOV:
1117     asmSimpleMODRM(0x8a, reg1, reg2);
1118     break;
1119     case X86_TEST:
1120     asmSimpleMODRM(0x84, reg1, reg2);
1121     break;
1122     case X86_XCHG:
1123     asmSimpleMODRM(0x86, reg1, reg2);
1124     break;
1125     default:
1126     asmSimpleALU(opc, reg1, reg2);
1127     }
1128     }
1129     void FASTCALL asmALURegReg8(X86ALUopc opc, NativeReg8 reg1, NativeReg8 reg2)
1130     {
1131     asmALU(opc, reg1, reg2);
1132     }
1133    
1134     void FASTCALL asmALU(X86ALUopc opc, NativeReg8 reg1, uint8 imm)
1135     {
1136     byte instr[5];
1137     switch (opc) {
1138     case X86_MOV:
1139     instr[0] = 0xb0 + reg1;
1140     instr[1] = imm;
1141     jitcEmit(instr, 2);
1142     break;
1143     case X86_TEST:
1144     if (reg1 == AL) {
1145     instr[0] = 0xa8;
1146     instr[1] = imm;
1147     jitcEmit(instr, 2);
1148     } else {
1149     instr[0] = 0xf6;
1150     instr[1] = 0xc0 + reg1;
1151     instr[2] = imm;
1152     jitcEmit(instr, 3);
1153     }
1154     break;
1155     case X86_XCHG:
1156     // internal error
1157     break;
1158     default: {
1159     if (reg1 == AL) {
1160     instr[0] = (opc<<3)|0x4;
1161     instr[1] = imm;
1162     jitcEmit(instr, 2);
1163     } else {
1164     instr[0] = 0x80;
1165     instr[1] = 0xc0+(opc<<3)+reg1;
1166     instr[2] = imm;
1167     jitcEmit(instr, 3);
1168     }
1169     break;
1170     }
1171     }
1172     }
1173     void FASTCALL asmALURegImm8(X86ALUopc opc, NativeReg8 reg1, uint8 imm)
1174     {
1175     asmALU(opc, reg1, imm);
1176     }
1177    
1178     static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg reg1, uint32 imm)
1179     {
1180     if (imm <= 0x7f || imm >= 0xffffff80) {
1181     byte instr[3] = {0x83, 0xc0+(opc<<3)+reg1, imm};
1182     jitcEmit(instr, sizeof(instr));
1183     } else {
1184     if (reg1 == EAX) {
1185     byte instr[5];
1186     instr[0] = 0x05+(opc<<3);
1187     *((uint32 *)&instr[1]) = imm;
1188     jitcEmit(instr, sizeof(instr));
1189     } else {
1190     byte instr[6];
1191     instr[0] = 0x81;
1192     instr[1] = 0xc0+(opc<<3)+reg1;
1193     *((uint32 *)&instr[2]) = imm;
1194     jitcEmit(instr, sizeof(instr));
1195     }
1196     }
1197     }
1198    
1199     static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg16 reg1, uint32 imm)
1200     {
1201     if (imm <= 0x7f || imm >= 0xffffff80) {
1202     byte instr[3] = {0x83, 0xc0+(opc<<3)+reg1, imm};
1203     jitcEmit(instr, sizeof(instr));
1204     } else {
1205     if (reg1 == AX) {
1206     byte instr[4];
1207     instr[0] = 0x66;
1208     instr[1] = 0x05+(opc<<3);
1209     *((uint16 *)&instr[2]) = imm;
1210     jitcEmit(instr, sizeof(instr));
1211     } else {
1212     byte instr[5];
1213     instr[0] = 0x66;
1214     instr[1] = 0x81;
1215     instr[2] = 0xc0+(opc<<3)+reg1;
1216     *((uint16 *)&instr[3]) = imm;
1217     jitcEmit(instr, sizeof(instr));
1218     }
1219     }
1220     }
1221    
1222     void FASTCALL asmALU(X86ALUopc opc, NativeReg reg1, uint32 imm)
1223     {
1224     switch (opc) {
1225     case X86_MOV:
1226     if (imm == 0) {
1227     asmALU(X86_XOR, reg1, reg1);
1228     } else {
1229     asmMOV_NoFlags(reg1, imm);
1230     }
1231     break;
1232     case X86_XCHG:
1233     // internal error
1234     break;
1235     case X86_TEST:
1236     asmTEST_D(reg1, imm);
1237     break;
1238     case X86_CMP:
1239     // if (imm == 0) {
1240     // asmALU(X86_OR, reg1, reg1);
1241     // } else {
1242     asmSimpleALU(opc, reg1, imm);
1243     // }
1244     break;
1245     default:
1246     asmSimpleALU(opc, reg1, imm);
1247     }
1248     }
1249     void FASTCALL asmALURegImm(X86ALUopc opc, NativeReg reg1, uint32 imm)
1250     {
1251     asmALU(opc, reg1, imm);
1252     }
1253    
1254     void FASTCALL asmALU(X86ALUopc opc, NativeReg16 reg1, uint16 imm)
1255     {
1256     switch (opc) {
1257     case X86_MOV:
1258     if (imm == 0) {
1259     asmALU(X86_XOR, reg1, reg1);
1260     } else {
1261     asmMOV_NoFlags(reg1, imm);
1262     }
1263     break;
1264     case X86_XCHG:
1265     // internal error
1266     break;
1267     case X86_TEST:
1268     asmTEST_W(reg1, imm);
1269     break;
1270     case X86_CMP:
1271     // if (imm == 0) {
1272     // asmALU(X86_OR, reg1, reg1);
1273     // } else {
1274     asmSimpleALU(opc, reg1, imm);
1275     // }
1276     break;
1277     default:
1278     asmSimpleALU(opc, reg1, imm);
1279     }
1280     }
1281     void FASTCALL asmALURegImm16(X86ALUopc opc, NativeReg reg1, uint32 imm)
1282     {
1283     asmALU(opc, (NativeReg16)reg1, imm);
1284     }
1285    
1286     void FASTCALL asmMOV_NoFlags(NativeReg reg1, uint32 imm)
1287     {
1288     byte instr[5];
1289     instr[0] = 0xb8+reg1;
1290     *((uint32 *)&instr[1]) = imm;
1291     jitcEmit(instr, sizeof(instr));
1292     }
1293     void FASTCALL asmMOVRegImm_NoFlags(NativeReg reg1, uint32 imm)
1294     {
1295     asmMOV_NoFlags(reg1, imm);
1296     }
1297    
1298     void FASTCALL asmMOV_NoFlags(NativeReg16 reg1, uint16 imm)
1299     {
1300     byte instr[4];
1301     instr[0] = 0x66;
1302     instr[1] = 0xb8+reg1;
1303     *((uint16 *)&instr[2]) = imm;
1304     jitcEmit(instr, sizeof(instr));
1305     }
1306     void FASTCALL asmMOVRegImm16_NoFlags(NativeReg reg1, uint16 imm)
1307     {
1308     asmMOV_NoFlags((NativeReg16)reg1, imm);
1309     }
1310    
1311     void FASTCALL asmALU(X86ALUopc1 opc, NativeReg reg1)
1312     {
1313     byte instr[2];
1314     switch (opc) {
1315     case X86_NOT:
1316     instr[0] = 0xf7;
1317     instr[1] = 0xd0+reg1;
1318     break;
1319     case X86_NEG:
1320     instr[0] = 0xf7;
1321     instr[1] = 0xd8+reg1;
1322     break;
1323     case X86_MUL:
1324     instr[0] = 0xf7;
1325     instr[1] = 0xe0+reg1;
1326     break;
1327     case X86_IMUL:
1328     instr[0] = 0xf7;
1329     instr[1] = 0xe8+reg1;
1330     break;
1331     case X86_DIV:
1332     instr[0] = 0xf7;
1333     instr[1] = 0xf0+reg1;
1334     break;
1335     case X86_IDIV:
1336     instr[0] = 0xf7;
1337     instr[1] = 0xf8+reg1;
1338     break;
1339     }
1340     jitcEmit(instr, 2);
1341     }
1342     void FASTCALL asmALUReg(X86ALUopc1 opc, NativeReg reg1)
1343     {
1344     asmALU(opc, reg1);
1345     }
1346    
1347     void FASTCALL asmALU(X86ALUopc1 opc, NativeReg16 reg1)
1348     {
1349     byte instr[3];
1350     instr[0] = 0x66;
1351    
1352     switch (opc) {
1353     case X86_NOT:
1354     instr[1] = 0xf7;
1355     instr[2] = 0xd0+reg1;
1356     break;
1357     case X86_NEG:
1358     instr[1] = 0xf7;
1359     instr[2] = 0xd8+reg1;
1360     break;
1361     case X86_MUL:
1362     instr[1] = 0xf7;
1363     instr[2] = 0xe0+reg1;
1364     break;
1365     case X86_IMUL:
1366     instr[1] = 0xf7;
1367     instr[2] = 0xe8+reg1;
1368     break;
1369     case X86_DIV:
1370     instr[1] = 0xf7;
1371     instr[2] = 0xf0+reg1;
1372     break;
1373     case X86_IDIV:
1374     instr[1] = 0xf7;
1375     instr[2] = 0xf8+reg1;
1376     break;
1377     }
1378     jitcEmit(instr, 3);
1379     }
1380     void FASTCALL asmALUReg16(X86ALUopc1 opc, NativeReg reg1)
1381     {
1382     asmALU(opc, (NativeReg16)reg1);
1383     }
1384    
1385     void FASTCALL asmALUMemReg(X86ALUopc opc, byte *modrm, int len, NativeReg reg2)
1386     {
1387     byte instr[15];
1388    
1389     switch (opc) {
1390     case X86_MOV:
1391     instr[0] = 0x89;
1392     break;
1393     case X86_XCHG:
1394     instr[0] = 0x87;
1395     break;
1396     case X86_TEST:
1397     instr[0] = 0x85;
1398     break;
1399     default:
1400     instr[0] = 0x01+(opc<<3);
1401     }
1402     memcpy(&instr[1], modrm, len);
1403     instr[1] |= (reg2<<3);
1404     jitcEmit(instr, len+1);
1405     }
1406     void FASTCALL asmALU(X86ALUopc opc, modrm_p modrm, NativeReg reg2)
1407     {
1408     byte instr[15];
1409     int len = modrm++[0];
1410    
1411     switch (opc) {
1412     case X86_MOV:
1413     instr[0] = 0x89;
1414     break;
1415     case X86_XCHG:
1416     instr[0] = 0x87;
1417     break;
1418     case X86_TEST:
1419     instr[0] = 0x85;
1420     break;
1421     default:
1422     instr[0] = 0x01+(opc<<3);
1423     }
1424     memcpy(&instr[1], modrm, len);
1425     instr[1] |= (reg2<<3);
1426     jitcEmit(instr, len+1);
1427     }
1428    
1429     void FASTCALL asmALUMemReg16(X86ALUopc opc, byte *modrm, int len, NativeReg reg2)
1430     {
1431     byte instr[16];
1432    
1433     instr[0] = 0x66;
1434     switch (opc) {
1435     case X86_MOV:
1436     instr[1] = 0x89;
1437     break;
1438     case X86_XCHG:
1439     instr[1] = 0x87;
1440     break;
1441     case X86_TEST:
1442     instr[1] = 0x85;
1443     break;
1444     default:
1445     instr[1] = 0x01+(opc<<3);
1446     }
1447     memcpy(&instr[2], modrm, len);
1448     instr[2] |= (reg2<<3);
1449     jitcEmit(instr, len+2);
1450     }
1451     void FASTCALL asmALU(X86ALUopc opc, modrm_p modrm, NativeReg16 reg2)
1452     {
1453     byte instr[16];
1454     int len = modrm++[0];
1455    
1456     instr[0] = 0x66;
1457     switch (opc) {
1458     case X86_MOV:
1459     instr[1] = 0x89;
1460     break;
1461     case X86_XCHG:
1462     instr[1] = 0x87;
1463     break;
1464     case X86_TEST:
1465     instr[1] = 0x85;
1466     break;
1467     default:
1468     instr[1] = 0x01+(opc<<3);
1469     }
1470     memcpy(&instr[2], modrm, len);
1471     instr[2] |= (reg2<<3);
1472     jitcEmit(instr, len+2);
1473     }
1474    
1475    
1476     static void FASTCALL asmSimpleALU_D(X86ALUopc opc, byte *modrm, int len, uint32 imm)
1477     {
1478     byte instr[15];
1479    
1480     if (imm <= 0x7f || imm >= 0xffffff80) {
1481     instr[0] = 0x83;
1482     memcpy(&instr[1], modrm, len);
1483     instr[1] |= (opc<<3);
1484     instr[len+1] = imm;
1485     jitcEmit(instr, len+2);
1486     } else {
1487     instr[0] = 0x81;
1488     memcpy(&instr[1], modrm, len);
1489     instr[1] |= (opc<<3);
1490     *((uint32 *)&instr[len+1]) = imm;
1491     jitcEmit(instr, len+5);
1492     }
1493     }
1494    
1495     static void FASTCALL asmSimpleALU_W(X86ALUopc opc, byte *modrm, int len, uint16 imm)
1496     {
1497     byte instr[16];
1498    
1499     instr[0] = 0x66;
1500    
1501     if (imm <= 0x7f || imm >= 0xff80) {
1502     instr[0] = 0x83;
1503     memcpy(&instr[1], modrm, len);
1504     instr[1] |= (opc<<3);
1505     instr[len+1] = imm;
1506     jitcEmit(instr, len+2);
1507     } else {
1508     instr[1] = 0x81;
1509     memcpy(&instr[2], modrm, len);
1510     instr[2] |= (opc<<3);
1511     *((uint16 *)&instr[len+2]) = imm;
1512     jitcEmit(instr, len+4);
1513     }
1514     }
1515    
1516     void FASTCALL asmALUMemImm(X86ALUopc opc, byte *modrm, int len, uint32 imm)
1517     {
1518     byte instr[15];
1519     switch (opc) {
1520     case X86_MOV: {
1521     instr[0] = 0xc7;
1522     memcpy(&instr[1], modrm, len);
1523     *((uint32 *)&instr[len+1]) = imm;
1524     jitcEmit(instr, len+5);
1525     break;
1526     }
1527     case X86_XCHG:
1528     // internal error
1529     break;
1530     case X86_TEST:
1531     instr[0] = 0xf7;
1532     memcpy(&instr[1], modrm, len);
1533     *((uint32 *)&instr[len+1]) = imm;
1534     jitcEmit(instr, len+5);
1535     break;
1536     default:
1537     asmSimpleALU_D(opc, modrm, len, imm);
1538     }
1539     }
1540    
1541     void FASTCALL asmALU_D(X86ALUopc opc, modrm_p modrm, uint32 imm)
1542     {
1543     int len = modrm++[0];
1544    
1545     asmALUMemImm(opc, modrm, len, imm);
1546     }
1547    
1548     void FASTCALL asmALUMemImm16(X86ALUopc opc, byte *modrm, int len, uint16 imm)
1549     {
1550     byte instr[16];
1551     instr[0] = 0x66;
1552    
1553     switch (opc) {
1554     case X86_MOV: {
1555     instr[1] = 0xc7;
1556     memcpy(&instr[2], modrm, len);
1557     *((uint16 *)&instr[len+2]) = imm;
1558     jitcEmit(instr, len+4);
1559     break;
1560     }
1561     case X86_XCHG:
1562     // internal error
1563     break;
1564     case X86_TEST:
1565     instr[1] = 0xf7;
1566     memcpy(&instr[2], modrm, len);
1567     *((uint16 *)&instr[len+2]) = imm;
1568     jitcEmit(instr, len+4);
1569     break;
1570     default:
1571     asmSimpleALU_W(opc, modrm, len, imm);
1572     }
1573     }
1574     void FASTCALL asmALU_W(X86ALUopc opc, modrm_p modrm, uint16 imm)
1575     {
1576     int len = modrm++[0];
1577    
1578     asmALUMemImm16(opc, modrm, len, imm);
1579     }
1580    
1581     void FASTCALL asmALURegMem(X86ALUopc opc, NativeReg reg1, byte *modrm, int len)
1582     {
1583     byte instr[15];
1584     switch (opc) {
1585     case X86_MOV:
1586     instr[0] = 0x8b;
1587     break;
1588     case X86_XCHG:
1589     // XCHG is symmetric
1590     instr[0] = 0x87;
1591     break;
1592     case X86_TEST:
1593     // TEST is symmetric
1594     instr[0] = 0x85;
1595     break;
1596     default:
1597     instr[0] = 0x03+(opc<<3);
1598     }
1599     memcpy(&instr[1], modrm, len);
1600     instr[1] |= (reg1<<3);
1601     jitcEmit(instr, len+1);
1602     }
1603     void FASTCALL asmALU(X86ALUopc opc, NativeReg reg1, modrm_p modrm)
1604     {
1605     int len = modrm++[0];
1606    
1607     asmALURegMem(opc, reg1, modrm, len);
1608     }
1609    
1610     void FASTCALL asmALURegMem16(X86ALUopc opc, NativeReg reg1, byte *modrm, int len)
1611     {
1612     byte instr[16];
1613     instr[0] = 0x66;
1614     switch (opc) {
1615     case X86_MOV:
1616     instr[1] = 0x8b;
1617     break;
1618     case X86_XCHG:
1619     // XCHG is symmetric
1620     instr[1] = 0x87;
1621     break;
1622     case X86_TEST:
1623     // TEST is symmetric
1624     instr[1] = 0x85;
1625     break;
1626     default:
1627     instr[1] = 0x03+(opc<<3);
1628     }
1629     memcpy(&instr[2], modrm, len);
1630     instr[2] |= (reg1<<3);
1631     jitcEmit(instr, len+2);
1632     }
1633     void FASTCALL asmALU(X86ALUopc opc, NativeReg16 reg1, modrm_p modrm)
1634     {
1635     int len = modrm++[0];
1636    
1637     asmALURegMem16(opc, (NativeReg)reg1, modrm, len);
1638     }
1639    
1640     void FASTCALL asmALURegMem8(X86ALUopc opc, NativeReg8 reg1, byte *modrm, int len)
1641     {
1642     byte instr[15];
1643     switch (opc) {
1644     case X86_MOV:
1645     instr[0] = 0x8a;
1646     break;
1647     case X86_XCHG:
1648     // XCHG is symmetric
1649     instr[0] = 0x86;
1650     break;
1651     case X86_TEST:
1652     // TEST is symmetric
1653     instr[0] = 0x84;
1654     break;
1655     default:
1656     instr[0] = 0x02+(opc<<3);
1657     }
1658     memcpy(&instr[1], modrm, len);
1659     instr[1] |= (reg1<<3);
1660     jitcEmit(instr, len+1);
1661     }
1662     void FASTCALL asmALU(X86ALUopc opc, NativeReg8 reg1, modrm_p modrm)
1663     {
1664     int len = modrm++[0];
1665    
1666     asmALURegMem8(opc, reg1, modrm, len);
1667     }
1668    
1669     void FASTCALL asmALUMemReg8(X86ALUopc opc, byte *modrm, int len, NativeReg8 reg2)
1670     {
1671     byte instr[15];
1672     switch (opc) {
1673     case X86_MOV:
1674     instr[0] = 0x88;
1675     break;
1676     case X86_XCHG:
1677     instr[0] = 0x86;
1678     break;
1679     case X86_TEST:
1680     instr[0] = 0x84;
1681     break;
1682     default:
1683     instr[0] = 0x00+(opc<<3);
1684     }
1685     memcpy(&instr[1], modrm, len);
1686     instr[1] |= (reg2<<3);
1687     jitcEmit(instr, len+1);
1688     }
1689     void FASTCALL asmALU(X86ALUopc opc, modrm_p modrm, NativeReg8 reg2)
1690     {
1691     int len = modrm++[0];
1692    
1693     asmALUMemReg8(opc, modrm, len, reg2);
1694     }
1695    
1696     void FASTCALL asmALUMemImm8(X86ALUopc opc, byte *modrm, int len, uint8 imm)
1697     {
1698     byte instr[15];
1699     switch (opc) {
1700     case X86_MOV:
1701     instr[0] = 0xc6;
1702     break;
1703     case X86_XCHG:
1704     // internal error
1705     break;
1706     case X86_TEST:
1707     instr[0] = 0xf6;
1708     break;
1709     default:
1710     instr[0] = 0x80;
1711     memcpy(&instr[1], modrm, len);
1712     instr[1] |= (opc<<3);
1713     instr[len+1] = imm;
1714     jitcEmit(instr, len+2);
1715     return;
1716     }
1717     memcpy(&instr[1], modrm, len);
1718     instr[len+1] = imm;
1719     jitcEmit(instr, len+2);
1720     }
1721     void FASTCALL asmALU_B(X86ALUopc opc, modrm_p modrm, uint8 imm)
1722     {
1723     int len = modrm++[0];
1724    
1725     asmALUMemImm8(opc, modrm, len, imm);
1726     }
1727    
1728     void FASTCALL asmMOV(const void *disp, NativeReg reg1)
1729     {
1730     byte instr[6];
1731     if (reg1==EAX) {
1732     instr[0] = 0xa3;
1733     *((uint32 *)&instr[1]) = (uint32)disp;
1734     jitcEmit(instr, 5);
1735     } else {
1736     instr[0] = 0x89;
1737     instr[1] = 0x05 | (reg1 << 3);
1738     *((uint32 *)&instr[2]) = (uint32)disp;
1739     jitcEmit(instr, 6);
1740     }
1741     }
1742     void FASTCALL asmMOVDMemReg(uint32 disp, NativeReg reg1)
1743     {
1744     asmMOV((const void *)disp, reg1);
1745     }
1746    
1747     void FASTCALL asmMOV(const void *disp, NativeReg16 reg1)
1748     {
1749     byte instr[7];
1750     instr[0] = 0x66;
1751     if (reg1==AX) {
1752     instr[1] = 0xa3;
1753     *((uint32 *)&instr[2]) = (uint32)disp;
1754     jitcEmit(instr, 6);
1755     } else {
1756     instr[1] = 0x89;
1757     instr[2] = 0x05 | (reg1 << 3);
1758     *((uint32 *)&instr[3]) = (uint32)disp;
1759     jitcEmit(instr, 7);
1760     }
1761     }
1762     void FASTCALL asmMOVDMemReg16(uint32 disp, NativeReg reg1)
1763     {
1764     asmMOV((const void *)disp, (NativeReg16)reg1);
1765     }
1766    
1767     void FASTCALL asmMOV(NativeReg reg1, const void *disp)
1768     {
1769     byte instr[6];
1770     if (reg1==EAX) {
1771     instr[0] = 0xa1;
1772     *((uint32 *)&instr[1]) = (uint32)disp;
1773     jitcEmit(instr, 5);
1774     } else {
1775     instr[0] = 0x8b;
1776     instr[1] = 0x05 | (reg1 << 3);
1777     *((uint32 *)&instr[2]) = (uint32)disp;
1778     jitcEmit(instr, 6);
1779     }
1780     }
1781     void FASTCALL asmMOVRegDMem(NativeReg reg1, uint32 disp)
1782     {
1783     asmMOV(reg1, (const void *)disp);
1784     }
1785    
1786     void FASTCALL asmMOV(NativeReg16 reg1, const void *disp)
1787     {
1788     byte instr[7];
1789     instr[0] = 0x66;
1790     if (reg1==AX) {
1791     instr[1] = 0xa1;
1792     *((uint32 *)&instr[2]) = (uint32)disp;
1793     jitcEmit(instr, 6);
1794     } else {
1795     instr[1] = 0x8b;
1796     instr[2] = 0x05 | (reg1 << 3);
1797     *((uint32 *)&instr[3]) = (uint32)disp;
1798     jitcEmit(instr, 7);
1799     }
1800     }
1801     void FASTCALL asmMOVRegDMem16(NativeReg reg1, uint32 disp)
1802     {
1803     asmMOV((NativeReg16)reg1, (const void *)disp);
1804     }
1805    
1806     void FASTCALL asmTEST(const void *disp, uint32 imm)
1807     {
1808     byte instr[15];
1809     instr[1] = 0x05;
1810     if (!(imm & 0xffffff00)) {
1811     instr[0] = 0xf6;
1812     *((uint32 *)&instr[2]) = (uint32)disp;
1813     instr[6] = imm;
1814     } else if (!(imm & 0xffff00ff)) {
1815     instr[0] = 0xf6;
1816     *((uint32 *)&instr[2]) = (uint32)disp+1;
1817     instr[6] = imm >> 8;
1818     } else if (!(imm & 0xff00ffff)) {
1819     instr[0] = 0xf6;
1820     *((uint32 *)&instr[2]) = (uint32)disp+2;
1821     instr[6] = imm >> 16;
1822     } else if (!(imm & 0x00ffffff)) {
1823     instr[0] = 0xf6;
1824     *((uint32 *)&instr[2]) = (uint32)disp+3;
1825     instr[6] = imm >> 24;
1826     } else {
1827     instr[0] = 0xf7;
1828     *((uint32 *)&instr[2]) = (uint32)disp;
1829     *((uint32 *)&instr[6]) = imm;
1830     jitcEmit(instr, 10);
1831     return;
1832     }
1833     jitcEmit(instr, 7);
1834     }
1835     void FASTCALL asmTESTDMemImm(uint32 disp, uint32 imm)
1836     {
1837     asmTEST((const void *)disp, imm);
1838     }
1839    
1840     void FASTCALL asmAND(const void *disp, uint32 imm)
1841     {
1842     byte instr[15];
1843     instr[1] = 0x25;
1844     if ((imm & 0xffffff00)==0xffffff00) {
1845     instr[0] = 0x80;
1846     *((uint32 *)&instr[2]) = (uint32)disp;
1847     instr[6] = imm;
1848     } else if ((imm & 0xffff00ff)==0xffff00ff) {
1849     instr[0] = 0x80;
1850     *((uint32 *)&instr[2]) = (uint32)disp+1;
1851     instr[6] = imm >> 8;
1852     } else if ((imm & 0xff00ffff)==0xff00ffff) {
1853     instr[0] = 0x80;
1854     *((uint32 *)&instr[2]) = (uint32)disp+2;
1855     instr[6] = imm >> 16;
1856     } else if ((imm & 0x00ffffff)==0x00ffffff) {
1857     instr[0] = 0x80;
1858     *((uint32 *)&instr[2]) = (uint32)disp+3;
1859     instr[6] = imm >> 24;
1860     } else {
1861     instr[0] = 0x81;
1862     *((uint32 *)&instr[2]) = (uint32)disp;
1863     *((uint32 *)&instr[6]) = imm;
1864     jitcEmit(instr, 10);
1865     return;
1866     }
1867     jitcEmit(instr, 7);
1868     }
1869     void FASTCALL asmANDDMemImm(uint32 disp, uint32 imm)
1870     {
1871     asmAND((const void *)disp, imm);
1872     }
1873    
1874     void FASTCALL asmOR(const void *disp, uint32 imm)
1875     {
1876     byte instr[15];
1877     instr[1] = 0x0d;
1878     if (!(imm & 0xffffff00)) {
1879     instr[0] = 0x80;
1880     *((uint32 *)&instr[2]) = (uint32)disp;
1881     instr[6] = imm;
1882     } else if (!(imm & 0xffff00ff)) {
1883     instr[0] = 0x80;
1884     *((uint32 *)&instr[2]) = (uint32)disp+1;
1885     instr[6] = imm >> 8;
1886     } else if (!(imm & 0xff00ffff)) {
1887     instr[0] = 0x80;
1888     *((uint32 *)&instr[2]) = (uint32)disp+2;
1889     instr[6] = imm >> 16;
1890     } else if (!(imm & 0x00ffffff)) {
1891     instr[0] = 0x80;
1892     *((uint32 *)&instr[2]) = (uint32)disp+3;
1893     instr[6] = imm >> 24;
1894     } else {
1895     instr[0] = 0x81;
1896     *((uint32 *)&instr[2]) = (uint32)disp;
1897     *((uint32 *)&instr[6]) = imm;
1898     jitcEmit(instr, 10);
1899     return;
1900     }
1901     jitcEmit(instr, 7);
1902     }
1903     void FASTCALL asmORDMemImm(uint32 disp, uint32 imm)
1904     {
1905     asmOR((const void *)disp, imm);
1906     }
1907    
1908    
1909     void FASTCALL asmMOVxx(X86MOVxx opc, NativeReg reg1, NativeReg8 reg2)
1910     {
1911     byte instr[3] = {0x0f, opc, 0xc0+(reg1<<3)+reg2};
1912     jitcEmit(instr, sizeof(instr));
1913     }
1914     void FASTCALL asmMOVxxRegReg8(X86MOVxx opc, NativeReg reg1, NativeReg8 reg2)
1915     {
1916     asmMOVxx(opc, reg1, reg2);
1917     }
1918    
1919     void FASTCALL asmMOVxx(X86MOVxx opc, NativeReg reg1, NativeReg16 reg2)
1920     {
1921     byte instr[3] = {0x0f, opc+1, 0xc0+(reg1<<3)+reg2};
1922     jitcEmit(instr, sizeof(instr));
1923     }
1924     void FASTCALL asmMOVxxRegReg16(X86MOVxx opc, NativeReg reg1, NativeReg reg2)
1925     {
1926     asmMOVxx(opc, reg1, (NativeReg16)reg2);
1927     }
1928    
1929     void FASTCALL asmMOVxxRegMem8(X86MOVxx opc, NativeReg reg1, byte *modrm, int len)
1930     {
1931     byte instr[16] = { 0x0f };
1932    
1933     instr[1] = opc;
1934     memcpy(&instr[2], modrm, len);
1935     instr[2] |= (reg1 << 3);
1936    
1937     jitcEmit(instr, len+2);
1938     }
1939     void FASTCALL asmMOVxx_B(X86MOVxx opc, NativeReg reg1, modrm_p modrm)
1940     {
1941     int len = modrm++[0];
1942    
1943     asmMOVxxRegMem8(opc, reg1, modrm, len);
1944     }
1945    
1946     void FASTCALL asmMOVxxRegMem16(X86MOVxx opc, NativeReg reg1, byte *modrm, int len)
1947     {
1948     byte instr[16] = { 0x0f };
1949    
1950     instr[1] = opc+1;
1951     memcpy(&instr[2], modrm, len);
1952     instr[2] |= (reg1 << 3);
1953    
1954     jitcEmit(instr, len+2);
1955     }
1956     void FASTCALL asmMOVxx_W(X86MOVxx opc, NativeReg reg1, modrm_p modrm)
1957     {
1958     int len = modrm++[0];
1959    
1960     asmMOVxxRegMem16(opc, reg1, modrm, len);
1961     }
1962    
1963     void FASTCALL asmSET(X86FlagTest flags, NativeReg8 reg1)
1964     {
1965     byte instr[3] = {0x0f, 0x90+flags, 0xc0+reg1};
1966     jitcEmit(instr, sizeof(instr));
1967     }
1968     void FASTCALL asmSETReg8(X86FlagTest flags, NativeReg8 reg1)
1969     {
1970     asmSET(flags, reg1);
1971     }
1972    
1973     void FASTCALL asmSETMem(X86FlagTest flags, byte *modrm, int len)
1974     {
1975     byte instr[15];
1976     instr[0] = 0x0f;
1977     instr[1] = 0x90+flags;
1978     memcpy(instr+2, modrm, len);
1979     jitcEmit(instr, len+2);
1980     }
1981     void FASTCALL asmSET(X86FlagTest flags, modrm_p modrm)
1982     {
1983     int len = modrm++[0];
1984    
1985     asmSETMem(flags, modrm, len);
1986     }
1987    
1988     void FASTCALL asmCMOV(X86FlagTest flags, NativeReg reg1, NativeReg reg2)
1989     {
1990     if (gJITC.hostCPUCaps.cmov) {
1991     byte instr[3] = {0x0f, 0x40+flags, 0xc0+(reg1<<3)+reg2};
1992     jitcEmit(instr, sizeof(instr));
1993     } else {
1994     byte instr[4] = {
1995     0x70+(flags ^ 1), 0x02, // jnCC $+2
1996     0x8b, 0xc0+(reg1<<3)+reg2, // mov reg1, reg2
1997     };
1998     jitcEmit(instr, sizeof instr);
1999     }
2000     }
2001     void FASTCALL asmCMOVRegReg(X86FlagTest flags, NativeReg reg1, NativeReg reg2)
2002     {
2003     asmCMOV(flags, reg1, reg2);
2004     }
2005    
2006     void FASTCALL asmCMOVRegMem(X86FlagTest flags, NativeReg reg1, byte *modrm, int len)
2007     {
2008     if (gJITC.hostCPUCaps.cmov) {
2009     byte instr[16] = {0x0f, 0x40+flags };
2010     memcpy(&instr[2], modrm, len);
2011     instr[2] |= (reg1<<3);
2012     jitcEmit(instr, len+2);
2013     } else {
2014     byte instr[17] = {
2015     0x70+(flags ^ 1), 1 + len, // jnCC $+2
2016     0x8b, // mov reg1, *
2017     };
2018     memcpy(&instr[3], modrm, len);
2019     instr[3] |= (reg1<<3);
2020     jitcEmit(instr, len+3);
2021     }
2022     }
2023     void FASTCALL asmCMOV(X86FlagTest flags, NativeReg reg1, modrm_p modrm)
2024     {
2025     int len = modrm++[0];
2026    
2027     asmCMOVRegMem(flags, reg1, modrm, len);
2028     }
2029    
2030     void FASTCALL asmShift(X86ShiftOpc opc, NativeReg reg1, uint32 imm)
2031     {
2032     if (imm == 1) {
2033     byte instr[2] = {0xd1, 0xc0+opc+reg1};
2034     jitcEmit(instr, sizeof(instr));
2035     } else {
2036     byte instr[3] = {0xc1, 0xc0+opc+reg1, imm};
2037     jitcEmit(instr, sizeof(instr));
2038     }
2039     }
2040     void FASTCALL asmShiftRegImm(X86ShiftOpc opc, NativeReg reg1, uint32 imm)
2041     {
2042     asmShift(opc, reg1, imm);
2043     }
2044    
2045     void FASTCALL asmShift_CL(X86ShiftOpc opc, NativeReg reg1)
2046     {
2047     // 0xd3 [ModR/M]
2048     byte instr[2] = {0xd3, 0xc0+opc+reg1};
2049     jitcEmit(instr, sizeof(instr));
2050     }
2051     void FASTCALL asmShiftRegCL(X86ShiftOpc opc, NativeReg reg1)
2052     {
2053     asmShift_CL(opc, reg1);
2054     }
2055    
2056     void FASTCALL asmShift(X86ShiftOpc opc, NativeReg16 reg1, uint32 imm)
2057     {
2058     if (imm == 1) {
2059     byte instr[3] = {0x66, 0xd1, 0xc0+opc+reg1};
2060     jitcEmit(instr, sizeof(instr));
2061     } else {
2062     byte instr[4] = {0x66, 0xc1, 0xc0+opc+reg1, imm};
2063     jitcEmit(instr, sizeof(instr));
2064     }
2065     }
2066     void FASTCALL asmShiftReg16Imm(X86ShiftOpc opc, NativeReg reg1, uint32 imm)
2067     {
2068     asmShift(opc, (NativeReg16)reg1, imm);
2069     }
2070    
2071     void FASTCALL asmShift_CL(X86ShiftOpc opc, NativeReg16 reg1)
2072     {
2073     // 0xd3 [ModR/M]
2074     byte instr[3] = {0x66, 0xd3, 0xc0+opc+reg1};
2075     jitcEmit(instr, sizeof(instr));
2076     }
2077     void FASTCALL asmShiftReg16CL(X86ShiftOpc opc, NativeReg reg1)
2078     {
2079     asmShift_CL(opc, (NativeReg16)reg1);
2080     }
2081    
2082     void FASTCALL asmShift(X86ShiftOpc opc, NativeReg8 reg1, uint32 imm)
2083     {
2084     if (imm == 1) {
2085     byte instr[2] = {0xd0, 0xc0+opc+reg1};
2086     jitcEmit(instr, sizeof(instr));
2087     } else {
2088     byte instr[3] = {0xc0, 0xc0+opc+reg1, imm};
2089     jitcEmit(instr, sizeof(instr));
2090     }
2091     }
2092     void FASTCALL asmShiftReg8Imm(X86ShiftOpc opc, NativeReg8 reg1, uint32 imm)
2093     {
2094     asmShift(opc, reg1, imm);
2095     }
2096    
2097     void FASTCALL asmShift_CL(X86ShiftOpc opc, NativeReg8 reg1)
2098     {
2099     // 0xd3 [ModR/M]
2100     byte instr[2] = {0xd2, 0xc0+opc+reg1};
2101     jitcEmit(instr, sizeof(instr));
2102     }
2103     void FASTCALL asmShiftReg8CL(X86ShiftOpc opc, NativeReg8 reg1)
2104     {
2105     asmShift_CL(opc, reg1);
2106     }
2107    
2108     void FASTCALL asmIMUL(NativeReg reg1, NativeReg reg2, uint32 imm)
2109     {
2110     if (imm <= 0x7f || imm >= 0xffffff80) {
2111     byte instr[3] = {0x6b, 0xc0+(reg1<<3)+reg2, imm};
2112     jitcEmit(instr, sizeof(instr));
2113     } else {
2114     byte instr[6] = {0x69, 0xc0+(reg1<<3)+reg2};
2115     *((uint32*)(&instr[2])) = imm;
2116     jitcEmit(instr, sizeof(instr));
2117     }
2118     }
2119    
2120     void FASTCALL asmIMUL(NativeReg reg1, NativeReg reg2)
2121     {
2122     byte instr[3] = {0x0f, 0xaf, 0xc0+(reg1<<3)+reg2};
2123     jitcEmit(instr, sizeof(instr));
2124     }
2125    
2126     void FASTCALL asmIMULRegRegImm(NativeReg reg1, NativeReg reg2, uint32 imm)
2127     {
2128     asmIMUL(reg1, reg2, imm);
2129     }
2130    
2131     void FASTCALL asmIMULRegReg(NativeReg reg1, NativeReg reg2)
2132     {
2133     asmIMUL(reg1, reg2);
2134     }
2135    
2136     void FASTCALL asmINC(NativeReg reg1)
2137     {
2138     jitcEmit1(0x40+reg1);
2139     }
2140     void FASTCALL asmINCReg(NativeReg reg1)
2141     {
2142     asmINC(reg1);
2143     }
2144    
2145     void FASTCALL asmDECReg(NativeReg reg1)
2146     {
2147     jitcEmit1(0x48+reg1);
2148     }
2149     void FASTCALL asmDEC(NativeReg reg1)
2150     {
2151     asmDEC(reg1);
2152     }
2153    
2154     void FASTCALL asmLEA(NativeReg reg1, byte *modrm, int len)
2155     {
2156     byte instr[15];
2157     instr[0] = 0x8d;
2158     memcpy(instr+1, modrm, len);
2159     instr[1] |= reg1<<3;
2160     jitcEmit(instr, len+1);
2161     }
2162     void FASTCALL asmLEA(NativeReg reg1, modrm_p modrm)
2163     {
2164     int len = modrm++[0];
2165    
2166     asmLEA(reg1, modrm, len);
2167     }
2168    
2169     void FASTCALL asmBTx(X86BitTest opc, NativeReg reg1, int value)
2170     {
2171     byte instr[4] = {0x0f, 0xba, 0xc0+(opc<<3)+reg1, value};
2172     jitcEmit(instr, sizeof instr);
2173     }
2174     void FASTCALL asmBTxRegImm(X86BitTest opc, NativeReg reg1, int value)
2175     {
2176     asmBTx(opc, reg1, value);
2177     }
2178    
2179     void FASTCALL asmBTxMemImm(X86BitTest opc, byte *modrm, int len, int value)
2180     {
2181     byte instr[15];
2182     instr[0] = 0x0f;
2183     instr[1] = 0xba;
2184     memcpy(instr+2, modrm, len);
2185     instr[2] |= opc<<3;
2186     instr[len+2] = value;
2187     jitcEmit(instr, len+3);
2188     }
2189     void FASTCALL asmBTx(X86BitTest opc, modrm_p modrm, int value)
2190     {
2191     int len = modrm++[0];
2192    
2193     asmBTxMemImm(opc, modrm, len, value);
2194     }
2195    
2196     void FASTCALL asmBSx(X86BitSearch opc, NativeReg reg1, NativeReg reg2)
2197     {
2198     byte instr[3] = {0x0f, opc, 0xc0+(reg1<<3)+reg2};
2199     jitcEmit(instr, sizeof(instr));
2200     }
2201     void FASTCALL asmBSxRegReg(X86BitSearch opc, NativeReg reg1, NativeReg reg2)
2202     {
2203     asmBSx(opc, reg1, reg2);
2204     }
2205    
2206     void FASTCALL asmBSWAP(NativeReg reg)
2207     {
2208     byte instr[2];
2209     instr[0] = 0x0f;
2210     instr[1] = 0xc8+reg;
2211     jitcEmit(instr, sizeof(instr));
2212     }
2213    
2214     void FASTCALL asmJMP(NativeAddress to)
2215     {
2216     /*
2217     * We use jitcEmitAssure here, since
2218     * we have to know the exact address of the jump
2219     * instruction (since it is relative)
2220     */
2221     restart:
2222     byte instr[5];
2223     uint32 rel = (uint32)(to - (gJITC.currentPage->tcp+2));
2224     if (rel <= 0x7f || rel >= 0xffffff80) {
2225     if (!jitcEmitAssure(2)) goto restart;
2226     instr[0] = 0xeb;
2227     instr[1] = rel;
2228     jitcEmit(instr, 2);
2229     } else {
2230     if (!jitcEmitAssure(5)) goto restart;
2231     instr[0] = 0xe9;
2232     *((uint32 *)&instr[1]) = (uint32)(to - (gJITC.currentPage->tcp+5));
2233     // *((uint32 *)&instr[1]) = rel - 3;
2234     jitcEmit(instr, 5);
2235     }
2236     }
2237    
2238     void FASTCALL asmJxx(X86FlagTest flags, NativeAddress to)
2239     {
2240     restart:
2241     byte instr[6];
2242     uint32 rel = (uint32)(to - (gJITC.currentPage->tcp+2));
2243     if (rel <= 0x7f || rel >= 0xffffff80) {
2244     if (!jitcEmitAssure(2)) goto restart;
2245     instr[0] = 0x70+flags;
2246     instr[1] = rel;
2247     jitcEmit(instr, 2);
2248     } else {
2249     if (!jitcEmitAssure(6)) goto restart;
2250     instr[0] = 0x0f;
2251     instr[1] = 0x80+flags;
2252     *((uint32 *)&instr[2]) = (uint32)(to - (gJITC.currentPage->tcp+6));
2253     // *((uint32 *)&instr[2]) = rel - 3;
2254     jitcEmit(instr, 6);
2255     }
2256     }
2257    
2258     NativeAddress FASTCALL asmJMPFixup()
2259     {
2260     byte instr[5];
2261     instr[0] = 0xe9;
2262     jitcEmit(instr, 5);
2263     return gJITC.currentPage->tcp - 4;
2264     }
2265    
2266     NativeAddress FASTCALL asmJxxFixup(X86FlagTest flags)
2267     {
2268     byte instr[6];
2269     instr[0] = 0x0f;
2270     instr[1] = 0x80+flags;
2271     jitcEmit(instr, 6);
2272     return gJITC.currentPage->tcp - 4;
2273     }
2274    
2275     void FASTCALL asmResolveFixup(NativeAddress at, NativeAddress to)
2276     {
2277     /*
2278     * yes, I also didn't believe this could be real code until
2279     * I had written it. -Sebastian
2280     */
2281     if (to == 0) {
2282     to = gJITC.currentPage->tcp;
2283     }
2284     *((uint32 *)at) = (uint32)(to - ((uint32)at+4));
2285     }
2286    
2287     void FASTCALL asmCALL(NativeAddress to)
2288     {
2289     jitcEmitAssure(5);
2290     byte instr[5];
2291     instr[0] = 0xe8;
2292     *((uint32 *)&instr[1]) = (uint32)(to - (gJITC.currentPage->tcp+5));
2293     jitcEmit(instr, 5);
2294     }
2295    
2296     void FASTCALL asmSimple(X86SimpleOpc simple)
2297     {
2298     if (simple > 0xff) {
2299     jitcEmit((byte*)&simple, 2);
2300     } else {
2301     jitcEmit1(simple);
2302     }
2303     }
2304    
2305     void FASTCALL asmFComp(X86FloatCompOp op, NativeFloatReg sti)
2306     {
2307     byte instr[2];
2308    
2309     memcpy(instr, &op, 2);
2310     instr[1] += sti;
2311    
2312     jitcEmit(instr, 2);
2313     }
2314     void FASTCALL asmFCompSTi(X86FloatCompOp op, NativeFloatReg sti)
2315     {
2316     asmFComp(op, sti);
2317     }
2318    
2319     void FASTCALL asmFICompMem(X86FloatICompOp op, byte *modrm, int len)
2320     {
2321     byte instr[16];
2322    
2323     instr[0] = op;
2324     memcpy(&instr[1], modrm, len);
2325     instr[1] |= 2<<3;
2326     jitcEmit(instr, len+1);
2327     }
2328     void FASTCALL asmFIComp(X86FloatICompOp op, modrm_p modrm)
2329     {
2330     int len = modrm++[0];
2331    
2332     asmFICompMem(op, modrm, len);
2333     }
2334    
2335     void FASTCALL asmFICompPMem(X86FloatICompOp op, byte *modrm, int len)
2336     {
2337     byte instr[16];
2338    
2339     instr[0] = op;
2340     memcpy(&instr[1], modrm, len);
2341     instr[1] |= 3<<3;
2342     jitcEmit(instr, len+1);
2343     }
2344     void FASTCALL asmFICompP(X86FloatICompOp op, modrm_p modrm)
2345     {
2346     int len = modrm++[0];
2347    
2348     asmFICompPMem(op, modrm, len);
2349     }
2350    
2351     void FASTCALL asmFArithMem(X86FloatArithOp op, byte *modrm, int len)
2352     {
2353     int mod = 0;
2354     switch (op) {
2355     case X86_FADD:
2356     mod = 0;
2357     break;
2358     case X86_FMUL:
2359     mod = 1;
2360     break;
2361     case X86_FDIV:
2362     mod = 6;
2363     break;
2364     case X86_FDIVR:
2365     mod = 7;
2366     break;
2367     case X86_FSUB:
2368     mod = 4;
2369     break;
2370     case X86_FSUBR:
2371     mod = 5;
2372     break;
2373     }
2374     byte instr[15];
2375     instr[0] = 0xdc;
2376     memcpy(instr+1, modrm, len);
2377     instr[1] |= mod<<3;
2378     jitcEmit(instr, len+1);
2379     }
2380     void FASTCALL asmFArith(X86FloatArithOp op, modrm_p modrm)
2381     {
2382     int len = modrm++[0];
2383    
2384     asmFArithMem(op, modrm, len);
2385     }
2386    
2387     void FASTCALL asmFArith_ST0(X86FloatArithOp op, NativeFloatReg sti)
2388     {
2389     byte instr[2] = {0xd8, op+sti};
2390     jitcEmit(instr, sizeof instr);
2391     }
2392     void FASTCALL asmFArithST0(X86FloatArithOp op, NativeFloatReg sti)
2393     {
2394     asmFArith_ST0(op, sti);
2395     }
2396    
2397     void FASTCALL asmFArith_STi(X86FloatArithOp op, NativeFloatReg sti)
2398     {
2399     byte instr[2] = {0xdc, op+sti};
2400     jitcEmit(instr, sizeof instr);
2401     }
2402     void FASTCALL asmFArithSTi(X86FloatArithOp op, NativeFloatReg sti)
2403     {
2404     asmFArith_STi(op, sti);
2405     }
2406    
2407     void FASTCALL asmFArithP_STi(X86FloatArithOp op, NativeFloatReg sti)
2408     {
2409     byte instr[2] = {0xde, op+sti};
2410     jitcEmit(instr, sizeof instr);
2411     }
2412     void FASTCALL asmFArithSTiP(X86FloatArithOp op, NativeFloatReg sti)
2413     {
2414     asmFArithP_STi(op, sti);
2415     }
2416    
2417     void FASTCALL asmFXCH(NativeFloatReg sti)
2418     {
2419     byte instr[2] = {0xd9, 0xc8+sti};
2420     jitcEmit(instr, sizeof instr);
2421     }
2422     void FASTCALL asmFXCHSTi(NativeFloatReg sti)
2423     {
2424     asmFXCH(sti);
2425     }
2426    
2427     void FASTCALL asmFFREE(NativeFloatReg sti)
2428     {
2429     byte instr[2] = {0xdd, 0xc0+sti};
2430     jitcEmit(instr, sizeof instr);
2431     }
2432     void FASTCALL asmFFREESTi(NativeFloatReg sti)
2433     {
2434     asmFFREE(sti);
2435     }
2436    
2437     void FASTCALL asmFFREEP(NativeFloatReg sti)
2438     {
2439     /*
2440     * AMD says:
2441     * "Note that the FREEP instructions, although insufficiently
2442     * documented in the past, is supported by all 32-bit x86 processors."
2443     */
2444     byte instr[2] = {0xdf, 0xc0+sti};
2445     jitcEmit(instr, sizeof instr);
2446     }
2447     void FASTCALL asmFFREEPSTi(NativeFloatReg sti)
2448     {
2449     asmFFREEP(sti);
2450     }
2451    
2452     void FASTCALL asmFSimple(X86FloatOp op)
2453     {
2454     jitcEmit((byte*)&op, 2);
2455     }
2456     void FASTCALL asmFSimpleST0(X86FloatOp op)
2457     {
2458     asmFSimple(op);
2459     }
2460    
2461     void FASTCALL asmFLDSingleMem(byte *modrm, int len)
2462     {
2463     byte instr[15];
2464     instr[0] = 0xd9;
2465     memcpy(instr+1, modrm, len);
2466     jitcEmit(instr, len+1);
2467     }
2468     void FASTCALL asmFLD_Single(modrm_p modrm)
2469     {
2470     int len = modrm++[0];
2471    
2472     asmFLDSingleMem(modrm, len);
2473     }
2474    
2475     void FASTCALL asmFLDDoubleMem(byte *modrm, int len)
2476     {
2477     byte instr[15];
2478     instr[0] = 0xdd;
2479     memcpy(instr+1, modrm, len);
2480     jitcEmit(instr, len+1);
2481     }
2482     void FASTCALL asmFLD_Double(modrm_p modrm)
2483     {
2484     int len = modrm++[0];
2485    
2486     asmFLDDoubleMem(modrm, len);
2487     }
2488    
2489     void FASTCALL asmFLD(NativeFloatReg sti)
2490     {
2491     byte instr[2] = {0xd9, 0xc0+sti};
2492     jitcEmit(instr, sizeof instr);
2493     }
2494     void FASTCALL asmFLDSTi(NativeFloatReg sti)
2495     {
2496     asmFLD(sti);
2497     }
2498    
2499     void FASTCALL asmFILD16(byte *modrm, int len)
2500     {
2501     byte instr[15];
2502     instr[0] = 0xdf;
2503     memcpy(instr+1, modrm, len);
2504     jitcEmit(instr, len+1);
2505     }
2506     void FASTCALL asmFILD_W(modrm_p modrm)
2507     {
2508     int len = modrm++[0];
2509    
2510     asmFILD16(modrm, len);
2511     }
2512    
2513     void FASTCALL asmFILD(byte *modrm, int len)
2514     {
2515     byte instr[15];
2516     instr[0] = 0xdb;
2517     memcpy(instr+1, modrm, len);
2518     jitcEmit(instr, len+1);
2519     }
2520     void FASTCALL asmFILD_D(modrm_p modrm)
2521     {
2522     int len = modrm++[0];
2523    
2524     asmFILD(modrm, len);
2525     }
2526    
2527     void FASTCALL asmFILD_Q(modrm_p modrm)
2528     {
2529     byte instr[15];
2530     instr[0] = 0xdf;
2531     memcpy(instr+1, modrm+1, modrm[0]);
2532     instr[1] |= 5<<3;
2533     jitcEmit(instr, modrm[0]+1);
2534     }
2535    
2536     void FASTCALL asmFSTSingleMem(byte *modrm, int len)
2537     {
2538     byte instr[15];
2539     instr[0] = 0xd9;
2540     memcpy(instr+1, modrm, len);
2541     instr[1] |= 2<<3;
2542     jitcEmit(instr, len+1);
2543     }
2544     void FASTCALL asmFST_Single(modrm_p modrm)
2545     {
2546     int len = modrm++[0];
2547    
2548     asmFSTSingleMem(modrm, len);
2549     }
2550    
2551     void FASTCALL asmFSTPSingleMem(byte *modrm, int len)
2552     {
2553     byte instr[15];
2554     instr[0] = 0xd9;
2555     memcpy(instr+1, modrm, len);
2556     instr[1] |= 3<<3;
2557     jitcEmit(instr, len+1);
2558     }
2559     void FASTCALL asmFSTP_Single(modrm_p modrm)
2560     {
2561     int len = modrm++[0];
2562    
2563     asmFSTPSingleMem(modrm, len);
2564     }
2565    
2566     void FASTCALL asmFSTDoubleMem(byte *modrm, int len)
2567     {
2568     byte instr[15];
2569     instr[0] = 0xdd;
2570     memcpy(instr+1, modrm, len);
2571     instr[1] |= 2<<3;
2572     jitcEmit(instr, len+1);
2573     }
2574     void FASTCALL asmFST_Double(modrm_p modrm)
2575     {
2576     int len = modrm++[0];
2577    
2578     asmFSTDoubleMem(modrm, len);
2579     }
2580    
2581     void FASTCALL asmFSTPDoubleMem(byte *modrm, int len)
2582     {
2583     byte instr[15];
2584     instr[0] = 0xdd;
2585     memcpy(instr+1, modrm, len);
2586     instr[1] |= 3<<3;
2587     jitcEmit(instr, len+1);
2588     }
2589     void FASTCALL asmFSTP_Double(modrm_p modrm)
2590     {
2591     int len = modrm++[0];
2592    
2593     asmFSTPDoubleMem(modrm, len);
2594     }
2595    
2596     void FASTCALL asmFST(NativeFloatReg sti)
2597     {
2598     byte instr[2] = {0xdd, 0xd0+sti};
2599     jitcEmit(instr, sizeof instr);
2600     }
2601     void FASTCALL asmFSTDSTi(NativeFloatReg sti)
2602     {
2603     asmFST(sti);
2604     }
2605    
2606     void FASTCALL asmFSTP(NativeFloatReg sti)
2607     {
2608     byte instr[2] = {0xdd, 0xd8+sti};
2609     jitcEmit(instr, sizeof instr);
2610     }
2611     void FASTCALL asmFSTDPSTi(NativeFloatReg sti)
2612     {
2613     asmFSTP(sti);
2614     }
2615    
2616     void FASTCALL asmFISTP_W(modrm_p modrm)
2617     {
2618     byte instr[15];
2619     instr[0] = 0xdf;
2620     memcpy(instr+1, modrm+1, modrm[0]);
2621     instr[1] |= 3<<3;
2622     jitcEmit(instr, modrm[0]+1);
2623     }
2624    
2625     void FASTCALL asmFISTPMem(byte *modrm, int len)
2626     {
2627     byte instr[15];
2628     instr[0] = 0xdb;
2629     memcpy(instr+1, modrm, len);
2630     instr[1] |= 3<<3;
2631     jitcEmit(instr, len+1);
2632     }
2633     void FASTCALL asmFISTP_D(modrm_p modrm)
2634     {
2635     int len = modrm++[0];
2636    
2637     asmFISTPMem(modrm, len);
2638     }
2639    
2640     void FASTCALL asmFISTPMem64(byte *modrm, int len)
2641     {
2642     byte instr[15];
2643     instr[0] = 0xdf;
2644     memcpy(instr+1, modrm, len);
2645     instr[1] |= 7<<3;
2646     jitcEmit(instr, len+1);
2647     }
2648     void FASTCALL asmFISTP_Q(modrm_p modrm)
2649     {
2650     int len = modrm++[0];
2651    
2652     asmFISTPMem64(modrm, len);
2653     }
2654    
2655     void FASTCALL asmFISTTPMem(byte *modrm, int len)
2656     {
2657     byte instr[15];
2658     instr[0] = 0xdb;
2659     memcpy(instr+1, modrm, len);
2660     instr[1] |= 1<<3;
2661     jitcEmit(instr, len+1);
2662     }
2663     void FASTCALL asmFISTTP(modrm_p modrm)
2664     {
2665     int len = modrm++[0];
2666    
2667     asmFISTTPMem(modrm, len);
2668     }
2669    
2670     void FASTCALL asmFLDCWMem(byte *modrm, int len)
2671     {
2672     byte instr[15];
2673     instr[0] = 0xd9;
2674     memcpy(instr+1, modrm, len);
2675     instr[1] |= 5<<3;
2676     jitcEmit(instr, len+1);
2677     }
2678     void FASTCALL asmFLDCW(modrm_p modrm)
2679     {
2680     int len = modrm++[0];
2681    
2682     asmFLDCWMem(modrm, len);
2683     }
2684    
2685     void FASTCALL asmFSTCWMem(byte *modrm, int len)
2686     {
2687     byte instr[15];
2688     instr[0] = 0xd9;
2689     memcpy(instr+1, modrm, len);
2690     instr[1] |= 7<<3;
2691     jitcEmit(instr, len+1);
2692     }
2693     void FASTCALL asmFSTCW(modrm_p modrm)
2694     {
2695     int len = modrm++[0];
2696    
2697     asmFSTCWMem(modrm, len);
2698     }
2699    
2700     void FASTCALL asmFSTSWMem(byte *modrm, int len)
2701     {
2702     byte instr[15];
2703     instr[0] = 0xdd;
2704     memcpy(instr+1, modrm, len);
2705     instr[1] |= 7<<3;
2706     jitcEmit(instr, len+1);
2707     }
2708     void FASTCALL asmFSTSW(modrm_p modrm)
2709     {
2710     int len = modrm++[0];
2711    
2712     asmFSTSWMem(modrm, len);
2713     }
2714    
2715     void FASTCALL asmFSTSW_EAX(void)
2716     {
2717     byte instr[15] = { 0xdf, 0xe0 };
2718     jitcEmit(instr, 2);
2719     }
2720    
2721     /*
2722     * Maps one client vector register to one native vector register
2723     * Will never emit any code.
2724     */
2725     static inline void FASTCALL jitcMapVectorRegister(NativeVectorReg nreg, JitcVectorReg creg)
2726     {
2727     //printf("*** map: XMM%u (vr%u)\n", nreg, creg);
2728     gJITC.n2cVectorReg[nreg] = creg;
2729     gJITC.c2nVectorReg[creg] = nreg;
2730    
2731     gJITC.nativeVectorRegState[nreg] = rsMapped;
2732     }
2733    
2734     /*
2735     * Unmaps the native vector register from any client vector register
2736     * Will never emit any code.
2737     */
2738     static inline void FASTCALL jitcUnmapVectorRegister(NativeVectorReg nreg)
2739     {
2740     JitcVectorReg creg = gJITC.n2cVectorReg[nreg];
2741    
2742     if (nreg != VECTREG_NO && creg != PPC_VECTREG_NO) {
2743     //printf("*** unmap: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]);
2744    
2745     gJITC.n2cVectorReg[nreg] = PPC_VECTREG_NO;
2746     gJITC.c2nVectorReg[creg] = VECTREG_NO;
2747    
2748     gJITC.nativeVectorRegState[nreg] = rsUnused;
2749     }
2750     }
2751    
2752     /*
2753     * Marks the native vector register as dirty.
2754     * Does *not* touch native vector register.
2755     * Will not produce code.
2756     */
2757     void FASTCALL jitcDirtyVectorRegister(NativeVectorReg nreg)
2758     {
2759     JitcVectorReg creg = gJITC.n2cVectorReg[nreg];
2760    
2761     //printf("*** dirty(%u) with creg = %u\n", nreg, creg);
2762    
2763     if (creg == JITC_VECTOR_NEG1 || creg == PPC_VECTREG_NO) {
2764     //printf("*** dirty: %u = %u or %u\n", creg, JITC_VECTOR_NEG1, PPC_REG_NO);
2765     return;
2766     }
2767    
2768     if (gJITC.nativeVectorRegState[nreg] == rsUnused) {
2769     printf("!!! Attemped dirty of an anonymous vector register!\n");
2770     return;
2771     }
2772    
2773     if (creg == gJITC.nativeVectorReg) {
2774     gJITC.nativeVectorReg = VECTREG_NO;
2775     }
2776    
2777     gJITC.nativeVectorRegState[nreg] = rsDirty;
2778     }
2779    
2780     /*
2781     * Marks the native vector register as non-dirty.
2782     * Does *not* flush native vector register.
2783     * Will not produce code.
2784     */
2785     static inline void FASTCALL jitcUndirtyVectorRegister(NativeVectorReg nreg)
2786     {
2787     if (gJITC.nativeVectorRegState[nreg] > rsMapped) {
2788     //printf("*** undirty: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]);
2789    
2790     gJITC.nativeVectorRegState[nreg] = rsMapped;
2791     }
2792     }
2793    
2794     /*
2795     * Loads a native vector register with its mapped value.
2796     * Does not alter the native vector register's markings.
2797     * Will always emit an load.
2798     */
2799     static inline void FASTCALL jitcLoadVectorRegister(NativeVectorReg nreg)
2800     {
2801     JitcVectorReg creg = gJITC.n2cVectorReg[nreg];
2802    
2803     if (creg == JITC_VECTOR_NEG1 && gJITC.hostCPUCaps.sse2) {
2804     //printf("*** load neg1: XMM%u\n", nreg);
2805    
2806     /* On a P4, we can load -1 far faster with logic */
2807     asmPALU(PALUD(X86_PCMPEQ), nreg, nreg);
2808     return;
2809     }
2810    
2811     //printf("*** load: XMM%u (vr%u)\n", nreg, creg);
2812     asmMOVAPS(nreg, &gCPU.vr[creg]);
2813     }
2814    
2815     /*
2816     * Stores a native vector register to its mapped client vector register.
2817     * Does not alter the native vector register's markings.
2818     * Will always emit a store.
2819     */
2820     static inline void FASTCALL jitcStoreVectorRegister(NativeVectorReg nreg)
2821     {
2822     JitcVectorReg creg = gJITC.n2cVectorReg[nreg];
2823    
2824     if (creg == JITC_VECTOR_NEG1 || creg == PPC_VECTREG_NO)
2825     return;
2826    
2827     //printf("*** store: XMM%u (vr%u)\n", nreg, creg);
2828    
2829     asmMOVAPS(&gCPU.vr[creg], nreg);
2830     }
2831    
2832     /*
2833     * Returns the native vector register that is mapped to the client
2834     * vector register.
2835     * Will never emit any code.
2836     */
2837     NativeVectorReg FASTCALL jitcGetClientVectorRegisterMapping(JitcVectorReg creg)
2838     {
2839     return gJITC.c2nVectorReg[creg];
2840     }
2841    
2842     /*
2843     * Makes the vector register the least recently used vector register.
2844     * Will never emit any code.
2845     */
2846     static inline void FASTCALL jitcDiscardVectorRegister(NativeVectorReg nreg)
2847     {
2848     NativeVectorReg lreg, mreg;
2849    
2850     mreg = gJITC.MRUvregs[nreg];
2851     lreg = gJITC.LRUvregs[nreg];
2852    
2853     // remove from the list
2854     gJITC.MRUvregs[lreg] = mreg;
2855     gJITC.LRUvregs[mreg] = lreg;
2856    
2857     mreg = gJITC.MRUvregs[XMM_SENTINEL];
2858    
2859     // insert into the list in the LRU spot
2860     gJITC.LRUvregs[nreg] = XMM_SENTINEL;
2861     gJITC.MRUvregs[nreg] = mreg;
2862    
2863     gJITC.LRUvregs[mreg] = nreg;
2864     gJITC.MRUvregs[XMM_SENTINEL] = nreg;
2865     }
2866    
2867     /*
2868     * Makes the vector register the most recently used vector register.
2869     * Will never emit any code.
2870     */
2871     void FASTCALL jitcTouchVectorRegister(NativeVectorReg nreg)
2872     {
2873     NativeVectorReg lreg, mreg;
2874    
2875     mreg = gJITC.MRUvregs[nreg];
2876     lreg = gJITC.LRUvregs[nreg];
2877    
2878     // remove from the list
2879     gJITC.MRUvregs[lreg] = mreg;
2880     gJITC.LRUvregs[mreg] = lreg;
2881    
2882     lreg = gJITC.LRUvregs[XMM_SENTINEL];
2883    
2884     // insert into the list in the LRU spot
2885     gJITC.MRUvregs[nreg] = XMM_SENTINEL;
2886     gJITC.LRUvregs[nreg] = lreg;
2887    
2888     gJITC.MRUvregs[lreg] = nreg;
2889     gJITC.LRUvregs[XMM_SENTINEL] = nreg;
2890     }
2891    
2892     /*
2893     * Unmaps a native vector register, and marks it least recently used.
2894     * Will not emit any code.
2895     */
2896     void FASTCALL jitcDropSingleVectorRegister(NativeVectorReg nreg)
2897     {
2898     jitcDiscardVectorRegister(nreg);
2899     jitcUnmapVectorRegister(nreg);
2900     }
2901    
2902     int FASTCALL jitcAssertFlushedVectorRegister(JitcVectorReg creg)
2903     {
2904     NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
2905    
2906     if (nreg != VECTREG_NO && gJITC.nativeVectorRegState[nreg] == rsDirty) {
2907     printf("!!! Unflushed vector XMM%u (vr%u)!\n", nreg, creg);
2908     return 1;
2909     }
2910     return 0;
2911     }
2912     int FASTCALL jitcAssertFlushedVectorRegisters()
2913     {
2914     int ret = 0;
2915    
2916     for (JitcVectorReg i=0; i<32; i++)
2917     ret |= jitcAssertFlushedVectorRegister(i);
2918    
2919     return ret;
2920     }
2921    
2922     void FASTCALL jitcShowVectorRegisterStatus(JitcVectorReg creg)
2923     {
2924     NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
2925    
2926     if (nreg != VECTREG_NO) {
2927     int status = gJITC.nativeVectorRegState[nreg];
2928     char *text;
2929    
2930     if (status == rsUnused)
2931     text = "unused";
2932     else if (status == rsMapped)
2933     text = "mapped";
2934     else if (status == rsDirty)
2935     text = "dirty";
2936     else
2937     text = "unknown";
2938    
2939     //printf("*** vr%u => XMM%u (%s)\n", creg, nreg, text);
2940     } else {
2941     //printf("*** vr%u => memory\n", creg);
2942     }
2943     }
2944    
2945     /*
2946     * If the native vector register is marked dirty, then it writes that
2947     * value out to the client vector register store.
2948     * Will produce a store, if the native vector register is dirty.
2949     */
2950     static inline void FASTCALL jitcFlushSingleVectorRegister(NativeVectorReg nreg)
2951     {
2952     if (gJITC.nativeVectorRegState[nreg] == rsDirty) {
2953     //printf("*** flush: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]);
2954     jitcStoreVectorRegister(nreg);
2955     }
2956     }
2957    
2958     /*
2959     * Flushes the register, frees it, and makes it least recently used.
2960     * Will produce a store, if the native vector register was dirty.
2961     */
2962     static inline void FASTCALL jitcTrashSingleVectorRegister(NativeVectorReg nreg)
2963     {
2964     if (gJITC.nativeVectorRegState[nreg] > rsUnused) {
2965     //printf("*** trash: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]);
2966     }
2967    
2968     jitcFlushSingleVectorRegister(nreg);
2969     jitcDropSingleVectorRegister(nreg);
2970     }
2971    
2972     /*
2973     * Flushes the register, frees it, and makes it most recently used.
2974     * Will produce a store, if the native vector register was dirty.
2975     */
2976     static inline void FASTCALL jitcClobberSingleVectorRegister(NativeVectorReg nreg)
2977     {
2978     if (gJITC.nativeVectorRegState[nreg] > rsUnused) {
2979     //printf("*** clobber: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]);
2980     }
2981    
2982     jitcFlushSingleVectorRegister(nreg);
2983     jitcTouchVectorRegister(nreg);
2984     jitcUnmapVectorRegister(nreg);
2985     }
2986    
2987     /*
2988     * Allocates a native vector register.
2989     * If hint is non-zero, then it indicates that the value is unlikely
2990     * to be re-used soon, so to keep it at the end of the LRU.
2991     * To use hints, pass hint == the number of temporary registers
2992     * May produce a store, if no native vector registers are available.
2993     */
2994     NativeVectorReg FASTCALL jitcAllocVectorRegister(int hint)
2995     {
2996     NativeVectorReg nreg = gJITC.MRUvregs[XMM_SENTINEL];
2997    
2998     if (hint >= XMM_SENTINEL) {
2999     nreg = gJITC.LRUvregs[nreg];
3000    
3001     jitcTrashSingleVectorRegister(nreg);
3002     } else if (hint) {
3003     for (int i=1; i<hint; i++) {
3004     nreg = gJITC.MRUvregs[nreg];
3005     }
3006    
3007     jitcTrashSingleVectorRegister(nreg);
3008     } else {
3009     jitcClobberSingleVectorRegister(nreg);
3010     }
3011    
3012     return nreg;
3013     }
3014    
3015     /*
3016     * Returns native vector register that contains value of client
3017     * register or allocates new vector register which maps to
3018     * the client register.
3019     * Marks the register dirty.
3020     *
3021     * May produce a store, if no registers are available.
3022     * Will never produce a load.
3023     */
3024     NativeVectorReg FASTCALL jitcMapClientVectorRegisterDirty(JitcVectorReg creg, int hint)
3025     {
3026     NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
3027    
3028     if (nreg == VECTREG_NO) {
3029     nreg = jitcAllocVectorRegister(hint);
3030    
3031     jitcMapVectorRegister(nreg, creg);
3032     } else if (hint) {
3033     jitcDiscardVectorRegister(nreg);
3034     } else {
3035     jitcTouchVectorRegister(nreg);
3036     }
3037    
3038     jitcDirtyVectorRegister(nreg);
3039    
3040     return nreg;
3041     }
3042    
3043     /*
3044     * Returns native vector register that contains the value of the
3045     * client vector register, or allocates new register, and
3046     * loads this value into it.
3047     *
3048     * May produce a store, if no register are available.
3049     * May produce a load, if client vector register isn't mapped.
3050     */
3051     NativeVectorReg FASTCALL jitcGetClientVectorRegister(JitcVectorReg creg, int hint)
3052     {
3053     NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
3054    
3055     if (nreg == VECTREG_NO) {
3056     nreg = jitcAllocVectorRegister(hint);
3057     jitcMapVectorRegister(nreg, creg);
3058    
3059     jitcLoadVectorRegister(nreg);
3060     } else if (hint) {
3061     jitcDiscardVectorRegister(nreg);
3062     } else {
3063     jitcTouchVectorRegister(nreg);
3064     }
3065    
3066     return nreg;
3067     }
3068    
3069     /*
3070     * Returns native vector register that contains the value of the
3071     * client vector register, or allocates new register, and
3072     * loads this value into it.
3073     * Will mark the native vector register as dirty.
3074     *
3075     * May produce a store, if no register are available.
3076     * May produce a load, if client vector register isn't mapped.
3077     */
3078     NativeVectorReg FASTCALL jitcGetClientVectorRegisterDirty(JitcVectorReg creg, int hint)
3079     {
3080     NativeVectorReg nreg = jitcGetClientVectorRegister(creg, hint);
3081    
3082     jitcDirtyVectorRegister(nreg);
3083    
3084     return nreg;
3085     }
3086    
3087     /*
3088     * Flushes native vector register(s).
3089     * Resets dirty flags.
3090     * Will produce stores, if vector registers are dirty.
3091     */
3092     void FASTCALL jitcFlushVectorRegister(int options)
3093     {
3094     if (options == JITC_VECTOR_REGS_ALL) {
3095     for (unsigned int i = XMM0; i <= XMM7; i++) {
3096     jitcFlushSingleVectorRegister((NativeVectorReg)i);
3097     jitcUndirtyVectorRegister((NativeVectorReg)i);
3098     }
3099     } else if (options & NATIVE_REG) {
3100     NativeVectorReg nreg = (NativeVectorReg)(options & 0xf);
3101    
3102     jitcFlushSingleVectorRegister(nreg);
3103     jitcUndirtyVectorRegister(nreg);
3104     }
3105     }
3106    
3107     /*
3108     * Flushes native vector register(s).
3109     * Doesn't reset dirty flags.
3110     * Will produce stores, if vector registers are dirty.
3111     */
3112     void FASTCALL jitcFlushVectorRegisterDirty(int options)
3113     {
3114     if (options == JITC_VECTOR_REGS_ALL) {
3115     for (unsigned int i = XMM0; i <= XMM7; i++) {
3116     jitcFlushSingleVectorRegister((NativeVectorReg)i);
3117     }
3118     } else if (options & NATIVE_REG) {
3119     NativeVectorReg nreg = (NativeVectorReg)(options & 0xf);
3120    
3121     jitcFlushSingleVectorRegister(nreg);
3122     }
3123     }
3124    
3125     /*
3126     * Clobbers native vector register(s).
3127     * Will produce stores, if vector registers are dirty.
3128     */
3129     void FASTCALL jitcClobberVectorRegister(int options)
3130     {
3131     if (options == JITC_VECTOR_REGS_ALL) {
3132     for (unsigned int i = XMM0; i <= XMM7; i++) {
3133     jitcClobberSingleVectorRegister((NativeVectorReg)i);
3134     }
3135     } else if (options & NATIVE_REG) {
3136     NativeVectorReg nreg = (NativeVectorReg)(options & 0xf);
3137    
3138     jitcClobberSingleVectorRegister(nreg);
3139     }
3140     }
3141    
3142     /*
3143     * Trashes native vector register(s).
3144     * Will produce stores, if vector registers are dirty.
3145     */
3146     void FASTCALL jitcTrashVectorRegister(int options)
3147     {
3148     if (options == JITC_VECTOR_REGS_ALL) {
3149     for (unsigned int i = XMM0; i <= XMM7; i++) {
3150     jitcTrashSingleVectorRegister((NativeVectorReg)i);
3151     }
3152     } else if (options & NATIVE_REG) {
3153     NativeVectorReg nreg = (NativeVectorReg)(options & 0xf);
3154    
3155     jitcTrashSingleVectorRegister(nreg);
3156     }
3157     }
3158    
3159     /*
3160     * Drops native vector register(s).
3161     * Will not produce any code.
3162     */
3163     void FASTCALL jitcDropVectorRegister(int options)
3164     {
3165     if (options == JITC_VECTOR_REGS_ALL) {
3166     for (unsigned int i = XMM0; i <= XMM7; i++) {
3167     jitcDropSingleVectorRegister((NativeVectorReg)i);
3168     }
3169     } else if (options & NATIVE_REG) {
3170     NativeVectorReg nreg = (NativeVectorReg)(options & 0xf);
3171    
3172     jitcDropSingleVectorRegister(nreg);
3173     }
3174     }
3175    
3176     void FASTCALL jitcFlushClientVectorRegister(JitcVectorReg creg)
3177     {
3178     NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
3179    
3180     if (nreg != VECTREG_NO) {
3181     jitcFlushSingleVectorRegister(nreg);
3182     jitcUndirtyVectorRegister(nreg);
3183     }
3184     }
3185    
3186     void FASTCALL jitcTrashClientVectorRegister(JitcVectorReg creg)
3187     {
3188     NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
3189    
3190     if (nreg != VECTREG_NO) {
3191     jitcTrashSingleVectorRegister(nreg);
3192     }
3193     }
3194    
3195     void FASTCALL jitcClobberClientVectorRegister(JitcVectorReg creg)
3196     {
3197     NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
3198    
3199     if (nreg != VECTREG_NO) {
3200     jitcClobberSingleVectorRegister(nreg);
3201     }
3202     }
3203    
3204     void FASTCALL jitcDropClientVectorRegister(JitcVectorReg creg)
3205     {
3206     NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
3207    
3208     if (nreg != VECTREG_NO) {
3209     jitcDropSingleVectorRegister(nreg);
3210     }
3211     }
3212    
3213     /*
3214     * Renames a native vector register to a different client register.
3215     * Will not emit a load.
3216     * May emit a reg->reg move, if the vector register was in memory.
3217     * May emit a store, if the vector register was dirty
3218     */
3219     NativeVectorReg FASTCALL jitcRenameVectorRegisterDirty(NativeVectorReg reg, JitcVectorReg creg, int hint)
3220     {
3221     NativeVectorReg nreg = gJITC.c2nVectorReg[creg];
3222    
3223     if (nreg == reg) {
3224     /* That's weird... it's already mapped... */
3225     } else if (nreg != VECTREG_NO) {
3226     /* It's already in a register, so rather than losing
3227     * reg pool depth, just move the value.
3228     */
3229     asmALUPS(X86_MOVAPS, nreg, reg);
3230     } else {
3231     /* Otherwise, only the source register is in the reg
3232     * pool, so flush it, then remap it.
3233     */
3234     JitcVectorReg reg2 = gJITC.n2cVectorReg[reg];
3235    
3236     if (reg2 != VECTREG_NO) {
3237     jitcFlushSingleVectorRegister(reg);
3238     jitcUnmapVectorRegister(reg);
3239     }
3240    
3241     nreg = reg;
3242     jitcMapVectorRegister(nreg, creg);
3243     }
3244    
3245     if (hint) jitcDiscardVectorRegister(nreg);
3246     else jitcTouchVectorRegister(nreg);
3247    
3248     jitcDirtyVectorRegister(nreg);
3249    
3250     return nreg;
3251     }
3252    
3253     void asmMOVAPS(NativeVectorReg reg, const void *disp)
3254     {
3255     byte instr[8] = { 0x0f, 0x28 };
3256    
3257     instr[2] = 0x05 | (reg << 3);
3258     *((uint32 *)&instr[3]) = (uint32)disp;
3259    
3260     jitcEmit(instr, 7);
3261     }
3262    
3263     void asmMOVAPS(const void *disp, NativeVectorReg reg)
3264     {
3265     byte instr[8] = { 0x0f, 0x29 };
3266    
3267     instr[2] = 0x05 | (reg << 3);
3268     *((uint32 *)&instr[3]) = (uint32)disp;
3269    
3270     jitcEmit(instr, 7);
3271     }
3272    
3273     void asmMOVUPS(NativeVectorReg reg, const void *disp)
3274     {
3275     byte instr[8] = { 0x0f, 0x10 };
3276    
3277     instr[2] = 0x05 | (reg << 3);
3278     *((uint32 *)&instr[3]) = (uint32)disp;
3279    
3280     jitcEmit(instr, 7);
3281     }
3282    
3283     void asmMOVUPS(const void *disp, NativeVectorReg reg)
3284     {
3285     byte instr[8] = { 0x0f, 0x11 };
3286    
3287     instr[2] = 0x05 | (reg << 3);
3288     *((uint32 *)&instr[3]) = (uint32)disp;
3289    
3290     jitcEmit(instr, 7);
3291     }
3292    
3293     void asmMOVSS(NativeVectorReg reg, const void *disp)
3294     {
3295     byte instr[10] = { 0xf3, 0x0f, 0x10 };
3296    
3297     instr[3] = 0x05 | (reg << 3);
3298     *((uint32 *)&instr[4]) = (uint32)disp;
3299    
3300     jitcEmit(instr, 8);
3301     }
3302    
3303     void asmMOVSS(const void *disp, NativeVectorReg reg)
3304     {
3305     byte instr[10] = { 0xf3, 0x0f, 0x11 };
3306    
3307     instr[3] = 0x05 | (reg << 3);
3308     *((uint32 *)&instr[4]) = (uint32)disp;
3309    
3310     jitcEmit(instr, 8);
3311     }
3312    
3313     void asmALUPS(X86ALUPSopc opc, NativeVectorReg reg1, NativeVectorReg reg2)
3314     {
3315     byte instr[4] = { 0x0f };
3316    
3317     instr[1] = opc;
3318     instr[2] = 0xc0 + (reg1 << 3) + reg2;
3319    
3320     jitcEmit(instr, 3);
3321     }
3322    
3323     void asmALUPS(X86ALUPSopc opc, NativeVectorReg reg1, modrm_p modrm)
3324     {
3325     byte instr[16] = { 0x0f };
3326     int len = modrm++[0];
3327    
3328     instr[1] = opc;
3329     memcpy(&instr[2], modrm, len);
3330     instr[2] |= (reg1 << 3);
3331    
3332     jitcEmit(instr, len+2);
3333     }
3334    
3335     void asmPALU(X86PALUopc opc, NativeVectorReg reg1, NativeVectorReg reg2)
3336     {
3337     byte instr[5] = { 0x66, 0x0f };
3338    
3339     instr[2] = opc;
3340     instr[3] = 0xc0 + (reg1 << 3) + reg2;
3341    
3342     jitcEmit(instr, 4);
3343     }
3344    
3345     void asmPALU(X86PALUopc opc, NativeVectorReg reg1, modrm_p modrm)
3346     {
3347     byte instr[5] = { 0x66, 0x0f };
3348     int len = modrm++[0];
3349    
3350     instr[2] = opc;
3351     memcpy(&instr[3], modrm, len);
3352     instr[3] |= (reg1 << 3);
3353    
3354     jitcEmit(instr, len+3);
3355     }
3356    
3357     void asmSHUFPS(NativeVectorReg reg1, NativeVectorReg reg2, int order)
3358     {
3359     byte instr[5] = { 0x0f, 0xc6, 0xc0+(reg1<<3)+reg2, order };
3360    
3361     jitcEmit(instr, 4);
3362     }
3363    
3364     void asmSHUFPS(NativeVectorReg reg1, modrm_p modrm, int order)
3365     {
3366     byte instr[16] = { 0x0f, 0xc6 };
3367     int len = modrm++[0];
3368    
3369     memcpy(&instr[2], modrm, len);
3370     instr[2] |= (reg1 << 3);
3371     instr[len+2] = order;
3372    
3373     jitcEmit(instr, len+3);
3374     }
3375    
3376     void asmPSHUFD(NativeVectorReg reg1, NativeVectorReg reg2, int order)
3377     {
3378     byte instr[6] = { 0x66, 0x0f, 0x70, 0xc0+(reg1<<3)+reg2, order };
3379    
3380     jitcEmit(instr, 5);
3381     }
3382    
3383     void asmPSHUFD(NativeVectorReg reg1, modrm_p modrm, int order)
3384     {
3385     byte instr[5] = { 0x66, 0x0f, 0x70 };
3386     int len = modrm++[0];
3387    
3388     memcpy(&instr[3], modrm, len);
3389     instr[3] |= (reg1 << 3);
3390     instr[len+3] = order;
3391    
3392     jitcEmit(instr, len+4);
3393     }

  ViewVC Help
Powered by ViewVC 1.1.26