1 |
/* |
2 |
* PearPC |
3 |
* x86asm.cc |
4 |
* |
5 |
* Copyright (C) 2004 Sebastian Biallas (sb@biallas.net) |
6 |
* |
7 |
* This program is free software; you can redistribute it and/or modify |
8 |
* it under the terms of the GNU General Public License version 2 as |
9 |
* published by the Free Software Foundation. |
10 |
* |
11 |
* This program is distributed in the hope that it will be useful, |
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
* GNU General Public License for more details. |
15 |
* |
16 |
* You should have received a copy of the GNU General Public License |
17 |
* along with this program; if not, write to the Free Software |
18 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 |
*/ |
20 |
|
21 |
#include <cstring> |
22 |
#include <cstdlib> |
23 |
|
24 |
#include "tools/debug.h" |
25 |
#include "tools/snprintf.h" |
26 |
#include "jitc.h" |
27 |
#include "jitc_asm.h" |
28 |
#include "jitc_debug.h" |
29 |
#include "x86asm.h" |
30 |
|
31 |
/*
 * Fills caps with the capabilities of the host x86 CPU via CPUID.
 *
 * caps is zeroed first, so every feature flag defaults to false.
 * If CPUID is unavailable (ppc_cpuid_asm(0, ...) fails) only the
 * vendor string is set (to "unknown") and we return early.
 *
 * Fix: the 3DNow!/3DNow!+ flags were previously also read from
 * standard CPUID leaf 1 EDX bits 31/30 — those bits are not 3DNow!
 * (they are PBE/reserved there). 3DNow! is only reported by the AMD
 * extended leaf 0x80000001, which is queried below; the bogus leaf-1
 * assignments have been removed.
 */
void x86GetCaps(X86CPUCaps &caps)
{
	memset(&caps, 0, sizeof caps);

	caps.loop_align = 8;

	struct {
		uint32 level, c, d, b;
	} id;

	if (!ppc_cpuid_asm(0, &id)) {
		ht_snprintf(caps.vendor, sizeof caps.vendor, "unknown");
		return;
	}

	// vendor string is returned in EBX, EDX, ECX order
	*((uint32 *)caps.vendor) = id.b;
	*((uint32 *)(caps.vendor+4)) = id.d;
	*((uint32 *)(caps.vendor+8)) = id.c;
	caps.vendor[12] = 0;
	ht_printf("%s\n", caps.vendor);
	if (id.level == 0) return;

	struct {
		uint32 model, features2, features, b;
	} id2;

	// standard leaf 1: feature flags in EDX (features) / ECX (features2)
	ppc_cpuid_asm(1, &id2);
	caps.rdtsc = id2.features & (1<<4);
	caps.cmov = id2.features & (1<<15);
	caps.mmx = id2.features & (1<<23);
	caps.sse = id2.features & (1<<25);
	caps.sse2 = id2.features & (1<<26);
	caps.sse3 = id2.features2 & (1<<0);

	ppc_cpuid_asm(0x80000000, &id);
	if (id.level >= 0x80000001) {
		// processor supports extended functions
		// now test for 3dnow (AMD extended leaf 0x80000001, EDX)
		ppc_cpuid_asm(0x80000001, &id2);

		caps._3dnow = id2.features & (1<<31);
		caps._3dnow2 = id2.features & (1<<30);
	}

	ht_printf("%s%s%s%s%s%s%s\n",
		caps.cmov?" CMOV":"",
		caps.mmx?" MMX":"",
		caps._3dnow?" 3DNOW":"",
		caps._3dnow2?" 3DNOW+":"",
		caps.sse?" SSE":"",
		caps.sse2?" SSE2":"",
		caps.sse3?" SSE3":"");
}
86 |
|
87 |
/* |
88 |
* internal functions |
89 |
*/ |
90 |
|
91 |
/*
 * Establishes the two-way mapping between native register nreg and
 * client (PPC) register creg in the JITC translation tables.
 * Does not change the register state and emits no code.
 */
static inline void FASTCALL jitcMapRegister(NativeReg nreg, PPC_Register creg)
{
	gJITC.nativeReg[nreg] = creg;
	gJITC.clientReg[creg] = nreg;
}
96 |
|
97 |
/*
 * Removes the two-way mapping of native register reg.
 * Does not change the register state (callers reset it separately).
 */
static inline void FASTCALL jitcUnmapRegister(NativeReg reg)
{
	gJITC.clientReg[gJITC.nativeReg[reg]] = REG_NO;
	gJITC.nativeReg[reg] = PPC_REG_NO;
}
102 |
|
103 |
/*
 * Emits a load of client register creg (creg is used as a byte offset
 * into gCPU) into native register nreg, records the mapping and marks
 * nreg as rsMapped (in sync with memory, not dirty).
 */
static inline void FASTCALL jitcLoadRegister(NativeReg nreg, PPC_Register creg)
{
	asmMOVRegDMem(nreg, (uint32)&gCPU+creg);
	jitcMapRegister(nreg, creg);
	gJITC.nativeRegState[nreg] = rsMapped;
}
109 |
|
110 |
/*
 * Emits a store of native register nreg back to client register creg
 * in gCPU. Does not touch mapping or state.
 */
static inline void FASTCALL jitcStoreRegister(NativeReg nreg, PPC_Register creg)
{
	asmMOVDMemReg((uint32)&gCPU+creg, nreg);
}
114 |
|
115 |
/*
 * Stores nreg back to creg and downgrades its state to rsMapped,
 * i.e. the register stays mapped but is no longer dirty.
 */
static inline void FASTCALL jitcStoreRegisterUndirty(NativeReg nreg, PPC_Register creg)
{
	jitcStoreRegister(nreg, creg);
	gJITC.nativeRegState[nreg] = rsMapped; // no longer dirty
}
120 |
|
121 |
/*
 * Returns the client register currently mapped to native register reg,
 * or PPC_REG_NO if reg is unmapped.
 */
static inline PPC_Register FASTCALL jitcGetRegisterMapping(NativeReg reg)
{
	return gJITC.nativeReg[reg];
}
125 |
|
126 |
/*
 * Returns the native register currently holding client register creg,
 * or REG_NO if creg is not mapped.
 */
NativeReg FASTCALL jitcGetClientRegisterMapping(PPC_Register creg)
{
	return gJITC.clientReg[creg];
}
130 |
|
131 |
/*
 * Marks native register r as unused, discarding its content.
 * Emits no code.
 */
static inline void FASTCALL jitcDiscardRegister(NativeReg r)
{
	// FIXME: move to front of the LRU list
	gJITC.nativeRegState[r] = rsUnused;
}
136 |
|
137 |
/* |
138 |
* Puts native register to the end of the LRU list |
139 |
*/ |
140 |
/*
 * Puts native register r at the end (most-recently-used side) of the
 * LRU list. No-op if r already is the MRU register.
 */
void FASTCALL jitcTouchRegister(NativeReg r)
{
	NativeRegType *reg = gJITC.nativeRegsList[r];
	if (reg->moreRU) {
		// there's a more recently used register: unlink reg ...
		if (reg->lessRU) {
			reg->lessRU->moreRU = reg->moreRU;
			reg->moreRU->lessRU = reg->lessRU;
		} else {
			// reg was LRUreg, its successor becomes the new LRU
			gJITC.LRUreg = reg->moreRU;
			reg->moreRU->lessRU = NULL;
		}
		// ... and relink it as the new MRU register
		reg->moreRU = NULL;
		reg->lessRU = gJITC.MRUreg;
		gJITC.MRUreg->moreRU = reg;
		gJITC.MRUreg = reg;
	}
}
159 |
|
160 |
/* |
161 |
* clobbers and moves to end of LRU list |
162 |
*/ |
163 |
/*
 * Clobbers reg (store back if dirty, then unmap) and moves it to the
 * MRU end of the LRU list. Produces a store only for a dirty register.
 */
static inline void FASTCALL jitcClobberAndTouchRegister(NativeReg reg)
{
	switch (gJITC.nativeRegState[reg]) {
	case rsDirty:
		jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
		// fall through
	case rsMapped:
		jitcUnmapRegister(reg);
		gJITC.nativeRegState[reg] = rsUnused;
		break;
	case rsUnused:;
	}
	jitcTouchRegister(reg);
}
177 |
|
178 |
/* |
179 |
* clobbers and moves to front of LRU list |
180 |
*/ |
181 |
/*
 * Clobbers reg (store back if dirty, then unmap) and marks it unused.
 * Unlike jitcClobberAndTouchRegister this does NOT touch the LRU list.
 */
static inline void FASTCALL jitcClobberAndDiscardRegister(NativeReg reg)
{
	switch (gJITC.nativeRegState[reg]) {
	case rsDirty:
		jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
		// fall through
	case rsMapped:
		jitcUnmapRegister(reg);
		jitcDiscardRegister(reg);
		break;
	case rsUnused:;
		/*
		 * Note: it makes no sense to move this register to
		 *       the front of the LRU list here, since only
		 *       other unused registers can be before it in the list
		 *
		 * Note2: it would even be an error to move it here,
		 *        since ESP isn't in the nativeRegsList
		 */
	}
}
202 |
|
203 |
/*
 * Clobbers one native register: stores it back if dirty, unmaps it and
 * marks it unused. Leaves the LRU list untouched.
 */
void FASTCALL jitcClobberSingleRegister(NativeReg reg)
{
	switch (gJITC.nativeRegState[reg]) {
	case rsDirty:
		jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
		// fall through
	case rsMapped:
		jitcUnmapRegister(reg);
		gJITC.nativeRegState[reg] = rsUnused;
		break;
	case rsUnused:;
	}
}
216 |
|
217 |
/* |
218 |
* Dirty register. |
219 |
* Does *not* touch register |
220 |
* Will not produce code. |
221 |
*/ |
222 |
/*
 * Marks native register r as dirty (modified relative to gCPU) and
 * returns it for call chaining.
 * Does *not* touch the LRU list. Will not produce code.
 */
NativeReg FASTCALL jitcDirtyRegister(NativeReg r)
{
	gJITC.nativeRegState[r] = rsDirty;
	return r;
}
227 |
|
228 |
/*
 * Allocates the specific native register reg: clobbers its current
 * content (storing if dirty) and touches it in the LRU list.
 */
NativeReg FASTCALL jitcAllocFixedRegister(NativeReg reg)
{
	jitcClobberAndTouchRegister(reg);
	return reg;
}
233 |
|
234 |
/* |
235 |
* Allocates a native register |
236 |
* May produce a store if no registers are avaiable |
237 |
*/ |
238 |
/*
 * Allocates a native register.
 *   NATIVE_REG | reg : allocates the given fixed register
 *   NATIVE_REG_8     : allocates the least recently used of EAX..EBX
 *                      (the registers with byte-addressable halves)
 *   otherwise        : allocates the least recently used register
 * May produce a store if no registers are available.
 */
NativeReg FASTCALL jitcAllocRegister(int options)
{
	NativeReg reg;
	if (options & NATIVE_REG) {
		// allocate fixed register
		reg = (NativeReg)(options & 0xf);
	} else if (options & NATIVE_REG_8) {
		// allocate eax, ecx, edx or ebx, searching from the LRU end
		NativeRegType *rt = gJITC.LRUreg;
		while (rt->reg > EBX) rt = rt->moreRU;
		reg = rt->reg;
	} else {
		// allocate the least recently used register
		reg = gJITC.LRUreg->reg;
	}
	return jitcAllocFixedRegister(reg);
}
255 |
|
256 |
/* |
257 |
* Returns native registers that contains value of |
258 |
* client register or allocates new register which |
259 |
* maps to the client register. |
260 |
* Dirties register. |
261 |
* |
262 |
* May produce a store if no registers are avaiable |
263 |
* May produce a MOV/XCHG to satisfy mapping |
264 |
* Will never produce a load |
265 |
*/ |
266 |
/*
 * Returns a native register that contains the value of client
 * register creg, or allocates a new register which maps to the
 * client register. Dirties the returned register.
 *
 * May produce a store if no registers are available
 * May produce a MOV/XCHG to satisfy a fixed-register request
 * Will never produce a load
 */
NativeReg FASTCALL jitcMapClientRegisterDirty(PPC_Register creg, int options)
{
	if (options & NATIVE_REG_8) {
		// nyi
		ht_printf("unimpl x86asm:%d\n", __LINE__);
		exit(-1);
	}
	if (options & NATIVE_REG) {
		NativeReg want_reg = (NativeReg)(options & 0xf);
		PPC_Register have_mapping = jitcGetRegisterMapping(want_reg);

		if (have_mapping != PPC_REG_NO) {
			// test if we're lucky: want_reg already holds creg
			if (have_mapping == creg) {
				jitcDirtyRegister(want_reg);
				jitcTouchRegister(want_reg);
				return want_reg;
			}

			// we're not lucky, get a new register for the old mapping
			NativeReg temp_reg = jitcAllocRegister();
			// note that AllocRegister also touches temp_reg

			// make new mapping
			// NOTE(review): if creg was mapped to some third
			// register here, that stale mapping is not cleared;
			// presumably callers rule this case out -- verify.
			jitcMapRegister(want_reg, creg);

			// temp_reg inherits the state of the old mapping
			gJITC.nativeRegState[temp_reg] = gJITC.nativeRegState[want_reg];
			// now we can mess with want_reg
			jitcDirtyRegister(want_reg);

			// maybe the old mapping was discarded and we're done
			// (jitcAllocRegister may have handed back want_reg itself)
			if (temp_reg == want_reg) return want_reg;

			// ok, restore the old mapping's value into temp_reg
			// NOTE(review): XCHG is used whenever EAX is involved,
			// MOV otherwise -- the reason is not evident from this
			// file (possibly encoding-size related); confirm.
			if (temp_reg == EAX || want_reg == EAX) {
				asmALURegReg(X86_XCHG, temp_reg, want_reg);
			} else {
				asmALURegReg(X86_MOV, temp_reg, want_reg);
			}
			jitcMapRegister(temp_reg, have_mapping);
		} else {
			// want_reg is free
			// unmap creg's current register if needed
			NativeReg reg = jitcGetClientRegisterMapping(creg);
			if (reg != REG_NO) {
				jitcUnmapRegister(reg);
				jitcDiscardRegister(reg);
			}
			jitcMapRegister(want_reg, creg);
			jitcDirtyRegister(want_reg);
		}
		jitcTouchRegister(want_reg);
		return want_reg;
	} else {
		// no fixed-register request: reuse an existing mapping or
		// allocate a fresh register (no load -- value will be written)
		NativeReg reg = jitcGetClientRegisterMapping(creg);
		if (reg == REG_NO) {
			reg = jitcAllocRegister();
			jitcMapRegister(reg, creg);
		} else {
			jitcTouchRegister(reg);
		}
		return jitcDirtyRegister(reg);
	}
}
330 |
|
331 |
|
332 |
/* |
333 |
* Returns native registers that contains value of |
334 |
* client register or allocates new register with |
335 |
* this content. |
336 |
* |
337 |
* May produce a store if no registers are avaiable |
338 |
* May produce a load if client registers isn't mapped |
339 |
* May produce a MOV/XCHG to satisfy mapping |
340 |
*/ |
341 |
/*
 * Returns a native register that contains the value of client
 * register creg, or allocates a new register with this content.
 *
 * May produce a store if no registers are available
 * May produce a load if the client register isn't mapped
 * May produce a MOV/XCHG to satisfy a fixed-register or
 * byte-addressable (NATIVE_REG_8) request
 */
NativeReg FASTCALL jitcGetClientRegister(PPC_Register creg, int options)
{
	if (options & NATIVE_REG_8) {
		// need a register with byte-addressable halves (EAX..EBX)
		NativeReg client_reg_maps_to = jitcGetClientRegisterMapping(creg);
		if (client_reg_maps_to == REG_NO) {
			NativeReg reg = jitcAllocRegister(NATIVE_REG_8);
			jitcLoadRegister(reg, creg);
			return reg;
		} else {
			if (client_reg_maps_to <= EBX) {
				// already in a byte-addressable register
				jitcTouchRegister(client_reg_maps_to);
				return client_reg_maps_to;
			}
			// move the value into a byte-addressable register and
			// transfer mapping + state to it
			NativeReg want_reg = jitcAllocRegister(NATIVE_REG_8);
			asmALURegReg(X86_MOV, want_reg, client_reg_maps_to);
			jitcUnmapRegister(client_reg_maps_to);
			jitcMapRegister(want_reg, creg);
			gJITC.nativeRegState[want_reg] = gJITC.nativeRegState[client_reg_maps_to];
			gJITC.nativeRegState[client_reg_maps_to] = rsUnused;
			return want_reg;
		}
	}
	if (options & NATIVE_REG) {
		NativeReg want_reg = (NativeReg)(options & 0xf);
		PPC_Register native_reg_maps_to = jitcGetRegisterMapping(want_reg);
		NativeReg client_reg_maps_to = jitcGetClientRegisterMapping(creg);
		if (native_reg_maps_to != PPC_REG_NO) {
			// test if we're lucky: want_reg already holds creg
			if (native_reg_maps_to == creg) {
				jitcTouchRegister(want_reg);
			} else {
				// we need to satisfy the mapping
				if (client_reg_maps_to != REG_NO) {
					// creg lives elsewhere: swap values,
					// states and mappings of the two registers
					asmALURegReg(X86_XCHG, want_reg, client_reg_maps_to);
					RegisterState rs = gJITC.nativeRegState[want_reg];
					gJITC.nativeRegState[want_reg] = gJITC.nativeRegState[client_reg_maps_to];
					gJITC.nativeRegState[client_reg_maps_to] = rs;
					jitcMapRegister(want_reg, creg);
					jitcMapRegister(client_reg_maps_to, native_reg_maps_to);
					jitcTouchRegister(want_reg);
				} else {
					// client register isn't mapped: free
					// want_reg and load creg into it
					jitcAllocFixedRegister(want_reg);
					jitcLoadRegister(want_reg, creg);
				}
			}
			return want_reg;
		} else {
			// want_reg is free
			jitcTouchRegister(want_reg);
			if (client_reg_maps_to != REG_NO) {
				// move creg's value over and transfer the mapping
				asmALURegReg(X86_MOV, want_reg, client_reg_maps_to);
				gJITC.nativeRegState[want_reg] = gJITC.nativeRegState[client_reg_maps_to];
				jitcUnmapRegister(client_reg_maps_to);
				jitcDiscardRegister(client_reg_maps_to);
				jitcMapRegister(want_reg, creg);
			} else {
				jitcLoadRegister(want_reg, creg);
			}
			return want_reg;
		}
	} else {
		// no constraints: reuse the existing mapping or load fresh
		NativeReg client_reg_maps_to = jitcGetClientRegisterMapping(creg);
		if (client_reg_maps_to != REG_NO) {
			jitcTouchRegister(client_reg_maps_to);
			return client_reg_maps_to;
		} else {
			NativeReg reg = jitcAllocRegister();
			jitcLoadRegister(reg, creg);
			return reg;
		}
	}
}
414 |
|
415 |
/* |
416 |
* Same as jitcGetClientRegister() but also dirties result |
417 |
*/ |
418 |
/*
 * Same as jitcGetClientRegister() but also dirties the result.
 */
NativeReg FASTCALL jitcGetClientRegisterDirty(PPC_Register creg, int options)
{
	return jitcDirtyRegister(jitcGetClientRegister(creg, options));
}
422 |
|
423 |
/*
 * Stores reg back to its client register if dirty, resetting the
 * dirty flag (state becomes rsMapped). No-op otherwise.
 */
static inline void FASTCALL jitcFlushSingleRegister(NativeReg reg)
{
	if (gJITC.nativeRegState[reg] == rsDirty) {
		jitcStoreRegisterUndirty(reg, jitcGetRegisterMapping(reg));
	}
}
429 |
|
430 |
/*
 * Stores reg back to its client register if dirty but KEEPS the
 * dirty flag set (used when the register stays live).
 */
static inline void FASTCALL jitcFlushSingleRegisterDirty(NativeReg reg)
{
	if (gJITC.nativeRegState[reg] == rsDirty) {
		jitcStoreRegister(reg, jitcGetRegisterMapping(reg));
	}
}
436 |
|
437 |
/* |
438 |
* Flushes native register(s). |
439 |
* Resets dirty flags. |
440 |
* Will produce a store if register is dirty. |
441 |
*/ |
442 |
/*
 * Flushes native register(s).
 * Resets dirty flags.
 * Will produce a store for every dirty register flushed.
 */
void FASTCALL jitcFlushRegister(int options)
{
	if (options == NATIVE_REGS_ALL) {
		for (NativeReg r = EAX; r <= EDI; r = (NativeReg)(r+1)) {
			jitcFlushSingleRegister(r);
		}
		return;
	}
	if (options & NATIVE_REG) {
		jitcFlushSingleRegister((NativeReg)(options & 0xf));
	}
}
451 |
|
452 |
/* |
453 |
* Flushes native register(s). |
454 |
* Doesnt reset dirty flags. |
455 |
* Will produce a store if register is dirty. |
456 |
*/ |
457 |
/*
 * Flushes native register(s).
 * Doesn't reset dirty flags.
 * Will produce a store for every dirty register flushed.
 */
void FASTCALL jitcFlushRegisterDirty(int options)
{
	if (options == NATIVE_REGS_ALL) {
		for (NativeReg r = EAX; r <= EDI; r = (NativeReg)(r+1)) {
			jitcFlushSingleRegisterDirty(r);
		}
		return;
	}
	if (options & NATIVE_REG) {
		jitcFlushSingleRegisterDirty((NativeReg)(options & 0xf));
	}
}
466 |
/* |
467 |
* Clobbers native register(s). |
468 |
* Register is unused afterwards. |
469 |
* Will produce a store if register was dirty. |
470 |
*/ |
471 |
/*
 * Clobbers native register(s).
 * Register is unused afterwards.
 * Will produce a store if the register was dirty.
 */
void FASTCALL jitcClobberRegister(int options)
{
	if (options == NATIVE_REGS_ALL) {
		/*
		 * clobberAndDiscard would be pointless here: moving a
		 * single register in the LRU list makes no sense when
		 * all of them get clobbered.
		 */
		for (NativeReg r = EAX; r <= EDI; r = (NativeReg)(r+1)) {
			jitcClobberSingleRegister(r);
		}
		return;
	}
	if (options & NATIVE_REG) {
		jitcClobberAndDiscardRegister((NativeReg)(options & 0xf));
	}
}
485 |
|
486 |
/* |
487 |
* |
488 |
*/ |
489 |
/*
 * Flushes everything back to gCPU (carry/flags, GPRs, vector
 * registers) while keeping all mappings intact.
 */
void FASTCALL jitcFlushAll()
{
	jitcClobberCarryAndFlags();
	jitcFlushRegister();
	jitcFlushVectorRegister();
}
495 |
|
496 |
/* |
497 |
* |
498 |
*/ |
499 |
/*
 * Clobbers everything: carry/flags, GPRs, float and vector registers.
 * All registers are unused afterwards; dirty content is stored back.
 */
void FASTCALL jitcClobberAll()
{
	jitcClobberCarryAndFlags();
	jitcClobberRegister();
	jitcFloatRegisterClobberAll();
	jitcTrashVectorRegister();
}
506 |
|
507 |
/* |
508 |
* Invalidates all mappings |
509 |
* |
510 |
* Will never produce code |
511 |
*/ |
512 |
/*
 * Invalidates all mappings (GPR, carry/flags and vector).
 *
 * Will never produce code -- dirty content is silently dropped, so
 * callers must have flushed beforehand.
 */
void FASTCALL jitcInvalidateAll()
{
#if 0
	// NOTE(review): this (disabled) debug check looks inverted --
	// it would warn for every register that is NOT dirty;
	// "== rsDirty" was probably intended.
	for (int i=EAX; i<=EDI; i++) {
		if(gJITC.nativeRegState[i] != rsDirty) {
			printf("!!! Unflushed register invalidated!\n");
		}
	}
#endif

	// memset-filling these tables assumes the sentinel values
	// (PPC_REG_NO, rsUnused, REG_NO) are single repeated bytes --
	// TODO confirm against their definitions
	memset(gJITC.nativeReg, PPC_REG_NO, sizeof gJITC.nativeReg);
	memset(gJITC.nativeRegState, rsUnused, sizeof gJITC.nativeRegState);
	memset(gJITC.clientReg, REG_NO, sizeof gJITC.clientReg);
	gJITC.nativeCarryState = gJITC.nativeFlagsState = rsUnused;

	// vector registers only get a diagnostic, then are dropped too
	for (unsigned int i=XMM0; i<=XMM7; i++) {
		if(gJITC.nativeVectorRegState[i] == rsDirty) {
			printf("!!! Unflushed vector register invalidated! (XMM%u)\n", i);
		}
	}

	memset(gJITC.n2cVectorReg, PPC_VECTREG_NO, sizeof gJITC.n2cVectorReg);
	memset(gJITC.c2nVectorReg, VECTREG_NO, sizeof gJITC.c2nVectorReg);
	memset(gJITC.nativeVectorRegState, rsUnused, sizeof gJITC.nativeVectorRegState);

	gJITC.nativeVectorReg = VECTREG_NO;
}
539 |
|
540 |
/* |
541 |
* Gets the client carry flags into the native carry flag |
542 |
* |
543 |
* |
544 |
*/ |
545 |
/*
 * Gets the client carry flag into the native x86 carry flag.
 * Emits a BT instruction (which sets CF) if the carry isn't already
 * live; flags are clobbered first since BT destroys them.
 */
void FASTCALL jitcGetClientCarry()
{
	if (gJITC.nativeCarryState == rsUnused) {
		jitcClobberFlags();

#if 0
		// bt [gCPU.xer], XER_CA  (carry as bit 29 of XER)
		byte modrm[6];
		asmBTxMemImm(X86_BT, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.xer), 29);
#else
		// bt [gCPU.xer_ca], 0  (carry kept in its own byte)
		byte modrm[6];
		asmBTxMemImm(X86_BT, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.xer_ca), 0);
#endif
		gJITC.nativeCarryState = rsMapped;
	}
}
562 |
|
563 |
/*
 * Declares that the native x86 flags now represent client CR field cr
 * and must eventually be flushed back (state rsDirty).
 */
void FASTCALL jitcMapFlagsDirty(PPC_CRx cr)
{
	gJITC.nativeFlags = cr;
	gJITC.nativeFlagsState = rsDirty;
}
568 |
|
569 |
/*
 * Returns the CR field the native flags currently map to.
 */
PPC_CRx FASTCALL jitcGetFlagsMapping()
{
	return gJITC.nativeFlags;
}
573 |
|
574 |
/*
 * True iff the native flags currently carry client state
 * (mapped or dirty).
 */
bool FASTCALL jitcFlagsMapped()
{
	return gJITC.nativeFlagsState != rsUnused;
}
578 |
|
579 |
/*
 * True iff the native carry flag currently carries client state
 * (mapped or dirty).
 */
bool FASTCALL jitcCarryMapped()
{
	return gJITC.nativeCarryState != rsUnused;
}
583 |
|
584 |
/*
 * Declares that the native carry flag holds the client carry and must
 * eventually be flushed back (state rsDirty).
 */
void FASTCALL jitcMapCarryDirty()
{
	gJITC.nativeCarryState = rsDirty;
}
588 |
|
589 |
/*
 * Stores the native carry flag into gCPU.xer_ca via SETC.
 * Does not change the carry state (callers handle that).
 */
static inline void FASTCALL jitcFlushCarry()
{
	byte modrm[6];
	asmSETMem(X86_C, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.xer_ca));
}
594 |
|
595 |
#if 0

/*
 * Flush native flags into the client CR via an external asm helper.
 * (Disabled in favor of the inline table-driven version below.)
 */
static inline void FASTCALL jitcFlushFlags()
{
	asmCALL((NativeAddress)ppc_flush_flags_asm);
}

#else

/*
 * Translation tables from x86 flag combinations to PPC CR bits.
 * Presumably initialized elsewhere in the JITC startup code -- the
 * initialization is not in this file. The 257-entry tables are
 * indexed by the 16-bit SETcc pair value (0, 1, 0x100; 0x101 cannot
 * occur since SF/ZF resp. the two conditions are mutually exclusive
 * after a compare -- TODO confirm).
 */
uint8 jitcFlagsMapping[257];
uint8 jitcFlagsMapping2[256];
uint8 jitcFlagsMappingCMP_U[257];
uint8 jitcFlagsMappingCMP_L[257];

/*
 * Flushes the native SF/ZF flags into the client CR byte at
 * gCPU.cr+3: builds a table index from SETS/SETZ, masks the target
 * nibble and ORs in the translated value.
 */
static inline void FASTCALL jitcFlushFlags()
{
#if 1
	byte modrm[6];
	NativeReg r = jitcAllocRegister(NATIVE_REG_8);
	asmSETReg8(X86_S, (NativeReg8)r);
	asmSETReg8(X86_Z, (NativeReg8)(r+4));	// r+4 = high-byte reg (AL->AH etc.)
	asmMOVxxRegReg16(X86_MOVZX, r, r);	// r = 0, 1 or 0x100
	asmALUMemImm8(X86_AND, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), 0x0f);
	asmALURegMem8(X86_MOV, (NativeReg8)r, modrm, x86_mem(modrm, r, (uint32)&jitcFlagsMapping));
	asmALUMemReg8(X86_OR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), (NativeReg8)r);
#else
	// alternative: capture flags via LAHF into AH
	byte modrm[6];
	jitcAllocRegister(NATIVE_REG | EAX);
	asmSimple(X86_LAHF);
	asmMOVxxRegReg8(X86_MOVZX, EAX, AH);
	asmALUMemImm8(X86_AND, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), 0x0f);
	asmALURegMem8(X86_MOV, AL, modrm, x86_mem(modrm, EAX, (uint32)&jitcFlagsMapping2));
	asmALUMemReg8(X86_OR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+3), AL);
#endif
}

#endif
632 |
|
633 |
/*
 * Translates the native flags after a compare into the client CR byte
 * at gCPU.cr+disp. t1/t2 select the SETcc conditions (above/below for
 * unsigned, greater/less for signed compares); mask selects which CR
 * bits to preserve (AND mask); map is the address of the 257-entry
 * translation table to index with the SETcc pair.
 */
static inline void jitcFlushFlagsAfterCMP(X86FlagTest t1, X86FlagTest t2, byte mask, int disp, uint32 map)
{
	byte modrm[6];
	NativeReg r = jitcAllocRegister(NATIVE_REG_8);
	asmSETReg8(t1, (NativeReg8)r);
	asmSETReg8(t2, (NativeReg8)(r+4));	// r+4 = high-byte reg (AL->AH etc.)
	asmMOVxxRegReg16(X86_MOVZX, r, r);	// 16-bit pair becomes the table index
	asmALUMemImm8(X86_AND, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+disp), mask);
	asmALURegMem8(X86_MOV, (NativeReg8)r, modrm, x86_mem(modrm, r, map));
	asmALUMemReg8(X86_OR, modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.cr+disp), (NativeReg8)r);
}
644 |
|
645 |
/*
 * Flush flags after an unsigned compare into the upper CR nibble
 * (low nibble of the byte is preserved via mask 0x0f).
 */
void FASTCALL jitcFlushFlagsAfterCMPL_U(int disp)
{
	jitcFlushFlagsAfterCMP(X86_A, X86_B, 0x0f, disp, (uint32)&jitcFlagsMappingCMP_U);
}
649 |
|
650 |
/*
 * Flush flags after an unsigned compare into the lower CR nibble
 * (high nibble of the byte is preserved via mask 0xf0).
 */
void FASTCALL jitcFlushFlagsAfterCMPL_L(int disp)
{
	jitcFlushFlagsAfterCMP(X86_A, X86_B, 0xf0, disp, (uint32)&jitcFlagsMappingCMP_L);
}
654 |
|
655 |
/*
 * Flush flags after a signed compare into the upper CR nibble.
 */
void FASTCALL jitcFlushFlagsAfterCMP_U(int disp)
{
	jitcFlushFlagsAfterCMP(X86_G, X86_L, 0x0f, disp, (uint32)&jitcFlagsMappingCMP_U);
}
659 |
|
660 |
/*
 * Flush flags after a signed compare into the lower CR nibble.
 */
void FASTCALL jitcFlushFlagsAfterCMP_L(int disp)
{
	jitcFlushFlagsAfterCMP(X86_G, X86_L, 0xf0, disp, (uint32)&jitcFlagsMappingCMP_L);
}
664 |
|
665 |
/*
 * Clobbers the native flags, flushing them first if dirty.
 * A dirty carry must be flushed before the flags flush, because the
 * code emitted by jitcFlushFlags destroys the carry flag.
 */
void FASTCALL jitcClobberFlags()
{
	if (gJITC.nativeFlagsState == rsDirty) {
		if (gJITC.nativeCarryState == rsDirty) {
			jitcFlushCarry();
		}
		jitcFlushFlags();
		gJITC.nativeCarryState = rsUnused;
	}
	gJITC.nativeFlagsState = rsUnused;
}
676 |
|
677 |
/*
 * Clobbers the native carry flag, flushing it to gCPU.xer_ca first
 * if dirty.
 */
void FASTCALL jitcClobberCarry()
{
	if (gJITC.nativeCarryState == rsDirty) {
		jitcFlushCarry();
	}
	gJITC.nativeCarryState = rsUnused;
}
684 |
|
685 |
/*
 * Clobbers carry and flags together. When both are dirty the carry is
 * flushed first (jitcFlushFlags emits code that destroys the carry);
 * otherwise the single-flag clobber routines handle each case.
 */
void FASTCALL jitcClobberCarryAndFlags()
{
	if (gJITC.nativeCarryState == rsDirty) {
		if (gJITC.nativeFlagsState == rsDirty) {
			jitcFlushCarry();
			jitcFlushFlags();
			gJITC.nativeCarryState = gJITC.nativeFlagsState = rsUnused;
		} else {
			jitcClobberCarry();
		}
	} else {
		jitcClobberFlags();
	}
}
699 |
|
700 |
/* |
701 |
* ONLY FOR DEBUG! DON'T CALL (unless you know what you are doing) |
702 |
*/ |
703 |
/*
 * ONLY FOR DEBUG! DON'T CALL (unless you know what you are doing)
 *
 * Flushes carry and/or flags if dirty without resetting their states.
 * The carry is flushed first since the flags flush emits code that
 * destroys the native carry flag.
 */
void FASTCALL jitcFlushCarryAndFlagsDirty()
{
	if (gJITC.nativeCarryState == rsDirty) {
		jitcFlushCarry();
	}
	if (gJITC.nativeFlagsState == rsDirty) {
		jitcFlushFlags();
	}
}
716 |
|
717 |
/* |
718 |
* jitcFloatRegisterToNative converts the stack-independent |
719 |
* register r to a stack-dependent register ST(i) |
720 |
*/ |
721 |
/*
 * jitcFloatRegisterToNative converts the stack-independent register r
 * to a stack-dependent register ST(i), using the current permutation
 * and FP stack depth.
 */
NativeFloatReg FASTCALL jitcFloatRegisterToNative(JitcFloatReg r)
{
	return X86_FLOAT_ST(gJITC.nativeFloatTOP-gJITC.floatRegPerm[r]);
}
725 |
|
726 |
/* |
727 |
* jitcFloatRegisterFromNative converts the stack-dependent |
728 |
* register ST(r) to a stack-independent JitcFloatReg |
729 |
*/ |
730 |
/*
 * jitcFloatRegisterFromNative converts the stack-dependent register
 * ST(r) to a stack-independent JitcFloatReg via the inverse
 * permutation.
 */
JitcFloatReg FASTCALL jitcFloatRegisterFromNative(NativeFloatReg r)
{
	ASSERT(gJITC.nativeFloatTOP > r);
	return gJITC.floatRegPermInverse[gJITC.nativeFloatTOP-r];
}
735 |
|
736 |
/* |
737 |
* Returns true iff r is on top of the floating point register |
738 |
* stack. |
739 |
*/ |
740 |
/*
 * Returns true iff r is on top of the floating point register stack
 * (i.e. r currently is ST(0)).
 */
bool FASTCALL jitcFloatRegisterIsTOP(JitcFloatReg r)
{
	ASSERT(r != JITC_FLOAT_REG_NONE);
	return gJITC.floatRegPerm[r] == gJITC.nativeFloatTOP;
}
745 |
|
746 |
/* |
747 |
* Exchanges r to the front of the stack. |
748 |
*/ |
749 |
/*
 * Exchanges r to the front (ST(0)) of the FP stack, emitting an FXCH
 * if r isn't already on top, and updates both permutation tables by
 * composing with the transposition (s r).
 */
JitcFloatReg FASTCALL jitcFloatRegisterXCHGToFront(JitcFloatReg r)
{
	ASSERT(r != JITC_FLOAT_REG_NONE);
	if (jitcFloatRegisterIsTOP(r)) return r;

	asmFXCHSTi(jitcFloatRegisterToNative(r));
	// s = the register that was on top before the exchange
	JitcFloatReg s = jitcFloatRegisterFromNative(Float_ST0);
	ASSERT(s != r);
	// set floatRegPerm := floatRegPerm * (s r)
	int tmp = gJITC.floatRegPerm[r];
	gJITC.floatRegPerm[r] = gJITC.floatRegPerm[s];
	gJITC.floatRegPerm[s] = tmp;

	// set floatRegPermInverse := (s r) * floatRegPermInverse
	// (note: r and s are reassigned to permutation values here)
	r = gJITC.floatRegPerm[r];
	s = gJITC.floatRegPerm[s];
	tmp = gJITC.floatRegPermInverse[r];
	gJITC.floatRegPermInverse[r] = gJITC.floatRegPermInverse[s];
	gJITC.floatRegPermInverse[s] = tmp;

	return r;
}
771 |
|
772 |
/* |
773 |
* Dirties r |
774 |
*/ |
775 |
/*
 * Marks float register r as dirty and returns it for chaining.
 * Emits no code.
 */
JitcFloatReg FASTCALL jitcFloatRegisterDirty(JitcFloatReg r)
{
	gJITC.nativeFloatRegState[r] = rsDirty;
	return r;
}
780 |
|
781 |
/*
 * Drops float register r from the FP stack without storing it back:
 * brings it to ST(0), frees/pops it, clears the client mapping and
 * shrinks the stack.
 */
void FASTCALL jitcFloatRegisterInvalidate(JitcFloatReg r)
{
	jitcFloatRegisterXCHGToFront(r);
	asmFFREEPSTi(Float_ST0);
	int creg = gJITC.nativeFloatRegStack[r];
	gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
	gJITC.nativeFloatTOP--;
}
789 |
|
790 |
/*
 * Frees one entry of the FP stack, choosing a victim that is neither
 * hint1 nor hint2. A dirty victim is stored back to gCPU.fpr; a merely
 * mapped one is just freed.
 */
void FASTCALL jitcPopFloatStack(JitcFloatReg hint1, JitcFloatReg hint2)
{
	ASSERT(gJITC.nativeFloatTOP > 0);

	// scan up to 4 candidates starting at the deepest stack slot;
	// with only two hints the third candidate is always acceptable
	JitcFloatReg r;
	for (int i=0; i<4; i++) {
		r = jitcFloatRegisterFromNative(X86_FLOAT_ST(gJITC.nativeFloatTOP-i-1));
		if (r != hint1 && r != hint2) break;
	}

	// we can now free r
	int creg = gJITC.nativeFloatRegStack[r];
	jitcFloatRegisterXCHGToFront(r);
	if (gJITC.nativeFloatRegState[r] == rsDirty) {
		// store back before popping
		byte modrm[6];
		asmFSTPDoubleMem(modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.fpr[creg]));
	} else {
		asmFFREEPSTi(Float_ST0);
	}
	gJITC.nativeFloatRegState[r] = rsUnused;
	gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
	gJITC.nativeFloatTOP--;
}
813 |
|
814 |
/*
 * Pushes client FPR creg onto the FP stack (emits FLD from gCPU.fpr)
 * and returns the stack-independent register that now refers to it.
 * Caller is responsible for recording the mapping/state.
 */
static JitcFloatReg FASTCALL jitcPushFloatStack(int creg)
{
	ASSERT(gJITC.nativeFloatTOP < 8);
	gJITC.nativeFloatTOP++;
	int r = gJITC.floatRegPermInverse[gJITC.nativeFloatTOP];
	byte modrm[6];
	asmFLDDoubleMem(modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.fpr[creg]));
	return r;
}
823 |
|
824 |
/* |
825 |
* Creates a copy of r on the stack. If the stack is full, it will |
826 |
* clobber an entry. It will not clobber r nor hint. |
827 |
*/ |
828 |
/*
 * Creates a copy of freg on the FP stack. If the stack is full, it
 * will clobber an entry -- but never freg itself nor hint. The copy is
 * anonymous (rsUnused, not mapped to any client register).
 */
JitcFloatReg FASTCALL jitcFloatRegisterDup(JitcFloatReg freg, JitcFloatReg hint)
{
//	ht_printf("dup %d\n", freg);
	if (gJITC.nativeFloatTOP == 8) {
		// stack is full -- make room, sparing freg and hint
		jitcPopFloatStack(freg, hint);
	}
	asmFLDSTi(jitcFloatRegisterToNative(freg));
	gJITC.nativeFloatTOP++;
	int r = gJITC.floatRegPermInverse[gJITC.nativeFloatTOP];
	gJITC.nativeFloatRegState[r] = rsUnused; // not really mapped
	return r;
}
841 |
|
842 |
/*
 * Empties the whole FP stack: repeatedly stores ST(0) back to gCPU.fpr
 * (if dirty) or frees it (if merely mapped), clearing the client
 * mappings. An rsUnused entry on the stack is an invariant violation.
 */
void FASTCALL jitcFloatRegisterClobberAll()
{
	if (!gJITC.nativeFloatTOP) return;

	do {
		JitcFloatReg r = jitcFloatRegisterFromNative(Float_ST0);
		int creg = gJITC.nativeFloatRegStack[r];
		switch (gJITC.nativeFloatRegState[r]) {
		case rsDirty: {
			// FSTP stores and pops in one go
			byte modrm[6];
			asmFSTPDoubleMem(modrm, x86_mem(modrm, REG_NO, (uint32)&gCPU.fpr[creg]));
			gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
			break;
		}
		case rsMapped:
			asmFFREEPSTi(Float_ST0);
			gJITC.clientFloatReg[creg] = JITC_FLOAT_REG_NONE;
			break;
		case rsUnused: {ASSERT(0);}
		}
	} while (--gJITC.nativeFloatTOP);
}
864 |
|
865 |
/*
 * Emits FSTP ST(i) for register r (store ST(0) into r's stack slot
 * and pop) and shrinks the tracked stack depth.
 */
void FASTCALL jitcFloatRegisterStoreAndPopTOP(JitcFloatReg r)
{
	asmFSTDPSTi(jitcFloatRegisterToNative(r));
	gJITC.nativeFloatTOP--;
}
870 |
|
871 |
/*
 * Clobbers the two GPR mappings (upper and lower half) of client FPR
 * creg, so the integer view of the FPR is written back to gCPU before
 * FP code touches it.
 */
void FASTCALL jitcClobberClientRegisterForFloat(int creg)
{
	NativeReg r = jitcGetClientRegisterMapping(PPC_FPR_U(creg));
	if (r != REG_NO) jitcClobberRegister(r | NATIVE_REG);
	r = jitcGetClientRegisterMapping(PPC_FPR_L(creg));
	if (r != REG_NO) jitcClobberRegister(r | NATIVE_REG);
}
878 |
|
879 |
/*
 * Invalidates the GPR mappings of client FPR creg.
 */
void FASTCALL jitcInvalidateClientRegisterForFloat(int creg)
{
	// FIXME: no need to clobber, invalidate would be enough
	jitcClobberClientRegisterForFloat(creg);
}
884 |
|
885 |
/*
 * Returns the JitcFloatReg holding client FPR creg, or
 * JITC_FLOAT_REG_NONE if it isn't on the FP stack.
 */
JitcFloatReg FASTCALL jitcGetClientFloatRegisterMapping(int creg)
{
	return gJITC.clientFloatReg[creg];
}
889 |
|
890 |
/*
 * Like jitcGetClientFloatRegister, but a freshly loaded register is
 * left UNMAPPED (rsUnused, no client mapping recorded) -- the caller
 * owns the anonymous copy. hint1/hint2 are spared when an entry must
 * be evicted from a full stack.
 */
JitcFloatReg FASTCALL jitcGetClientFloatRegisterUnmapped(int creg, int hint1, int hint2)
{
	JitcFloatReg r = jitcGetClientFloatRegisterMapping(creg);
	if (r == JITC_FLOAT_REG_NONE) {
		if (gJITC.nativeFloatTOP == 8) {
			jitcPopFloatStack(hint1, hint2);
		}
		r = jitcPushFloatStack(creg);
		gJITC.nativeFloatRegState[r] = rsUnused;
	}
	return r;
}
902 |
|
903 |
/*
 * Returns the JitcFloatReg holding client FPR creg, loading it onto
 * the FP stack (and recording the mapping, state rsMapped) if needed.
 * hint1/hint2 are spared when an entry must be evicted from a full
 * stack.
 */
JitcFloatReg FASTCALL jitcGetClientFloatRegister(int creg, int hint1, int hint2)
{
	JitcFloatReg r = jitcGetClientFloatRegisterMapping(creg);
	if (r == JITC_FLOAT_REG_NONE) {
		if (gJITC.nativeFloatTOP == 8) {
			jitcPopFloatStack(hint1, hint2);
		}
		r = jitcPushFloatStack(creg);
		gJITC.clientFloatReg[creg] = r;
		gJITC.nativeFloatRegStack[r] = creg;
		gJITC.nativeFloatRegState[r] = rsMapped;
	}
	return r;
}
917 |
|
918 |
/*
 * Maps client FPR creg to float register freg (or to the current
 * ST(0) when freg is JITC_FLOAT_REG_NONE) and marks it dirty.
 * Emits no code.
 */
JitcFloatReg FASTCALL jitcMapClientFloatRegisterDirty(int creg, JitcFloatReg freg)
{
	if (freg == JITC_FLOAT_REG_NONE) {
		freg = jitcFloatRegisterFromNative(Float_ST0);
	}
	gJITC.clientFloatReg[creg] = freg;
	gJITC.nativeFloatRegStack[freg] = creg;
	gJITC.nativeFloatRegState[freg] = rsDirty;
	return freg;
}
928 |
|
929 |
/* |
930 |
* |
931 |
*/ |
932 |
/*
 * Returns the current emit position (next code byte) of the active
 * translation cache page.
 */
NativeAddress FASTCALL asmHERE()
{
	return gJITC.currentPage->tcp;
}
936 |
|
937 |
/*
 * Emits n bytes of padding as 0x66-prefixed NOPs.
 *
 * Fix: the previous version wrote n-1 prefix bytes plus the NOP into
 * a 15-byte stack buffer, overflowing for n >= 16. We now emit in
 * chunks of at most 15 bytes (the maximum x86 instruction length),
 * each chunk being (len-1) 0x66 prefixes followed by one 0x90.
 */
void FASTCALL asmNOP(int n)
{
	byte instr[15];
	while (n > 0) {
		int chunk = (n > 15) ? 15 : n;
		for (int i=0; i < (chunk-1); i++) {
			instr[i] = 0x66;
		}
		instr[chunk-1] = 0x90;
		jitcEmit(instr, chunk);
		n -= chunk;
	}
}
947 |
|
948 |
/*
 * Emits a two-byte instruction "opc reg1, reg2" on 32-bit registers:
 * opcode byte followed by a mod/rm byte with mod=11, reg=reg1,
 * rm=reg2.
 */
static void FASTCALL asmSimpleMODRM(uint8 opc, NativeReg reg1, NativeReg reg2)
{
	byte instr[2];
	instr[0] = opc;
	instr[1] = 0xc0+(reg1<<3)+reg2;
	jitcEmit(instr, sizeof(instr));
}
953 |
|
954 |
/*
 * Emits "opc reg1, reg2" on 16-bit registers: operand-size prefix
 * (0x66), opcode byte, then a mod/rm byte with mod=11, reg=reg1,
 * rm=reg2.
 */
static void FASTCALL asmSimpleMODRM(uint8 opc, NativeReg16 reg1, NativeReg16 reg2)
{
	byte instr[3];
	instr[0] = 0x66;
	instr[1] = opc;
	instr[2] = 0xc0+(reg1<<3)+reg2;
	jitcEmit(instr, sizeof(instr));
}
959 |
|
960 |
/*
 * Emits a two-byte instruction "opc reg1, reg2" on 8-bit registers:
 * opcode byte followed by a mod/rm byte with mod=11, reg=reg1,
 * rm=reg2.
 */
static void FASTCALL asmSimpleMODRM(uint8 opc, NativeReg8 reg1, NativeReg8 reg2)
{
	byte instr[2];
	instr[0] = opc;
	instr[1] = 0xc0+(reg1<<3)+reg2;
	jitcEmit(instr, sizeof(instr));
}
965 |
|
966 |
/*
 * Emits the shortest encoding of "test reg1, imm" (32 bit).
 * For EAX..EBX (registers with byte halves) an 8-bit form is used
 * when only the low or only the second byte of imm is populated.
 */
static void FASTCALL asmTEST_D(NativeReg reg1, uint32 imm)
{
	if (reg1 <= EBX) {
		if (imm <= 0xff) {
			// only bits 0..7 set
			if (reg1 == EAX) {
				// test al, imm8 (dedicated short form)
				byte instr[2] = {0xa8, imm};
				jitcEmit(instr, sizeof(instr));
			} else {
				// test r8, imm8
				byte instr[3] = {0xf6, 0xc0+reg1, imm};
				jitcEmit(instr, sizeof(instr));
			}
			return;
		} else if (!(imm & 0xffff00ff)) {
			// only bits 8..15 set: test ah/ch/dh/bh, imm8
			// (0xc4+reg1 addresses the high-byte registers)
			byte instr[3] = {0xf6, 0xc4+reg1, (imm>>8)};
			jitcEmit(instr, sizeof(instr));
			return;
		}
	}
	// full 32-bit immediate form
	if (reg1 == EAX) {
		// test eax, imm32 (dedicated short form)
		byte instr[5];
		instr[0] = 0xa9;
		*((uint32 *)&instr[1]) = imm;
		jitcEmit(instr, sizeof(instr));
	} else {
		// test r32, imm32
		byte instr[6];
		instr[0] = 0xf7;
		instr[1] = 0xc0+reg1;
		*((uint32 *)&instr[2]) = imm;
		jitcEmit(instr, sizeof(instr));
	}
}
1000 |
|
1001 |
static void FASTCALL asmTEST_W(NativeReg16 reg1, uint16 imm)
{
	// Emit "test reg1, imm" for a 16-bit register, shortest encoding first.
	if (reg1 <= BX) {
		if (imm <= 0xff) {
			// mask fits in the low byte -> 8-bit test on AL/CL/DL/BL
			if (reg1 == AX) {
				byte instr[2] = {0xa8, imm};	// short form: test al, imm8
				jitcEmit(instr, sizeof(instr));
			} else {
				byte instr[3] = {0xf6, 0xc0+reg1, imm};	// test r8, imm8
				jitcEmit(instr, sizeof(instr));
			}
			return;
		} else if (!(imm & 0xffff00ff)) {
			// only bits 8..15 set -> test AH/CH/DH/BH, imm8
			byte instr[3] = {0xf6, 0xc4+reg1, (imm>>8)};
			jitcEmit(instr, sizeof(instr));
			return;
		}
	}
	// general case: test r16, imm16 (0x66 operand-size prefix)
	if (reg1 == AX) {
		byte instr[4];
		instr[0] = 0x66;
		instr[1] = 0xa9;	// short form: test ax, imm16
		*((uint16 *)&instr[2]) = imm;
		jitcEmit(instr, sizeof(instr));
	} else {
		byte instr[5];
		instr[0] = 0x66;
		instr[1] = 0xf7;	// test r/m16, imm16 (/0)
		instr[2] = 0xc0+reg1;
		*((uint16 *)&instr[3]) = imm;
		jitcEmit(instr, sizeof(instr));
	}
}
1037 |
|
1038 |
static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg reg1, NativeReg reg2)
{
	// regular ALU group: 0x03 + 8*opc is "ALU r32, r/m32"
	byte code[2];
	code[0] = 0x03 + (opc << 3);
	code[1] = 0xc0 + (reg1 << 3) + reg2;
	jitcEmit(code, sizeof code);
}

static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg16 reg1, NativeReg16 reg2)
{
	// 16-bit variant: same opcode behind a 0x66 operand-size prefix
	byte code[3];
	code[0] = 0x66;
	code[1] = 0x03 + (opc << 3);
	code[2] = 0xc0 + (reg1 << 3) + reg2;
	jitcEmit(code, sizeof code);
}

static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg8 reg1, NativeReg8 reg2)
{
	// 8-bit variant: 0x02 + 8*opc is "ALU r8, r/m8"
	byte code[2];
	code[0] = 0x02 + (opc << 3);
	code[1] = 0xc0 + (reg1 << 3) + reg2;
	jitcEmit(code, sizeof code);
}
1055 |
|
1056 |
|
1057 |
void FASTCALL asmALU(X86ALUopc opc, NativeReg reg1, NativeReg reg2) |
1058 |
{ |
1059 |
switch (opc) { |
1060 |
case X86_MOV: |
1061 |
asmSimpleMODRM(0x8b, reg1, reg2); |
1062 |
break; |
1063 |
case X86_TEST: |
1064 |
asmSimpleMODRM(0x85, reg1, reg2); |
1065 |
break; |
1066 |
case X86_XCHG: |
1067 |
if (reg1 == EAX) { |
1068 |
jitcEmit1(0x90+reg2); |
1069 |
} else if (reg2 == EAX) { |
1070 |
jitcEmit1(0x90+reg1); |
1071 |
} else { |
1072 |
asmSimpleMODRM(0x87, reg1, reg2); |
1073 |
} |
1074 |
break; |
1075 |
default: |
1076 |
asmSimpleALU(opc, reg1, reg2); |
1077 |
} |
1078 |
} |
1079 |
void FASTCALL asmALURegReg(X86ALUopc opc, NativeReg reg1, NativeReg reg2) |
1080 |
{ |
1081 |
asmALU(opc, reg1, reg2); |
1082 |
} |
1083 |
|
1084 |
void FASTCALL asmALU(X86ALUopc opc, NativeReg16 reg1, NativeReg16 reg2) |
1085 |
{ |
1086 |
switch (opc) { |
1087 |
case X86_MOV: |
1088 |
asmSimpleMODRM(0x8b, reg1, reg2); |
1089 |
break; |
1090 |
case X86_TEST: |
1091 |
asmSimpleMODRM(0x85, reg1, reg2); |
1092 |
break; |
1093 |
case X86_XCHG: |
1094 |
if (reg1 == AX) { |
1095 |
byte instr[2] = { 0x66, 0x90+reg2 }; |
1096 |
jitcEmit(instr, sizeof instr); |
1097 |
} else if (reg2 == AX) { |
1098 |
byte instr[2] = { 0x66, 0x90+reg1 }; |
1099 |
jitcEmit(instr, sizeof instr); |
1100 |
} else { |
1101 |
asmSimpleMODRM(0x87, reg1, reg2); |
1102 |
} |
1103 |
break; |
1104 |
default: |
1105 |
asmSimpleALU(opc, reg1, reg2); |
1106 |
} |
1107 |
} |
1108 |
void FASTCALL asmALURegReg16(X86ALUopc opc, NativeReg reg1, NativeReg reg2) |
1109 |
{ |
1110 |
asmALU(opc, (NativeReg16)reg1, (NativeReg16)reg2); |
1111 |
} |
1112 |
|
1113 |
void FASTCALL asmALU(X86ALUopc opc, NativeReg8 reg1, NativeReg8 reg2) |
1114 |
{ |
1115 |
switch (opc) { |
1116 |
case X86_MOV: |
1117 |
asmSimpleMODRM(0x8a, reg1, reg2); |
1118 |
break; |
1119 |
case X86_TEST: |
1120 |
asmSimpleMODRM(0x84, reg1, reg2); |
1121 |
break; |
1122 |
case X86_XCHG: |
1123 |
asmSimpleMODRM(0x86, reg1, reg2); |
1124 |
break; |
1125 |
default: |
1126 |
asmSimpleALU(opc, reg1, reg2); |
1127 |
} |
1128 |
} |
1129 |
void FASTCALL asmALURegReg8(X86ALUopc opc, NativeReg8 reg1, NativeReg8 reg2) |
1130 |
{ |
1131 |
asmALU(opc, reg1, reg2); |
1132 |
} |
1133 |
|
1134 |
void FASTCALL asmALU(X86ALUopc opc, NativeReg8 reg1, uint8 imm)
{
	// ALU r8, imm8; MOV and TEST have dedicated encodings,
	// XCHG with an immediate does not exist (internal error, no-op).
	byte instr[5];
	switch (opc) {
	case X86_MOV:
		instr[0] = 0xb0 + reg1;	// mov r8, imm8 (short form)
		instr[1] = imm;
		jitcEmit(instr, 2);
		break;
	case X86_TEST:
		if (reg1 == AL) {
			instr[0] = 0xa8;	// short form: test al, imm8
			instr[1] = imm;
			jitcEmit(instr, 2);
		} else {
			instr[0] = 0xf6;	// test r/m8, imm8 (/0)
			instr[1] = 0xc0 + reg1;
			instr[2] = imm;
			jitcEmit(instr, 3);
		}
		break;
	case X86_XCHG:
		// internal error
		break;
	default: {
		// regular ALU group
		if (reg1 == AL) {
			instr[0] = (opc<<3)|0x4;	// short form: ALU al, imm8
			instr[1] = imm;
			jitcEmit(instr, 2);
		} else {
			instr[0] = 0x80;	// 80 /opc ib
			instr[1] = 0xc0+(opc<<3)+reg1;
			instr[2] = imm;
			jitcEmit(instr, 3);
		}
		break;
	}
	}
}
void FASTCALL asmALURegImm8(X86ALUopc opc, NativeReg8 reg1, uint8 imm)
{
	// legacy-named alias
	asmALU(opc, reg1, imm);
}
1177 |
|
1178 |
static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg reg1, uint32 imm)
{
	// regular ALU group, r32, imm: use the sign-extended imm8 form
	// (83 /opc ib) when possible, and the short 05-form for EAX.
	if (imm <= 0x7f || imm >= 0xffffff80) {
		byte instr[3] = {0x83, 0xc0+(opc<<3)+reg1, imm};
		jitcEmit(instr, sizeof(instr));
	} else {
		if (reg1 == EAX) {
			byte instr[5];
			instr[0] = 0x05+(opc<<3);	// short form: ALU eax, imm32
			*((uint32 *)&instr[1]) = imm;
			jitcEmit(instr, sizeof(instr));
		} else {
			byte instr[6];
			instr[0] = 0x81;	// 81 /opc id
			instr[1] = 0xc0+(opc<<3)+reg1;
			*((uint32 *)&instr[2]) = imm;
			jitcEmit(instr, sizeof(instr));
		}
	}
}
1198 |
|
1199 |
static void FASTCALL asmSimpleALU(X86ALUopc opc, NativeReg16 reg1, uint32 imm)
{
	// regular ALU group, r16, imm.
	// BUGFIX: the imm8 short form was emitted WITHOUT the 0x66
	// operand-size prefix, so it assembled a 32-bit operation on the
	// full register; the imm16 branch below already had the prefix.
	if (imm <= 0x7f || imm >= 0xffffff80) {
		// 66 83 /opc ib: imm8, sign-extended to 16 bit
		byte instr[4] = {0x66, 0x83, 0xc0+(opc<<3)+reg1, imm};
		jitcEmit(instr, sizeof(instr));
	} else {
		if (reg1 == AX) {
			byte instr[4];
			instr[0] = 0x66;
			instr[1] = 0x05+(opc<<3);	// short form: ALU ax, imm16
			*((uint16 *)&instr[2]) = imm;
			jitcEmit(instr, sizeof(instr));
		} else {
			byte instr[5];
			instr[0] = 0x66;
			instr[1] = 0x81;	// 66 81 /opc iw
			instr[2] = 0xc0+(opc<<3)+reg1;
			*((uint16 *)&instr[3]) = imm;
			jitcEmit(instr, sizeof(instr));
		}
	}
}
1221 |
|
1222 |
void FASTCALL asmALU(X86ALUopc opc, NativeReg reg1, uint32 imm)
{
	// ALU r32, imm with peephole tweaks: "mov reg, 0" becomes the
	// shorter "xor reg, reg" (which clobbers flags -- callers that need
	// flags preserved must use asmMOV_NoFlags directly).
	switch (opc) {
	case X86_MOV:
		if (imm == 0) {
			asmALU(X86_XOR, reg1, reg1);
		} else {
			asmMOV_NoFlags(reg1, imm);
		}
		break;
	case X86_XCHG:
		// internal error: xchg with immediate doesn't exist
		break;
	case X86_TEST:
		asmTEST_D(reg1, imm);
		break;
	case X86_CMP:
		// "cmp reg, 0" could be shrunk to "or reg, reg", but that
		// changes CF/OF semantics, so it stays disabled:
		// if (imm == 0) {
		//	asmALU(X86_OR, reg1, reg1);
		// } else {
		asmSimpleALU(opc, reg1, imm);
		// }
		break;
	default:
		asmSimpleALU(opc, reg1, imm);
	}
}
void FASTCALL asmALURegImm(X86ALUopc opc, NativeReg reg1, uint32 imm)
{
	// legacy-named alias
	asmALU(opc, reg1, imm);
}
1253 |
|
1254 |
void FASTCALL asmALU(X86ALUopc opc, NativeReg16 reg1, uint16 imm)
{
	// ALU r16, imm16; mirrors the 32-bit overload above.
	switch (opc) {
	case X86_MOV:
		if (imm == 0) {
			// shorter, but clobbers flags
			asmALU(X86_XOR, reg1, reg1);
		} else {
			asmMOV_NoFlags(reg1, imm);
		}
		break;
	case X86_XCHG:
		// internal error: xchg with immediate doesn't exist
		break;
	case X86_TEST:
		asmTEST_W(reg1, imm);
		break;
	case X86_CMP:
		// disabled "cmp reg, 0" -> "or reg, reg" shortcut (flags differ):
		// if (imm == 0) {
		//	asmALU(X86_OR, reg1, reg1);
		// } else {
		asmSimpleALU(opc, reg1, imm);
		// }
		break;
	default:
		asmSimpleALU(opc, reg1, imm);
	}
}
void FASTCALL asmALURegImm16(X86ALUopc opc, NativeReg reg1, uint32 imm)
{
	// legacy-named alias taking a 32-bit register number
	asmALU(opc, (NativeReg16)reg1, imm);
}
1285 |
|
1286 |
void FASTCALL asmMOV_NoFlags(NativeReg reg1, uint32 imm) |
1287 |
{ |
1288 |
byte instr[5]; |
1289 |
instr[0] = 0xb8+reg1; |
1290 |
*((uint32 *)&instr[1]) = imm; |
1291 |
jitcEmit(instr, sizeof(instr)); |
1292 |
} |
1293 |
void FASTCALL asmMOVRegImm_NoFlags(NativeReg reg1, uint32 imm) |
1294 |
{ |
1295 |
asmMOV_NoFlags(reg1, imm); |
1296 |
} |
1297 |
|
1298 |
void FASTCALL asmMOV_NoFlags(NativeReg16 reg1, uint16 imm) |
1299 |
{ |
1300 |
byte instr[4]; |
1301 |
instr[0] = 0x66; |
1302 |
instr[1] = 0xb8+reg1; |
1303 |
*((uint16 *)&instr[2]) = imm; |
1304 |
jitcEmit(instr, sizeof(instr)); |
1305 |
} |
1306 |
void FASTCALL asmMOVRegImm16_NoFlags(NativeReg reg1, uint16 imm) |
1307 |
{ |
1308 |
asmMOV_NoFlags((NativeReg16)reg1, imm); |
1309 |
} |
1310 |
|
1311 |
void FASTCALL asmALU(X86ALUopc1 opc, NativeReg reg1) |
1312 |
{ |
1313 |
byte instr[2]; |
1314 |
switch (opc) { |
1315 |
case X86_NOT: |
1316 |
instr[0] = 0xf7; |
1317 |
instr[1] = 0xd0+reg1; |
1318 |
break; |
1319 |
case X86_NEG: |
1320 |
instr[0] = 0xf7; |
1321 |
instr[1] = 0xd8+reg1; |
1322 |
break; |
1323 |
case X86_MUL: |
1324 |
instr[0] = 0xf7; |
1325 |
instr[1] = 0xe0+reg1; |
1326 |
break; |
1327 |
case X86_IMUL: |
1328 |
instr[0] = 0xf7; |
1329 |
instr[1] = 0xe8+reg1; |
1330 |
break; |
1331 |
case X86_DIV: |
1332 |
instr[0] = 0xf7; |
1333 |
instr[1] = 0xf0+reg1; |
1334 |
break; |
1335 |
case X86_IDIV: |
1336 |
instr[0] = 0xf7; |
1337 |
instr[1] = 0xf8+reg1; |
1338 |
break; |
1339 |
} |
1340 |
jitcEmit(instr, 2); |
1341 |
} |
1342 |
void FASTCALL asmALUReg(X86ALUopc1 opc, NativeReg reg1) |
1343 |
{ |
1344 |
asmALU(opc, reg1); |
1345 |
} |
1346 |
|
1347 |
void FASTCALL asmALU(X86ALUopc1 opc, NativeReg16 reg1) |
1348 |
{ |
1349 |
byte instr[3]; |
1350 |
instr[0] = 0x66; |
1351 |
|
1352 |
switch (opc) { |
1353 |
case X86_NOT: |
1354 |
instr[1] = 0xf7; |
1355 |
instr[2] = 0xd0+reg1; |
1356 |
break; |
1357 |
case X86_NEG: |
1358 |
instr[1] = 0xf7; |
1359 |
instr[2] = 0xd8+reg1; |
1360 |
break; |
1361 |
case X86_MUL: |
1362 |
instr[1] = 0xf7; |
1363 |
instr[2] = 0xe0+reg1; |
1364 |
break; |
1365 |
case X86_IMUL: |
1366 |
instr[1] = 0xf7; |
1367 |
instr[2] = 0xe8+reg1; |
1368 |
break; |
1369 |
case X86_DIV: |
1370 |
instr[1] = 0xf7; |
1371 |
instr[2] = 0xf0+reg1; |
1372 |
break; |
1373 |
case X86_IDIV: |
1374 |
instr[1] = 0xf7; |
1375 |
instr[2] = 0xf8+reg1; |
1376 |
break; |
1377 |
} |
1378 |
jitcEmit(instr, 3); |
1379 |
} |
1380 |
void FASTCALL asmALUReg16(X86ALUopc1 opc, NativeReg reg1) |
1381 |
{ |
1382 |
asmALU(opc, (NativeReg16)reg1); |
1383 |
} |
1384 |
|
1385 |
void FASTCALL asmALUMemReg(X86ALUopc opc, byte *modrm, int len, NativeReg reg2) |
1386 |
{ |
1387 |
byte instr[15]; |
1388 |
|
1389 |
switch (opc) { |
1390 |
case X86_MOV: |
1391 |
instr[0] = 0x89; |
1392 |
break; |
1393 |
case X86_XCHG: |
1394 |
instr[0] = 0x87; |
1395 |
break; |
1396 |
case X86_TEST: |
1397 |
instr[0] = 0x85; |
1398 |
break; |
1399 |
default: |
1400 |
instr[0] = 0x01+(opc<<3); |
1401 |
} |
1402 |
memcpy(&instr[1], modrm, len); |
1403 |
instr[1] |= (reg2<<3); |
1404 |
jitcEmit(instr, len+1); |
1405 |
} |
1406 |
void FASTCALL asmALU(X86ALUopc opc, modrm_p modrm, NativeReg reg2) |
1407 |
{ |
1408 |
byte instr[15]; |
1409 |
int len = modrm++[0]; |
1410 |
|
1411 |
switch (opc) { |
1412 |
case X86_MOV: |
1413 |
instr[0] = 0x89; |
1414 |
break; |
1415 |
case X86_XCHG: |
1416 |
instr[0] = 0x87; |
1417 |
break; |
1418 |
case X86_TEST: |
1419 |
instr[0] = 0x85; |
1420 |
break; |
1421 |
default: |
1422 |
instr[0] = 0x01+(opc<<3); |
1423 |
} |
1424 |
memcpy(&instr[1], modrm, len); |
1425 |
instr[1] |= (reg2<<3); |
1426 |
jitcEmit(instr, len+1); |
1427 |
} |
1428 |
|
1429 |
void FASTCALL asmALUMemReg16(X86ALUopc opc, byte *modrm, int len, NativeReg reg2) |
1430 |
{ |
1431 |
byte instr[16]; |
1432 |
|
1433 |
instr[0] = 0x66; |
1434 |
switch (opc) { |
1435 |
case X86_MOV: |
1436 |
instr[1] = 0x89; |
1437 |
break; |
1438 |
case X86_XCHG: |
1439 |
instr[1] = 0x87; |
1440 |
break; |
1441 |
case X86_TEST: |
1442 |
instr[1] = 0x85; |
1443 |
break; |
1444 |
default: |
1445 |
instr[1] = 0x01+(opc<<3); |
1446 |
} |
1447 |
memcpy(&instr[2], modrm, len); |
1448 |
instr[2] |= (reg2<<3); |
1449 |
jitcEmit(instr, len+2); |
1450 |
} |
1451 |
void FASTCALL asmALU(X86ALUopc opc, modrm_p modrm, NativeReg16 reg2) |
1452 |
{ |
1453 |
byte instr[16]; |
1454 |
int len = modrm++[0]; |
1455 |
|
1456 |
instr[0] = 0x66; |
1457 |
switch (opc) { |
1458 |
case X86_MOV: |
1459 |
instr[1] = 0x89; |
1460 |
break; |
1461 |
case X86_XCHG: |
1462 |
instr[1] = 0x87; |
1463 |
break; |
1464 |
case X86_TEST: |
1465 |
instr[1] = 0x85; |
1466 |
break; |
1467 |
default: |
1468 |
instr[1] = 0x01+(opc<<3); |
1469 |
} |
1470 |
memcpy(&instr[2], modrm, len); |
1471 |
instr[2] |= (reg2<<3); |
1472 |
jitcEmit(instr, len+2); |
1473 |
} |
1474 |
|
1475 |
|
1476 |
static void FASTCALL asmSimpleALU_D(X86ALUopc opc, byte *modrm, int len, uint32 imm)
{
	// regular ALU group, dword mem, imm: use the sign-extended imm8
	// form (83 /opc ib) when the immediate fits.
	// The opcode extension goes into the reg field of the ModRM byte,
	// which the caller must supply as zero.
	byte instr[15];

	if (imm <= 0x7f || imm >= 0xffffff80) {
		instr[0] = 0x83;	// 83 /opc ib
		memcpy(&instr[1], modrm, len);
		instr[1] |= (opc<<3);
		instr[len+1] = imm;
		jitcEmit(instr, len+2);
	} else {
		instr[0] = 0x81;	// 81 /opc id
		memcpy(&instr[1], modrm, len);
		instr[1] |= (opc<<3);
		*((uint32 *)&instr[len+1]) = imm;
		jitcEmit(instr, len+5);
	}
}
1494 |
|
1495 |
static void FASTCALL asmSimpleALU_W(X86ALUopc opc, byte *modrm, int len, uint16 imm)
{
	// regular ALU group, word mem, imm.
	// BUGFIX: the imm8 branch used to overwrite the 0x66 operand-size
	// prefix at instr[0] with the 0x83 opcode (and wrote the ModRM at
	// instr[1]), emitting a 32-bit operation. Keep the prefix and put
	// the opcode at instr[1], like the imm16 branch does.
	byte instr[16];

	instr[0] = 0x66;

	if (imm <= 0x7f || imm >= 0xff80) {
		instr[1] = 0x83;	// 66 83 /opc ib (imm8 sign-extended)
		memcpy(&instr[2], modrm, len);
		instr[2] |= (opc<<3);
		instr[len+2] = imm;
		jitcEmit(instr, len+3);
	} else {
		instr[1] = 0x81;	// 66 81 /opc iw
		memcpy(&instr[2], modrm, len);
		instr[2] |= (opc<<3);
		*((uint16 *)&instr[len+2]) = imm;
		jitcEmit(instr, len+4);
	}
}
1515 |
|
1516 |
void FASTCALL asmALUMemImm(X86ALUopc opc, byte *modrm, int len, uint32 imm)
{
	// ALU dword mem, imm32. MOV and TEST have dedicated opcodes (both
	// use /0, so the caller's ModRM reg field must be zero); the regular
	// group goes through asmSimpleALU_D which can shrink the immediate.
	byte instr[15];
	switch (opc) {
	case X86_MOV: {
		instr[0] = 0xc7;	// mov r/m32, imm32 (/0)
		memcpy(&instr[1], modrm, len);
		*((uint32 *)&instr[len+1]) = imm;
		jitcEmit(instr, len+5);
		break;
	}
	case X86_XCHG:
		// internal error: xchg with immediate doesn't exist
		break;
	case X86_TEST:
		instr[0] = 0xf7;	// test r/m32, imm32 (/0)
		memcpy(&instr[1], modrm, len);
		*((uint32 *)&instr[len+1]) = imm;
		jitcEmit(instr, len+5);
		break;
	default:
		asmSimpleALU_D(opc, modrm, len, imm);
	}
}

void FASTCALL asmALU_D(X86ALUopc opc, modrm_p modrm, uint32 imm)
{
	// length-prefixed ModRM wrapper
	int len = modrm++[0];

	asmALUMemImm(opc, modrm, len, imm);
}
1547 |
|
1548 |
void FASTCALL asmALUMemImm16(X86ALUopc opc, byte *modrm, int len, uint16 imm)
{
	// ALU word mem, imm16; 16-bit counterpart of asmALUMemImm.
	byte instr[16];
	instr[0] = 0x66;	// operand-size prefix

	switch (opc) {
	case X86_MOV: {
		instr[1] = 0xc7;	// mov r/m16, imm16 (/0)
		memcpy(&instr[2], modrm, len);
		*((uint16 *)&instr[len+2]) = imm;
		jitcEmit(instr, len+4);
		break;
	}
	case X86_XCHG:
		// internal error: xchg with immediate doesn't exist
		break;
	case X86_TEST:
		instr[1] = 0xf7;	// test r/m16, imm16 (/0)
		memcpy(&instr[2], modrm, len);
		*((uint16 *)&instr[len+2]) = imm;
		jitcEmit(instr, len+4);
		break;
	default:
		asmSimpleALU_W(opc, modrm, len, imm);
	}
}
void FASTCALL asmALU_W(X86ALUopc opc, modrm_p modrm, uint16 imm)
{
	// length-prefixed ModRM wrapper
	int len = modrm++[0];

	asmALUMemImm16(opc, modrm, len, imm);
}
1580 |
|
1581 |
void FASTCALL asmALURegMem(X86ALUopc opc, NativeReg reg1, byte *modrm, int len) |
1582 |
{ |
1583 |
byte instr[15]; |
1584 |
switch (opc) { |
1585 |
case X86_MOV: |
1586 |
instr[0] = 0x8b; |
1587 |
break; |
1588 |
case X86_XCHG: |
1589 |
// XCHG is symmetric |
1590 |
instr[0] = 0x87; |
1591 |
break; |
1592 |
case X86_TEST: |
1593 |
// TEST is symmetric |
1594 |
instr[0] = 0x85; |
1595 |
break; |
1596 |
default: |
1597 |
instr[0] = 0x03+(opc<<3); |
1598 |
} |
1599 |
memcpy(&instr[1], modrm, len); |
1600 |
instr[1] |= (reg1<<3); |
1601 |
jitcEmit(instr, len+1); |
1602 |
} |
1603 |
void FASTCALL asmALU(X86ALUopc opc, NativeReg reg1, modrm_p modrm) |
1604 |
{ |
1605 |
int len = modrm++[0]; |
1606 |
|
1607 |
asmALURegMem(opc, reg1, modrm, len); |
1608 |
} |
1609 |
|
1610 |
void FASTCALL asmALURegMem16(X86ALUopc opc, NativeReg reg1, byte *modrm, int len) |
1611 |
{ |
1612 |
byte instr[16]; |
1613 |
instr[0] = 0x66; |
1614 |
switch (opc) { |
1615 |
case X86_MOV: |
1616 |
instr[1] = 0x8b; |
1617 |
break; |
1618 |
case X86_XCHG: |
1619 |
// XCHG is symmetric |
1620 |
instr[1] = 0x87; |
1621 |
break; |
1622 |
case X86_TEST: |
1623 |
// TEST is symmetric |
1624 |
instr[1] = 0x85; |
1625 |
break; |
1626 |
default: |
1627 |
instr[1] = 0x03+(opc<<3); |
1628 |
} |
1629 |
memcpy(&instr[2], modrm, len); |
1630 |
instr[2] |= (reg1<<3); |
1631 |
jitcEmit(instr, len+2); |
1632 |
} |
1633 |
void FASTCALL asmALU(X86ALUopc opc, NativeReg16 reg1, modrm_p modrm) |
1634 |
{ |
1635 |
int len = modrm++[0]; |
1636 |
|
1637 |
asmALURegMem16(opc, (NativeReg)reg1, modrm, len); |
1638 |
} |
1639 |
|
1640 |
void FASTCALL asmALURegMem8(X86ALUopc opc, NativeReg8 reg1, byte *modrm, int len) |
1641 |
{ |
1642 |
byte instr[15]; |
1643 |
switch (opc) { |
1644 |
case X86_MOV: |
1645 |
instr[0] = 0x8a; |
1646 |
break; |
1647 |
case X86_XCHG: |
1648 |
// XCHG is symmetric |
1649 |
instr[0] = 0x86; |
1650 |
break; |
1651 |
case X86_TEST: |
1652 |
// TEST is symmetric |
1653 |
instr[0] = 0x84; |
1654 |
break; |
1655 |
default: |
1656 |
instr[0] = 0x02+(opc<<3); |
1657 |
} |
1658 |
memcpy(&instr[1], modrm, len); |
1659 |
instr[1] |= (reg1<<3); |
1660 |
jitcEmit(instr, len+1); |
1661 |
} |
1662 |
void FASTCALL asmALU(X86ALUopc opc, NativeReg8 reg1, modrm_p modrm) |
1663 |
{ |
1664 |
int len = modrm++[0]; |
1665 |
|
1666 |
asmALURegMem8(opc, reg1, modrm, len); |
1667 |
} |
1668 |
|
1669 |
void FASTCALL asmALUMemReg8(X86ALUopc opc, byte *modrm, int len, NativeReg8 reg2) |
1670 |
{ |
1671 |
byte instr[15]; |
1672 |
switch (opc) { |
1673 |
case X86_MOV: |
1674 |
instr[0] = 0x88; |
1675 |
break; |
1676 |
case X86_XCHG: |
1677 |
instr[0] = 0x86; |
1678 |
break; |
1679 |
case X86_TEST: |
1680 |
instr[0] = 0x84; |
1681 |
break; |
1682 |
default: |
1683 |
instr[0] = 0x00+(opc<<3); |
1684 |
} |
1685 |
memcpy(&instr[1], modrm, len); |
1686 |
instr[1] |= (reg2<<3); |
1687 |
jitcEmit(instr, len+1); |
1688 |
} |
1689 |
void FASTCALL asmALU(X86ALUopc opc, modrm_p modrm, NativeReg8 reg2) |
1690 |
{ |
1691 |
int len = modrm++[0]; |
1692 |
|
1693 |
asmALUMemReg8(opc, modrm, len, reg2); |
1694 |
} |
1695 |
|
1696 |
void FASTCALL asmALUMemImm8(X86ALUopc opc, byte *modrm, int len, uint8 imm) |
1697 |
{ |
1698 |
byte instr[15]; |
1699 |
switch (opc) { |
1700 |
case X86_MOV: |
1701 |
instr[0] = 0xc6; |
1702 |
break; |
1703 |
case X86_XCHG: |
1704 |
// internal error |
1705 |
break; |
1706 |
case X86_TEST: |
1707 |
instr[0] = 0xf6; |
1708 |
break; |
1709 |
default: |
1710 |
instr[0] = 0x80; |
1711 |
memcpy(&instr[1], modrm, len); |
1712 |
instr[1] |= (opc<<3); |
1713 |
instr[len+1] = imm; |
1714 |
jitcEmit(instr, len+2); |
1715 |
return; |
1716 |
} |
1717 |
memcpy(&instr[1], modrm, len); |
1718 |
instr[len+1] = imm; |
1719 |
jitcEmit(instr, len+2); |
1720 |
} |
1721 |
void FASTCALL asmALU_B(X86ALUopc opc, modrm_p modrm, uint8 imm) |
1722 |
{ |
1723 |
int len = modrm++[0]; |
1724 |
|
1725 |
asmALUMemImm8(opc, modrm, len, imm); |
1726 |
} |
1727 |
|
1728 |
void FASTCALL asmMOV(const void *disp, NativeReg reg1)
{
	// mov dword [disp32], reg1 (store to an absolute address).
	// NOTE(review): casting a pointer to uint32 assumes a 32-bit host --
	// this whole emitter is ia32-only.
	byte instr[6];
	if (reg1==EAX) {
		instr[0] = 0xa3;	// short form: mov [disp32], eax
		*((uint32 *)&instr[1]) = (uint32)disp;
		jitcEmit(instr, 5);
	} else {
		instr[0] = 0x89;	// mov r/m32, r32
		instr[1] = 0x05 | (reg1 << 3);	// mod=00 rm=101: disp32 addressing
		*((uint32 *)&instr[2]) = (uint32)disp;
		jitcEmit(instr, 6);
	}
}
void FASTCALL asmMOVDMemReg(uint32 disp, NativeReg reg1)
{
	// legacy-named alias
	asmMOV((const void *)disp, reg1);
}
1746 |
|
1747 |
void FASTCALL asmMOV(const void *disp, NativeReg16 reg1)
{
	// mov word [disp32], reg1 (16-bit store to an absolute address)
	byte instr[7];
	instr[0] = 0x66;	// operand-size prefix
	if (reg1==AX) {
		instr[1] = 0xa3;	// short form: mov [disp32], ax
		*((uint32 *)&instr[2]) = (uint32)disp;
		jitcEmit(instr, 6);
	} else {
		instr[1] = 0x89;	// mov r/m16, r16
		instr[2] = 0x05 | (reg1 << 3);	// disp32 addressing
		*((uint32 *)&instr[3]) = (uint32)disp;
		jitcEmit(instr, 7);
	}
}
void FASTCALL asmMOVDMemReg16(uint32 disp, NativeReg reg1)
{
	// legacy-named alias
	asmMOV((const void *)disp, (NativeReg16)reg1);
}
1766 |
|
1767 |
void FASTCALL asmMOV(NativeReg reg1, const void *disp)
{
	// mov reg1, dword [disp32] (load from an absolute address)
	byte instr[6];
	if (reg1==EAX) {
		instr[0] = 0xa1;	// short form: mov eax, [disp32]
		*((uint32 *)&instr[1]) = (uint32)disp;
		jitcEmit(instr, 5);
	} else {
		instr[0] = 0x8b;	// mov r32, r/m32
		instr[1] = 0x05 | (reg1 << 3);	// disp32 addressing
		*((uint32 *)&instr[2]) = (uint32)disp;
		jitcEmit(instr, 6);
	}
}
void FASTCALL asmMOVRegDMem(NativeReg reg1, uint32 disp)
{
	// legacy-named alias
	asmMOV(reg1, (const void *)disp);
}
1785 |
|
1786 |
void FASTCALL asmMOV(NativeReg16 reg1, const void *disp)
{
	// mov reg1, word [disp32] (16-bit load from an absolute address)
	byte instr[7];
	instr[0] = 0x66;	// operand-size prefix
	if (reg1==AX) {
		instr[1] = 0xa1;	// short form: mov ax, [disp32]
		*((uint32 *)&instr[2]) = (uint32)disp;
		jitcEmit(instr, 6);
	} else {
		instr[1] = 0x8b;	// mov r16, r/m16
		instr[2] = 0x05 | (reg1 << 3);	// disp32 addressing
		*((uint32 *)&instr[3]) = (uint32)disp;
		jitcEmit(instr, 7);
	}
}
void FASTCALL asmMOVRegDMem16(NativeReg reg1, uint32 disp)
{
	// legacy-named alias
	asmMOV((NativeReg16)reg1, (const void *)disp);
}
1805 |
|
1806 |
void FASTCALL asmTEST(const void *disp, uint32 imm)
{
	// test dword [disp32], imm -- shrunk to a single-byte test when only
	// one byte of the mask is populated; the address is advanced to the
	// byte in question. instr[1] = 0x05 is ModRM mod=00 reg=/0 rm=disp32
	// (TEST uses opcode extension /0). Little-endian byte order assumed.
	byte instr[15];
	instr[1] = 0x05;
	if (!(imm & 0xffffff00)) {
		instr[0] = 0xf6;	// test byte [disp], imm8
		*((uint32 *)&instr[2]) = (uint32)disp;
		instr[6] = imm;
	} else if (!(imm & 0xffff00ff)) {
		instr[0] = 0xf6;	// mask only in bits 8..15
		*((uint32 *)&instr[2]) = (uint32)disp+1;
		instr[6] = imm >> 8;
	} else if (!(imm & 0xff00ffff)) {
		instr[0] = 0xf6;	// mask only in bits 16..23
		*((uint32 *)&instr[2]) = (uint32)disp+2;
		instr[6] = imm >> 16;
	} else if (!(imm & 0x00ffffff)) {
		instr[0] = 0xf6;	// mask only in bits 24..31
		*((uint32 *)&instr[2]) = (uint32)disp+3;
		instr[6] = imm >> 24;
	} else {
		instr[0] = 0xf7;	// full test dword [disp], imm32
		*((uint32 *)&instr[2]) = (uint32)disp;
		*((uint32 *)&instr[6]) = imm;
		jitcEmit(instr, 10);
		return;
	}
	jitcEmit(instr, 7);
}
void FASTCALL asmTESTDMemImm(uint32 disp, uint32 imm)
{
	// legacy-named alias
	asmTEST((const void *)disp, imm);
}
1839 |
|
1840 |
void FASTCALL asmAND(const void *disp, uint32 imm)
{
	// and dword [disp32], imm -- shrunk to a single-byte AND when all
	// OTHER bytes of the mask are 0xff (they would be unchanged anyway).
	// instr[1] = 0x25 is ModRM mod=00 reg=/4 (AND) rm=disp32.
	byte instr[15];
	instr[1] = 0x25;
	if ((imm & 0xffffff00)==0xffffff00) {
		instr[0] = 0x80;	// and byte [disp], imm8
		*((uint32 *)&instr[2]) = (uint32)disp;
		instr[6] = imm;
	} else if ((imm & 0xffff00ff)==0xffff00ff) {
		instr[0] = 0x80;	// only bits 8..15 can clear
		*((uint32 *)&instr[2]) = (uint32)disp+1;
		instr[6] = imm >> 8;
	} else if ((imm & 0xff00ffff)==0xff00ffff) {
		instr[0] = 0x80;	// only bits 16..23 can clear
		*((uint32 *)&instr[2]) = (uint32)disp+2;
		instr[6] = imm >> 16;
	} else if ((imm & 0x00ffffff)==0x00ffffff) {
		instr[0] = 0x80;	// only bits 24..31 can clear
		*((uint32 *)&instr[2]) = (uint32)disp+3;
		instr[6] = imm >> 24;
	} else {
		instr[0] = 0x81;	// full and dword [disp], imm32
		*((uint32 *)&instr[2]) = (uint32)disp;
		*((uint32 *)&instr[6]) = imm;
		jitcEmit(instr, 10);
		return;
	}
	jitcEmit(instr, 7);
}
void FASTCALL asmANDDMemImm(uint32 disp, uint32 imm)
{
	// legacy-named alias
	asmAND((const void *)disp, imm);
}
1873 |
|
1874 |
void FASTCALL asmOR(const void *disp, uint32 imm)
{
	// or dword [disp32], imm -- shrunk to a single-byte OR when only one
	// byte of the mask is populated (zero bytes leave memory unchanged).
	// instr[1] = 0x0d is ModRM mod=00 reg=/1 (OR) rm=disp32.
	byte instr[15];
	instr[1] = 0x0d;
	if (!(imm & 0xffffff00)) {
		instr[0] = 0x80;	// or byte [disp], imm8
		*((uint32 *)&instr[2]) = (uint32)disp;
		instr[6] = imm;
	} else if (!(imm & 0xffff00ff)) {
		instr[0] = 0x80;	// mask only in bits 8..15
		*((uint32 *)&instr[2]) = (uint32)disp+1;
		instr[6] = imm >> 8;
	} else if (!(imm & 0xff00ffff)) {
		instr[0] = 0x80;	// mask only in bits 16..23
		*((uint32 *)&instr[2]) = (uint32)disp+2;
		instr[6] = imm >> 16;
	} else if (!(imm & 0x00ffffff)) {
		instr[0] = 0x80;	// mask only in bits 24..31
		*((uint32 *)&instr[2]) = (uint32)disp+3;
		instr[6] = imm >> 24;
	} else {
		instr[0] = 0x81;	// full or dword [disp], imm32
		*((uint32 *)&instr[2]) = (uint32)disp;
		*((uint32 *)&instr[6]) = imm;
		jitcEmit(instr, 10);
		return;
	}
	jitcEmit(instr, 7);
}
void FASTCALL asmORDMemImm(uint32 disp, uint32 imm)
{
	// legacy-named alias
	asmOR((const void *)disp, imm);
}
1907 |
|
1908 |
|
1909 |
void FASTCALL asmMOVxx(X86MOVxx opc, NativeReg reg1, NativeReg8 reg2) |
1910 |
{ |
1911 |
byte instr[3] = {0x0f, opc, 0xc0+(reg1<<3)+reg2}; |
1912 |
jitcEmit(instr, sizeof(instr)); |
1913 |
} |
1914 |
void FASTCALL asmMOVxxRegReg8(X86MOVxx opc, NativeReg reg1, NativeReg8 reg2) |
1915 |
{ |
1916 |
asmMOVxx(opc, reg1, reg2); |
1917 |
} |
1918 |
|
1919 |
void FASTCALL asmMOVxx(X86MOVxx opc, NativeReg reg1, NativeReg16 reg2) |
1920 |
{ |
1921 |
byte instr[3] = {0x0f, opc+1, 0xc0+(reg1<<3)+reg2}; |
1922 |
jitcEmit(instr, sizeof(instr)); |
1923 |
} |
1924 |
void FASTCALL asmMOVxxRegReg16(X86MOVxx opc, NativeReg reg1, NativeReg reg2) |
1925 |
{ |
1926 |
asmMOVxx(opc, reg1, (NativeReg16)reg2); |
1927 |
} |
1928 |
|
1929 |
void FASTCALL asmMOVxxRegMem8(X86MOVxx opc, NativeReg reg1, byte *modrm, int len) |
1930 |
{ |
1931 |
byte instr[16] = { 0x0f }; |
1932 |
|
1933 |
instr[1] = opc; |
1934 |
memcpy(&instr[2], modrm, len); |
1935 |
instr[2] |= (reg1 << 3); |
1936 |
|
1937 |
jitcEmit(instr, len+2); |
1938 |
} |
1939 |
void FASTCALL asmMOVxx_B(X86MOVxx opc, NativeReg reg1, modrm_p modrm) |
1940 |
{ |
1941 |
int len = modrm++[0]; |
1942 |
|
1943 |
asmMOVxxRegMem8(opc, reg1, modrm, len); |
1944 |
} |
1945 |
|
1946 |
void FASTCALL asmMOVxxRegMem16(X86MOVxx opc, NativeReg reg1, byte *modrm, int len) |
1947 |
{ |
1948 |
byte instr[16] = { 0x0f }; |
1949 |
|
1950 |
instr[1] = opc+1; |
1951 |
memcpy(&instr[2], modrm, len); |
1952 |
instr[2] |= (reg1 << 3); |
1953 |
|
1954 |
jitcEmit(instr, len+2); |
1955 |
} |
1956 |
void FASTCALL asmMOVxx_W(X86MOVxx opc, NativeReg reg1, modrm_p modrm) |
1957 |
{ |
1958 |
int len = modrm++[0]; |
1959 |
|
1960 |
asmMOVxxRegMem16(opc, reg1, modrm, len); |
1961 |
} |
1962 |
|
1963 |
void FASTCALL asmSET(X86FlagTest flags, NativeReg8 reg1) |
1964 |
{ |
1965 |
byte instr[3] = {0x0f, 0x90+flags, 0xc0+reg1}; |
1966 |
jitcEmit(instr, sizeof(instr)); |
1967 |
} |
1968 |
void FASTCALL asmSETReg8(X86FlagTest flags, NativeReg8 reg1) |
1969 |
{ |
1970 |
asmSET(flags, reg1); |
1971 |
} |
1972 |
|
1973 |
void FASTCALL asmSETMem(X86FlagTest flags, byte *modrm, int len) |
1974 |
{ |
1975 |
byte instr[15]; |
1976 |
instr[0] = 0x0f; |
1977 |
instr[1] = 0x90+flags; |
1978 |
memcpy(instr+2, modrm, len); |
1979 |
jitcEmit(instr, len+2); |
1980 |
} |
1981 |
void FASTCALL asmSET(X86FlagTest flags, modrm_p modrm) |
1982 |
{ |
1983 |
int len = modrm++[0]; |
1984 |
|
1985 |
asmSETMem(flags, modrm, len); |
1986 |
} |
1987 |
|
1988 |
void FASTCALL asmCMOV(X86FlagTest flags, NativeReg reg1, NativeReg reg2)
{
	// cmovCC reg1, reg2. On hosts without CMOV, synthesize it with a
	// short conditional jump over a plain mov.
	if (gJITC.hostCPUCaps.cmov) {
		byte instr[3] = {0x0f, 0x40+flags, 0xc0+(reg1<<3)+reg2};
		jitcEmit(instr, sizeof(instr));
	} else {
		// flags^1 inverts the condition code, so the jump skips the
		// mov exactly when the cmov would not have moved
		byte instr[4] = {
			0x70+(flags ^ 1), 0x02, // jnCC $+2
			0x8b, 0xc0+(reg1<<3)+reg2, // mov reg1, reg2
		};
		jitcEmit(instr, sizeof instr);
	}
}
void FASTCALL asmCMOVRegReg(X86FlagTest flags, NativeReg reg1, NativeReg reg2)
{
	// legacy-named alias
	asmCMOV(flags, reg1, reg2);
}
2005 |
|
2006 |
void FASTCALL asmCMOVRegMem(X86FlagTest flags, NativeReg reg1, byte *modrm, int len)
{
	// cmovCC reg1, mem; falls back to jnCC + mov on hosts without CMOV
	if (gJITC.hostCPUCaps.cmov) {
		byte instr[16] = {0x0f, 0x40+flags };
		memcpy(&instr[2], modrm, len);
		instr[2] |= (reg1<<3);
		jitcEmit(instr, len+2);
	} else {
		// jump displacement = size of the mov (1 opcode + len ModRM)
		byte instr[17] = {
			0x70+(flags ^ 1), 1 + len, // jnCC over the mov
			0x8b, // mov reg1, mem
		};
		memcpy(&instr[3], modrm, len);
		instr[3] |= (reg1<<3);
		jitcEmit(instr, len+3);
	}
}
void FASTCALL asmCMOV(X86FlagTest flags, NativeReg reg1, modrm_p modrm)
{
	// length-prefixed ModRM wrapper
	int len = modrm++[0];

	asmCMOVRegMem(flags, reg1, modrm, len);
}
2029 |
|
2030 |
void FASTCALL asmShift(X86ShiftOpc opc, NativeReg reg1, uint32 imm) |
2031 |
{ |
2032 |
if (imm == 1) { |
2033 |
byte instr[2] = {0xd1, 0xc0+opc+reg1}; |
2034 |
jitcEmit(instr, sizeof(instr)); |
2035 |
} else { |
2036 |
byte instr[3] = {0xc1, 0xc0+opc+reg1, imm}; |
2037 |
jitcEmit(instr, sizeof(instr)); |
2038 |
} |
2039 |
} |
2040 |
void FASTCALL asmShiftRegImm(X86ShiftOpc opc, NativeReg reg1, uint32 imm) |
2041 |
{ |
2042 |
asmShift(opc, reg1, imm); |
2043 |
} |
2044 |
|
2045 |
void FASTCALL asmShift_CL(X86ShiftOpc opc, NativeReg reg1) |
2046 |
{ |
2047 |
// 0xd3 [ModR/M] |
2048 |
byte instr[2] = {0xd3, 0xc0+opc+reg1}; |
2049 |
jitcEmit(instr, sizeof(instr)); |
2050 |
} |
2051 |
void FASTCALL asmShiftRegCL(X86ShiftOpc opc, NativeReg reg1) |
2052 |
{ |
2053 |
asmShift_CL(opc, reg1); |
2054 |
} |
2055 |
|
2056 |
void FASTCALL asmShift(X86ShiftOpc opc, NativeReg16 reg1, uint32 imm) |
2057 |
{ |
2058 |
if (imm == 1) { |
2059 |
byte instr[3] = {0x66, 0xd1, 0xc0+opc+reg1}; |
2060 |
jitcEmit(instr, sizeof(instr)); |
2061 |
} else { |
2062 |
byte instr[4] = {0x66, 0xc1, 0xc0+opc+reg1, imm}; |
2063 |
jitcEmit(instr, sizeof(instr)); |
2064 |
} |
2065 |
} |
2066 |
void FASTCALL asmShiftReg16Imm(X86ShiftOpc opc, NativeReg reg1, uint32 imm) |
2067 |
{ |
2068 |
asmShift(opc, (NativeReg16)reg1, imm); |
2069 |
} |
2070 |
|
2071 |
void FASTCALL asmShift_CL(X86ShiftOpc opc, NativeReg16 reg1) |
2072 |
{ |
2073 |
// 0xd3 [ModR/M] |
2074 |
byte instr[3] = {0x66, 0xd3, 0xc0+opc+reg1}; |
2075 |
jitcEmit(instr, sizeof(instr)); |
2076 |
} |
2077 |
void FASTCALL asmShiftReg16CL(X86ShiftOpc opc, NativeReg reg1) |
2078 |
{ |
2079 |
asmShift_CL(opc, (NativeReg16)reg1); |
2080 |
} |
2081 |
|
2082 |
void FASTCALL asmShift(X86ShiftOpc opc, NativeReg8 reg1, uint32 imm) |
2083 |
{ |
2084 |
if (imm == 1) { |
2085 |
byte instr[2] = {0xd0, 0xc0+opc+reg1}; |
2086 |
jitcEmit(instr, sizeof(instr)); |
2087 |
} else { |
2088 |
byte instr[3] = {0xc0, 0xc0+opc+reg1, imm}; |
2089 |
jitcEmit(instr, sizeof(instr)); |
2090 |
} |
2091 |
} |
2092 |
void FASTCALL asmShiftReg8Imm(X86ShiftOpc opc, NativeReg8 reg1, uint32 imm) |
2093 |
{ |
2094 |
asmShift(opc, reg1, imm); |
2095 |
} |
2096 |
|
2097 |
void FASTCALL asmShift_CL(X86ShiftOpc opc, NativeReg8 reg1)
{
	// shift/rotate r8 by CL
	// 0xd2 [ModR/M]  (the old comment said 0xd3, which is the 32-bit form)
	byte instr[2] = {0xd2, 0xc0+opc+reg1};
	jitcEmit(instr, sizeof(instr));
}
void FASTCALL asmShiftReg8CL(X86ShiftOpc opc, NativeReg8 reg1)
{
	// legacy-named alias
	asmShift_CL(opc, reg1);
}
2107 |
|
2108 |
void FASTCALL asmIMUL(NativeReg reg1, NativeReg reg2, uint32 imm) |
2109 |
{ |
2110 |
if (imm <= 0x7f || imm >= 0xffffff80) { |
2111 |
byte instr[3] = {0x6b, 0xc0+(reg1<<3)+reg2, imm}; |
2112 |
jitcEmit(instr, sizeof(instr)); |
2113 |
} else { |
2114 |
byte instr[6] = {0x69, 0xc0+(reg1<<3)+reg2}; |
2115 |
*((uint32*)(&instr[2])) = imm; |
2116 |
jitcEmit(instr, sizeof(instr)); |
2117 |
} |
2118 |
} |
2119 |
|
2120 |
void FASTCALL asmIMUL(NativeReg reg1, NativeReg reg2) |
2121 |
{ |
2122 |
byte instr[3] = {0x0f, 0xaf, 0xc0+(reg1<<3)+reg2}; |
2123 |
jitcEmit(instr, sizeof(instr)); |
2124 |
} |
2125 |
|
2126 |
void FASTCALL asmIMULRegRegImm(NativeReg reg1, NativeReg reg2, uint32 imm) |
2127 |
{ |
2128 |
asmIMUL(reg1, reg2, imm); |
2129 |
} |
2130 |
|
2131 |
void FASTCALL asmIMULRegReg(NativeReg reg1, NativeReg reg2) |
2132 |
{ |
2133 |
asmIMUL(reg1, reg2); |
2134 |
} |
2135 |
|
2136 |
void FASTCALL asmINC(NativeReg reg1) |
2137 |
{ |
2138 |
jitcEmit1(0x40+reg1); |
2139 |
} |
2140 |
void FASTCALL asmINCReg(NativeReg reg1) |
2141 |
{ |
2142 |
asmINC(reg1); |
2143 |
} |
2144 |
|
2145 |
void FASTCALL asmDECReg(NativeReg reg1) |
2146 |
{ |
2147 |
jitcEmit1(0x48+reg1); |
2148 |
} |
2149 |
void FASTCALL asmDEC(NativeReg reg1) |
2150 |
{ |
2151 |
asmDEC(reg1); |
2152 |
} |
2153 |
|
2154 |
void FASTCALL asmLEA(NativeReg reg1, byte *modrm, int len) |
2155 |
{ |
2156 |
byte instr[15]; |
2157 |
instr[0] = 0x8d; |
2158 |
memcpy(instr+1, modrm, len); |
2159 |
instr[1] |= reg1<<3; |
2160 |
jitcEmit(instr, len+1); |
2161 |
} |
2162 |
void FASTCALL asmLEA(NativeReg reg1, modrm_p modrm) |
2163 |
{ |
2164 |
int len = modrm++[0]; |
2165 |
|
2166 |
asmLEA(reg1, modrm, len); |
2167 |
} |
2168 |
|
2169 |
void FASTCALL asmBTx(X86BitTest opc, NativeReg reg1, int value) |
2170 |
{ |
2171 |
byte instr[4] = {0x0f, 0xba, 0xc0+(opc<<3)+reg1, value}; |
2172 |
jitcEmit(instr, sizeof instr); |
2173 |
} |
2174 |
void FASTCALL asmBTxRegImm(X86BitTest opc, NativeReg reg1, int value) |
2175 |
{ |
2176 |
asmBTx(opc, reg1, value); |
2177 |
} |
2178 |
|
2179 |
void FASTCALL asmBTxMemImm(X86BitTest opc, byte *modrm, int len, int value) |
2180 |
{ |
2181 |
byte instr[15]; |
2182 |
instr[0] = 0x0f; |
2183 |
instr[1] = 0xba; |
2184 |
memcpy(instr+2, modrm, len); |
2185 |
instr[2] |= opc<<3; |
2186 |
instr[len+2] = value; |
2187 |
jitcEmit(instr, len+3); |
2188 |
} |
2189 |
void FASTCALL asmBTx(X86BitTest opc, modrm_p modrm, int value) |
2190 |
{ |
2191 |
int len = modrm++[0]; |
2192 |
|
2193 |
asmBTxMemImm(opc, modrm, len, value); |
2194 |
} |
2195 |
|
2196 |
void FASTCALL asmBSx(X86BitSearch opc, NativeReg reg1, NativeReg reg2) |
2197 |
{ |
2198 |
byte instr[3] = {0x0f, opc, 0xc0+(reg1<<3)+reg2}; |
2199 |
jitcEmit(instr, sizeof(instr)); |
2200 |
} |
2201 |
void FASTCALL asmBSxRegReg(X86BitSearch opc, NativeReg reg1, NativeReg reg2) |
2202 |
{ |
2203 |
asmBSx(opc, reg1, reg2); |
2204 |
} |
2205 |
|
2206 |
void FASTCALL asmBSWAP(NativeReg reg) |
2207 |
{ |
2208 |
byte instr[2]; |
2209 |
instr[0] = 0x0f; |
2210 |
instr[1] = 0xc8+reg; |
2211 |
jitcEmit(instr, sizeof(instr)); |
2212 |
} |
2213 |
|
2214 |
/*
 *	Emits a relative jmp to 'to', choosing rel8 or rel32.
 *	jitcEmitAssure() is used because the displacement is relative to
 *	the exact emit address: when the assure fails it may have started
 *	a new translation page (moving tcp), so the displacement has to be
 *	recomputed — hence the retry loop.
 */
void FASTCALL asmJMP(NativeAddress to)
{
	for (;;) {
		uint32 rel = (uint32)(to - (gJITC.currentPage->tcp + 2));
		if (rel <= 0x7f || rel >= 0xffffff80) {
			if (!jitcEmitAssure(2)) continue;	// page changed, recompute
			byte instr[2];
			instr[0] = 0xeb;	// jmp rel8
			instr[1] = rel;
			jitcEmit(instr, 2);
		} else {
			if (!jitcEmitAssure(5)) continue;	// page changed, recompute
			byte instr[5];
			instr[0] = 0xe9;	// jmp rel32
			*((uint32 *)&instr[1]) = (uint32)(to - (gJITC.currentPage->tcp + 5));
			jitcEmit(instr, 5);
		}
		return;
	}
}
2237 |
|
2238 |
/*
 *	Emits a conditional jump (Jcc) to 'to', choosing rel8 or rel32.
 *	Same retry scheme as asmJMP(): a failed jitcEmitAssure() may have
 *	moved tcp to a new page, invalidating the computed displacement.
 */
void FASTCALL asmJxx(X86FlagTest flags, NativeAddress to)
{
	for (;;) {
		uint32 rel = (uint32)(to - (gJITC.currentPage->tcp + 2));
		if (rel <= 0x7f || rel >= 0xffffff80) {
			if (!jitcEmitAssure(2)) continue;	// page changed, recompute
			byte instr[2];
			instr[0] = 0x70 + flags;	// Jcc rel8
			instr[1] = rel;
			jitcEmit(instr, 2);
		} else {
			if (!jitcEmitAssure(6)) continue;	// page changed, recompute
			byte instr[6];
			instr[0] = 0x0f;
			instr[1] = 0x80 + flags;	// Jcc rel32
			*((uint32 *)&instr[2]) = (uint32)(to - (gJITC.currentPage->tcp + 6));
			jitcEmit(instr, 6);
		}
		return;
	}
}
2257 |
|
2258 |
NativeAddress FASTCALL asmJMPFixup() |
2259 |
{ |
2260 |
byte instr[5]; |
2261 |
instr[0] = 0xe9; |
2262 |
jitcEmit(instr, 5); |
2263 |
return gJITC.currentPage->tcp - 4; |
2264 |
} |
2265 |
|
2266 |
NativeAddress FASTCALL asmJxxFixup(X86FlagTest flags) |
2267 |
{ |
2268 |
byte instr[6]; |
2269 |
instr[0] = 0x0f; |
2270 |
instr[1] = 0x80+flags; |
2271 |
jitcEmit(instr, 6); |
2272 |
return gJITC.currentPage->tcp - 4; |
2273 |
} |
2274 |
|
2275 |
void FASTCALL asmResolveFixup(NativeAddress at, NativeAddress to) |
2276 |
{ |
2277 |
/* |
2278 |
* yes, I also didn't believe this could be real code until |
2279 |
* I had written it. -Sebastian |
2280 |
*/ |
2281 |
if (to == 0) { |
2282 |
to = gJITC.currentPage->tcp; |
2283 |
} |
2284 |
*((uint32 *)at) = (uint32)(to - ((uint32)at+4)); |
2285 |
} |
2286 |
|
2287 |
void FASTCALL asmCALL(NativeAddress to) |
2288 |
{ |
2289 |
jitcEmitAssure(5); |
2290 |
byte instr[5]; |
2291 |
instr[0] = 0xe8; |
2292 |
*((uint32 *)&instr[1]) = (uint32)(to - (gJITC.currentPage->tcp+5)); |
2293 |
jitcEmit(instr, 5); |
2294 |
} |
2295 |
|
2296 |
void FASTCALL asmSimple(X86SimpleOpc simple) |
2297 |
{ |
2298 |
if (simple > 0xff) { |
2299 |
jitcEmit((byte*)&simple, 2); |
2300 |
} else { |
2301 |
jitcEmit1(simple); |
2302 |
} |
2303 |
} |
2304 |
|
2305 |
void FASTCALL asmFComp(X86FloatCompOp op, NativeFloatReg sti) |
2306 |
{ |
2307 |
byte instr[2]; |
2308 |
|
2309 |
memcpy(instr, &op, 2); |
2310 |
instr[1] += sti; |
2311 |
|
2312 |
jitcEmit(instr, 2); |
2313 |
} |
2314 |
void FASTCALL asmFCompSTi(X86FloatCompOp op, NativeFloatReg sti) |
2315 |
{ |
2316 |
asmFComp(op, sti); |
2317 |
} |
2318 |
|
2319 |
void FASTCALL asmFICompMem(X86FloatICompOp op, byte *modrm, int len) |
2320 |
{ |
2321 |
byte instr[16]; |
2322 |
|
2323 |
instr[0] = op; |
2324 |
memcpy(&instr[1], modrm, len); |
2325 |
instr[1] |= 2<<3; |
2326 |
jitcEmit(instr, len+1); |
2327 |
} |
2328 |
void FASTCALL asmFIComp(X86FloatICompOp op, modrm_p modrm) |
2329 |
{ |
2330 |
int len = modrm++[0]; |
2331 |
|
2332 |
asmFICompMem(op, modrm, len); |
2333 |
} |
2334 |
|
2335 |
void FASTCALL asmFICompPMem(X86FloatICompOp op, byte *modrm, int len) |
2336 |
{ |
2337 |
byte instr[16]; |
2338 |
|
2339 |
instr[0] = op; |
2340 |
memcpy(&instr[1], modrm, len); |
2341 |
instr[1] |= 3<<3; |
2342 |
jitcEmit(instr, len+1); |
2343 |
} |
2344 |
void FASTCALL asmFICompP(X86FloatICompOp op, modrm_p modrm) |
2345 |
{ |
2346 |
int len = modrm++[0]; |
2347 |
|
2348 |
asmFICompPMem(op, modrm, len); |
2349 |
} |
2350 |
|
2351 |
void FASTCALL asmFArithMem(X86FloatArithOp op, byte *modrm, int len) |
2352 |
{ |
2353 |
int mod = 0; |
2354 |
switch (op) { |
2355 |
case X86_FADD: |
2356 |
mod = 0; |
2357 |
break; |
2358 |
case X86_FMUL: |
2359 |
mod = 1; |
2360 |
break; |
2361 |
case X86_FDIV: |
2362 |
mod = 6; |
2363 |
break; |
2364 |
case X86_FDIVR: |
2365 |
mod = 7; |
2366 |
break; |
2367 |
case X86_FSUB: |
2368 |
mod = 4; |
2369 |
break; |
2370 |
case X86_FSUBR: |
2371 |
mod = 5; |
2372 |
break; |
2373 |
} |
2374 |
byte instr[15]; |
2375 |
instr[0] = 0xdc; |
2376 |
memcpy(instr+1, modrm, len); |
2377 |
instr[1] |= mod<<3; |
2378 |
jitcEmit(instr, len+1); |
2379 |
} |
2380 |
void FASTCALL asmFArith(X86FloatArithOp op, modrm_p modrm) |
2381 |
{ |
2382 |
int len = modrm++[0]; |
2383 |
|
2384 |
asmFArithMem(op, modrm, len); |
2385 |
} |
2386 |
|
2387 |
void FASTCALL asmFArith_ST0(X86FloatArithOp op, NativeFloatReg sti) |
2388 |
{ |
2389 |
byte instr[2] = {0xd8, op+sti}; |
2390 |
jitcEmit(instr, sizeof instr); |
2391 |
} |
2392 |
void FASTCALL asmFArithST0(X86FloatArithOp op, NativeFloatReg sti) |
2393 |
{ |
2394 |
asmFArith_ST0(op, sti); |
2395 |
} |
2396 |
|
2397 |
void FASTCALL asmFArith_STi(X86FloatArithOp op, NativeFloatReg sti) |
2398 |
{ |
2399 |
byte instr[2] = {0xdc, op+sti}; |
2400 |
jitcEmit(instr, sizeof instr); |
2401 |
} |
2402 |
void FASTCALL asmFArithSTi(X86FloatArithOp op, NativeFloatReg sti) |
2403 |
{ |
2404 |
asmFArith_STi(op, sti); |
2405 |
} |
2406 |
|
2407 |
void FASTCALL asmFArithP_STi(X86FloatArithOp op, NativeFloatReg sti) |
2408 |
{ |
2409 |
byte instr[2] = {0xde, op+sti}; |
2410 |
jitcEmit(instr, sizeof instr); |
2411 |
} |
2412 |
void FASTCALL asmFArithSTiP(X86FloatArithOp op, NativeFloatReg sti) |
2413 |
{ |
2414 |
asmFArithP_STi(op, sti); |
2415 |
} |
2416 |
|
2417 |
void FASTCALL asmFXCH(NativeFloatReg sti) |
2418 |
{ |
2419 |
byte instr[2] = {0xd9, 0xc8+sti}; |
2420 |
jitcEmit(instr, sizeof instr); |
2421 |
} |
2422 |
void FASTCALL asmFXCHSTi(NativeFloatReg sti) |
2423 |
{ |
2424 |
asmFXCH(sti); |
2425 |
} |
2426 |
|
2427 |
void FASTCALL asmFFREE(NativeFloatReg sti) |
2428 |
{ |
2429 |
byte instr[2] = {0xdd, 0xc0+sti}; |
2430 |
jitcEmit(instr, sizeof instr); |
2431 |
} |
2432 |
void FASTCALL asmFFREESTi(NativeFloatReg sti) |
2433 |
{ |
2434 |
asmFFREE(sti); |
2435 |
} |
2436 |
|
2437 |
void FASTCALL asmFFREEP(NativeFloatReg sti) |
2438 |
{ |
2439 |
/* |
2440 |
* AMD says: |
2441 |
* "Note that the FREEP instructions, although insufficiently |
2442 |
* documented in the past, is supported by all 32-bit x86 processors." |
2443 |
*/ |
2444 |
byte instr[2] = {0xdf, 0xc0+sti}; |
2445 |
jitcEmit(instr, sizeof instr); |
2446 |
} |
2447 |
void FASTCALL asmFFREEPSTi(NativeFloatReg sti) |
2448 |
{ |
2449 |
asmFFREEP(sti); |
2450 |
} |
2451 |
|
2452 |
void FASTCALL asmFSimple(X86FloatOp op) |
2453 |
{ |
2454 |
jitcEmit((byte*)&op, 2); |
2455 |
} |
2456 |
void FASTCALL asmFSimpleST0(X86FloatOp op) |
2457 |
{ |
2458 |
asmFSimple(op); |
2459 |
} |
2460 |
|
2461 |
void FASTCALL asmFLDSingleMem(byte *modrm, int len) |
2462 |
{ |
2463 |
byte instr[15]; |
2464 |
instr[0] = 0xd9; |
2465 |
memcpy(instr+1, modrm, len); |
2466 |
jitcEmit(instr, len+1); |
2467 |
} |
2468 |
void FASTCALL asmFLD_Single(modrm_p modrm) |
2469 |
{ |
2470 |
int len = modrm++[0]; |
2471 |
|
2472 |
asmFLDSingleMem(modrm, len); |
2473 |
} |
2474 |
|
2475 |
void FASTCALL asmFLDDoubleMem(byte *modrm, int len) |
2476 |
{ |
2477 |
byte instr[15]; |
2478 |
instr[0] = 0xdd; |
2479 |
memcpy(instr+1, modrm, len); |
2480 |
jitcEmit(instr, len+1); |
2481 |
} |
2482 |
void FASTCALL asmFLD_Double(modrm_p modrm) |
2483 |
{ |
2484 |
int len = modrm++[0]; |
2485 |
|
2486 |
asmFLDDoubleMem(modrm, len); |
2487 |
} |
2488 |
|
2489 |
void FASTCALL asmFLD(NativeFloatReg sti) |
2490 |
{ |
2491 |
byte instr[2] = {0xd9, 0xc0+sti}; |
2492 |
jitcEmit(instr, sizeof instr); |
2493 |
} |
2494 |
void FASTCALL asmFLDSTi(NativeFloatReg sti) |
2495 |
{ |
2496 |
asmFLD(sti); |
2497 |
} |
2498 |
|
2499 |
void FASTCALL asmFILD16(byte *modrm, int len) |
2500 |
{ |
2501 |
byte instr[15]; |
2502 |
instr[0] = 0xdf; |
2503 |
memcpy(instr+1, modrm, len); |
2504 |
jitcEmit(instr, len+1); |
2505 |
} |
2506 |
void FASTCALL asmFILD_W(modrm_p modrm) |
2507 |
{ |
2508 |
int len = modrm++[0]; |
2509 |
|
2510 |
asmFILD16(modrm, len); |
2511 |
} |
2512 |
|
2513 |
void FASTCALL asmFILD(byte *modrm, int len) |
2514 |
{ |
2515 |
byte instr[15]; |
2516 |
instr[0] = 0xdb; |
2517 |
memcpy(instr+1, modrm, len); |
2518 |
jitcEmit(instr, len+1); |
2519 |
} |
2520 |
void FASTCALL asmFILD_D(modrm_p modrm) |
2521 |
{ |
2522 |
int len = modrm++[0]; |
2523 |
|
2524 |
asmFILD(modrm, len); |
2525 |
} |
2526 |
|
2527 |
void FASTCALL asmFILD_Q(modrm_p modrm) |
2528 |
{ |
2529 |
byte instr[15]; |
2530 |
instr[0] = 0xdf; |
2531 |
memcpy(instr+1, modrm+1, modrm[0]); |
2532 |
instr[1] |= 5<<3; |
2533 |
jitcEmit(instr, modrm[0]+1); |
2534 |
} |
2535 |
|
2536 |
void FASTCALL asmFSTSingleMem(byte *modrm, int len) |
2537 |
{ |
2538 |
byte instr[15]; |
2539 |
instr[0] = 0xd9; |
2540 |
memcpy(instr+1, modrm, len); |
2541 |
instr[1] |= 2<<3; |
2542 |
jitcEmit(instr, len+1); |
2543 |
} |
2544 |
void FASTCALL asmFST_Single(modrm_p modrm) |
2545 |
{ |
2546 |
int len = modrm++[0]; |
2547 |
|
2548 |
asmFSTSingleMem(modrm, len); |
2549 |
} |
2550 |
|
2551 |
void FASTCALL asmFSTPSingleMem(byte *modrm, int len) |
2552 |
{ |
2553 |
byte instr[15]; |
2554 |
instr[0] = 0xd9; |
2555 |
memcpy(instr+1, modrm, len); |
2556 |
instr[1] |= 3<<3; |
2557 |
jitcEmit(instr, len+1); |
2558 |
} |
2559 |
void FASTCALL asmFSTP_Single(modrm_p modrm) |
2560 |
{ |
2561 |
int len = modrm++[0]; |
2562 |
|
2563 |
asmFSTPSingleMem(modrm, len); |
2564 |
} |
2565 |
|
2566 |
void FASTCALL asmFSTDoubleMem(byte *modrm, int len) |
2567 |
{ |
2568 |
byte instr[15]; |
2569 |
instr[0] = 0xdd; |
2570 |
memcpy(instr+1, modrm, len); |
2571 |
instr[1] |= 2<<3; |
2572 |
jitcEmit(instr, len+1); |
2573 |
} |
2574 |
void FASTCALL asmFST_Double(modrm_p modrm) |
2575 |
{ |
2576 |
int len = modrm++[0]; |
2577 |
|
2578 |
asmFSTDoubleMem(modrm, len); |
2579 |
} |
2580 |
|
2581 |
void FASTCALL asmFSTPDoubleMem(byte *modrm, int len) |
2582 |
{ |
2583 |
byte instr[15]; |
2584 |
instr[0] = 0xdd; |
2585 |
memcpy(instr+1, modrm, len); |
2586 |
instr[1] |= 3<<3; |
2587 |
jitcEmit(instr, len+1); |
2588 |
} |
2589 |
void FASTCALL asmFSTP_Double(modrm_p modrm) |
2590 |
{ |
2591 |
int len = modrm++[0]; |
2592 |
|
2593 |
asmFSTPDoubleMem(modrm, len); |
2594 |
} |
2595 |
|
2596 |
void FASTCALL asmFST(NativeFloatReg sti) |
2597 |
{ |
2598 |
byte instr[2] = {0xdd, 0xd0+sti}; |
2599 |
jitcEmit(instr, sizeof instr); |
2600 |
} |
2601 |
void FASTCALL asmFSTDSTi(NativeFloatReg sti) |
2602 |
{ |
2603 |
asmFST(sti); |
2604 |
} |
2605 |
|
2606 |
void FASTCALL asmFSTP(NativeFloatReg sti) |
2607 |
{ |
2608 |
byte instr[2] = {0xdd, 0xd8+sti}; |
2609 |
jitcEmit(instr, sizeof instr); |
2610 |
} |
2611 |
void FASTCALL asmFSTDPSTi(NativeFloatReg sti) |
2612 |
{ |
2613 |
asmFSTP(sti); |
2614 |
} |
2615 |
|
2616 |
void FASTCALL asmFISTP_W(modrm_p modrm) |
2617 |
{ |
2618 |
byte instr[15]; |
2619 |
instr[0] = 0xdf; |
2620 |
memcpy(instr+1, modrm+1, modrm[0]); |
2621 |
instr[1] |= 3<<3; |
2622 |
jitcEmit(instr, modrm[0]+1); |
2623 |
} |
2624 |
|
2625 |
void FASTCALL asmFISTPMem(byte *modrm, int len) |
2626 |
{ |
2627 |
byte instr[15]; |
2628 |
instr[0] = 0xdb; |
2629 |
memcpy(instr+1, modrm, len); |
2630 |
instr[1] |= 3<<3; |
2631 |
jitcEmit(instr, len+1); |
2632 |
} |
2633 |
void FASTCALL asmFISTP_D(modrm_p modrm) |
2634 |
{ |
2635 |
int len = modrm++[0]; |
2636 |
|
2637 |
asmFISTPMem(modrm, len); |
2638 |
} |
2639 |
|
2640 |
void FASTCALL asmFISTPMem64(byte *modrm, int len) |
2641 |
{ |
2642 |
byte instr[15]; |
2643 |
instr[0] = 0xdf; |
2644 |
memcpy(instr+1, modrm, len); |
2645 |
instr[1] |= 7<<3; |
2646 |
jitcEmit(instr, len+1); |
2647 |
} |
2648 |
void FASTCALL asmFISTP_Q(modrm_p modrm) |
2649 |
{ |
2650 |
int len = modrm++[0]; |
2651 |
|
2652 |
asmFISTPMem64(modrm, len); |
2653 |
} |
2654 |
|
2655 |
void FASTCALL asmFISTTPMem(byte *modrm, int len) |
2656 |
{ |
2657 |
byte instr[15]; |
2658 |
instr[0] = 0xdb; |
2659 |
memcpy(instr+1, modrm, len); |
2660 |
instr[1] |= 1<<3; |
2661 |
jitcEmit(instr, len+1); |
2662 |
} |
2663 |
void FASTCALL asmFISTTP(modrm_p modrm) |
2664 |
{ |
2665 |
int len = modrm++[0]; |
2666 |
|
2667 |
asmFISTTPMem(modrm, len); |
2668 |
} |
2669 |
|
2670 |
void FASTCALL asmFLDCWMem(byte *modrm, int len) |
2671 |
{ |
2672 |
byte instr[15]; |
2673 |
instr[0] = 0xd9; |
2674 |
memcpy(instr+1, modrm, len); |
2675 |
instr[1] |= 5<<3; |
2676 |
jitcEmit(instr, len+1); |
2677 |
} |
2678 |
void FASTCALL asmFLDCW(modrm_p modrm) |
2679 |
{ |
2680 |
int len = modrm++[0]; |
2681 |
|
2682 |
asmFLDCWMem(modrm, len); |
2683 |
} |
2684 |
|
2685 |
void FASTCALL asmFSTCWMem(byte *modrm, int len) |
2686 |
{ |
2687 |
byte instr[15]; |
2688 |
instr[0] = 0xd9; |
2689 |
memcpy(instr+1, modrm, len); |
2690 |
instr[1] |= 7<<3; |
2691 |
jitcEmit(instr, len+1); |
2692 |
} |
2693 |
void FASTCALL asmFSTCW(modrm_p modrm) |
2694 |
{ |
2695 |
int len = modrm++[0]; |
2696 |
|
2697 |
asmFSTCWMem(modrm, len); |
2698 |
} |
2699 |
|
2700 |
void FASTCALL asmFSTSWMem(byte *modrm, int len) |
2701 |
{ |
2702 |
byte instr[15]; |
2703 |
instr[0] = 0xdd; |
2704 |
memcpy(instr+1, modrm, len); |
2705 |
instr[1] |= 7<<3; |
2706 |
jitcEmit(instr, len+1); |
2707 |
} |
2708 |
void FASTCALL asmFSTSW(modrm_p modrm) |
2709 |
{ |
2710 |
int len = modrm++[0]; |
2711 |
|
2712 |
asmFSTSWMem(modrm, len); |
2713 |
} |
2714 |
|
2715 |
void FASTCALL asmFSTSW_EAX(void) |
2716 |
{ |
2717 |
byte instr[15] = { 0xdf, 0xe0 }; |
2718 |
jitcEmit(instr, 2); |
2719 |
} |
2720 |
|
2721 |
/* |
2722 |
* Maps one client vector register to one native vector register |
2723 |
* Will never emit any code. |
2724 |
*/ |
2725 |
static inline void FASTCALL jitcMapVectorRegister(NativeVectorReg nreg, JitcVectorReg creg) |
2726 |
{ |
2727 |
//printf("*** map: XMM%u (vr%u)\n", nreg, creg); |
2728 |
gJITC.n2cVectorReg[nreg] = creg; |
2729 |
gJITC.c2nVectorReg[creg] = nreg; |
2730 |
|
2731 |
gJITC.nativeVectorRegState[nreg] = rsMapped; |
2732 |
} |
2733 |
|
2734 |
/* |
2735 |
* Unmaps the native vector register from any client vector register |
2736 |
* Will never emit any code. |
2737 |
*/ |
2738 |
static inline void FASTCALL jitcUnmapVectorRegister(NativeVectorReg nreg) |
2739 |
{ |
2740 |
JitcVectorReg creg = gJITC.n2cVectorReg[nreg]; |
2741 |
|
2742 |
if (nreg != VECTREG_NO && creg != PPC_VECTREG_NO) { |
2743 |
//printf("*** unmap: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]); |
2744 |
|
2745 |
gJITC.n2cVectorReg[nreg] = PPC_VECTREG_NO; |
2746 |
gJITC.c2nVectorReg[creg] = VECTREG_NO; |
2747 |
|
2748 |
gJITC.nativeVectorRegState[nreg] = rsUnused; |
2749 |
} |
2750 |
} |
2751 |
|
2752 |
/* |
2753 |
* Marks the native vector register as dirty. |
2754 |
* Does *not* touch native vector register. |
2755 |
* Will not produce code. |
2756 |
*/ |
2757 |
void FASTCALL jitcDirtyVectorRegister(NativeVectorReg nreg) |
2758 |
{ |
2759 |
JitcVectorReg creg = gJITC.n2cVectorReg[nreg]; |
2760 |
|
2761 |
//printf("*** dirty(%u) with creg = %u\n", nreg, creg); |
2762 |
|
2763 |
if (creg == JITC_VECTOR_NEG1 || creg == PPC_VECTREG_NO) { |
2764 |
//printf("*** dirty: %u = %u or %u\n", creg, JITC_VECTOR_NEG1, PPC_REG_NO); |
2765 |
return; |
2766 |
} |
2767 |
|
2768 |
if (gJITC.nativeVectorRegState[nreg] == rsUnused) { |
2769 |
printf("!!! Attemped dirty of an anonymous vector register!\n"); |
2770 |
return; |
2771 |
} |
2772 |
|
2773 |
if (creg == gJITC.nativeVectorReg) { |
2774 |
gJITC.nativeVectorReg = VECTREG_NO; |
2775 |
} |
2776 |
|
2777 |
gJITC.nativeVectorRegState[nreg] = rsDirty; |
2778 |
} |
2779 |
|
2780 |
/* |
2781 |
* Marks the native vector register as non-dirty. |
2782 |
* Does *not* flush native vector register. |
2783 |
* Will not produce code. |
2784 |
*/ |
2785 |
static inline void FASTCALL jitcUndirtyVectorRegister(NativeVectorReg nreg) |
2786 |
{ |
2787 |
if (gJITC.nativeVectorRegState[nreg] > rsMapped) { |
2788 |
//printf("*** undirty: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]); |
2789 |
|
2790 |
gJITC.nativeVectorRegState[nreg] = rsMapped; |
2791 |
} |
2792 |
} |
2793 |
|
2794 |
/* |
2795 |
* Loads a native vector register with its mapped value. |
2796 |
* Does not alter the native vector register's markings. |
2797 |
* Will always emit an load. |
2798 |
*/ |
2799 |
static inline void FASTCALL jitcLoadVectorRegister(NativeVectorReg nreg) |
2800 |
{ |
2801 |
JitcVectorReg creg = gJITC.n2cVectorReg[nreg]; |
2802 |
|
2803 |
if (creg == JITC_VECTOR_NEG1 && gJITC.hostCPUCaps.sse2) { |
2804 |
//printf("*** load neg1: XMM%u\n", nreg); |
2805 |
|
2806 |
/* On a P4, we can load -1 far faster with logic */ |
2807 |
asmPALU(PALUD(X86_PCMPEQ), nreg, nreg); |
2808 |
return; |
2809 |
} |
2810 |
|
2811 |
//printf("*** load: XMM%u (vr%u)\n", nreg, creg); |
2812 |
asmMOVAPS(nreg, &gCPU.vr[creg]); |
2813 |
} |
2814 |
|
2815 |
/* |
2816 |
* Stores a native vector register to its mapped client vector register. |
2817 |
* Does not alter the native vector register's markings. |
2818 |
* Will always emit a store. |
2819 |
*/ |
2820 |
static inline void FASTCALL jitcStoreVectorRegister(NativeVectorReg nreg) |
2821 |
{ |
2822 |
JitcVectorReg creg = gJITC.n2cVectorReg[nreg]; |
2823 |
|
2824 |
if (creg == JITC_VECTOR_NEG1 || creg == PPC_VECTREG_NO) |
2825 |
return; |
2826 |
|
2827 |
//printf("*** store: XMM%u (vr%u)\n", nreg, creg); |
2828 |
|
2829 |
asmMOVAPS(&gCPU.vr[creg], nreg); |
2830 |
} |
2831 |
|
2832 |
/* |
2833 |
* Returns the native vector register that is mapped to the client |
2834 |
* vector register. |
2835 |
* Will never emit any code. |
2836 |
*/ |
2837 |
NativeVectorReg FASTCALL jitcGetClientVectorRegisterMapping(JitcVectorReg creg) |
2838 |
{ |
2839 |
return gJITC.c2nVectorReg[creg]; |
2840 |
} |
2841 |
|
2842 |
/* |
2843 |
* Makes the vector register the least recently used vector register. |
2844 |
* Will never emit any code. |
2845 |
*/ |
2846 |
static inline void FASTCALL jitcDiscardVectorRegister(NativeVectorReg nreg) |
2847 |
{ |
2848 |
NativeVectorReg lreg, mreg; |
2849 |
|
2850 |
mreg = gJITC.MRUvregs[nreg]; |
2851 |
lreg = gJITC.LRUvregs[nreg]; |
2852 |
|
2853 |
// remove from the list |
2854 |
gJITC.MRUvregs[lreg] = mreg; |
2855 |
gJITC.LRUvregs[mreg] = lreg; |
2856 |
|
2857 |
mreg = gJITC.MRUvregs[XMM_SENTINEL]; |
2858 |
|
2859 |
// insert into the list in the LRU spot |
2860 |
gJITC.LRUvregs[nreg] = XMM_SENTINEL; |
2861 |
gJITC.MRUvregs[nreg] = mreg; |
2862 |
|
2863 |
gJITC.LRUvregs[mreg] = nreg; |
2864 |
gJITC.MRUvregs[XMM_SENTINEL] = nreg; |
2865 |
} |
2866 |
|
2867 |
/* |
2868 |
* Makes the vector register the most recently used vector register. |
2869 |
* Will never emit any code. |
2870 |
*/ |
2871 |
void FASTCALL jitcTouchVectorRegister(NativeVectorReg nreg) |
2872 |
{ |
2873 |
NativeVectorReg lreg, mreg; |
2874 |
|
2875 |
mreg = gJITC.MRUvregs[nreg]; |
2876 |
lreg = gJITC.LRUvregs[nreg]; |
2877 |
|
2878 |
// remove from the list |
2879 |
gJITC.MRUvregs[lreg] = mreg; |
2880 |
gJITC.LRUvregs[mreg] = lreg; |
2881 |
|
2882 |
lreg = gJITC.LRUvregs[XMM_SENTINEL]; |
2883 |
|
2884 |
// insert into the list in the LRU spot |
2885 |
gJITC.MRUvregs[nreg] = XMM_SENTINEL; |
2886 |
gJITC.LRUvregs[nreg] = lreg; |
2887 |
|
2888 |
gJITC.MRUvregs[lreg] = nreg; |
2889 |
gJITC.LRUvregs[XMM_SENTINEL] = nreg; |
2890 |
} |
2891 |
|
2892 |
/* |
2893 |
* Unmaps a native vector register, and marks it least recently used. |
2894 |
* Will not emit any code. |
2895 |
*/ |
2896 |
void FASTCALL jitcDropSingleVectorRegister(NativeVectorReg nreg) |
2897 |
{ |
2898 |
jitcDiscardVectorRegister(nreg); |
2899 |
jitcUnmapVectorRegister(nreg); |
2900 |
} |
2901 |
|
2902 |
int FASTCALL jitcAssertFlushedVectorRegister(JitcVectorReg creg) |
2903 |
{ |
2904 |
NativeVectorReg nreg = gJITC.c2nVectorReg[creg]; |
2905 |
|
2906 |
if (nreg != VECTREG_NO && gJITC.nativeVectorRegState[nreg] == rsDirty) { |
2907 |
printf("!!! Unflushed vector XMM%u (vr%u)!\n", nreg, creg); |
2908 |
return 1; |
2909 |
} |
2910 |
return 0; |
2911 |
} |
2912 |
int FASTCALL jitcAssertFlushedVectorRegisters() |
2913 |
{ |
2914 |
int ret = 0; |
2915 |
|
2916 |
for (JitcVectorReg i=0; i<32; i++) |
2917 |
ret |= jitcAssertFlushedVectorRegister(i); |
2918 |
|
2919 |
return ret; |
2920 |
} |
2921 |
|
2922 |
/*
 * Debug helper: describes how a client vector register is currently
 * mapped. The trace printfs are disabled; the status text is kept so
 * they can be re-enabled.
 */
void FASTCALL jitcShowVectorRegisterStatus(JitcVectorReg creg)
{
	NativeVectorReg nreg = gJITC.c2nVectorReg[creg];

	if (nreg != VECTREG_NO) {
		int status = gJITC.nativeVectorRegState[nreg];
		// FIX: string literals must bind to const char* in C++;
		// the old non-const 'char *text' was a deprecated conversion
		const char *text;

		if (status == rsUnused)
			text = "unused";
		else if (status == rsMapped)
			text = "mapped";
		else if (status == rsDirty)
			text = "dirty";
		else
			text = "unknown";

		(void)text;	// only consumed by the disabled trace below

		//printf("*** vr%u => XMM%u (%s)\n", creg, nreg, text);
	} else {
		//printf("*** vr%u => memory\n", creg);
	}
}
2944 |
|
2945 |
/* |
2946 |
* If the native vector register is marked dirty, then it writes that |
2947 |
* value out to the client vector register store. |
2948 |
* Will produce a store, if the native vector register is dirty. |
2949 |
*/ |
2950 |
static inline void FASTCALL jitcFlushSingleVectorRegister(NativeVectorReg nreg) |
2951 |
{ |
2952 |
if (gJITC.nativeVectorRegState[nreg] == rsDirty) { |
2953 |
//printf("*** flush: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]); |
2954 |
jitcStoreVectorRegister(nreg); |
2955 |
} |
2956 |
} |
2957 |
|
2958 |
/* |
2959 |
* Flushes the register, frees it, and makes it least recently used. |
2960 |
* Will produce a store, if the native vector register was dirty. |
2961 |
*/ |
2962 |
static inline void FASTCALL jitcTrashSingleVectorRegister(NativeVectorReg nreg) |
2963 |
{ |
2964 |
if (gJITC.nativeVectorRegState[nreg] > rsUnused) { |
2965 |
//printf("*** trash: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]); |
2966 |
} |
2967 |
|
2968 |
jitcFlushSingleVectorRegister(nreg); |
2969 |
jitcDropSingleVectorRegister(nreg); |
2970 |
} |
2971 |
|
2972 |
/* |
2973 |
* Flushes the register, frees it, and makes it most recently used. |
2974 |
* Will produce a store, if the native vector register was dirty. |
2975 |
*/ |
2976 |
static inline void FASTCALL jitcClobberSingleVectorRegister(NativeVectorReg nreg) |
2977 |
{ |
2978 |
if (gJITC.nativeVectorRegState[nreg] > rsUnused) { |
2979 |
//printf("*** clobber: XMM%u (vr%u)\n", nreg, gJITC.n2cVectorReg[nreg]); |
2980 |
} |
2981 |
|
2982 |
jitcFlushSingleVectorRegister(nreg); |
2983 |
jitcTouchVectorRegister(nreg); |
2984 |
jitcUnmapVectorRegister(nreg); |
2985 |
} |
2986 |
|
2987 |
/* |
2988 |
* Allocates a native vector register. |
2989 |
* If hint is non-zero, then it indicates that the value is unlikely |
2990 |
* to be re-used soon, so to keep it at the end of the LRU. |
2991 |
* To use hints, pass hint == the number of temporary registers |
2992 |
* May produce a store, if no native vector registers are available. |
2993 |
*/ |
2994 |
NativeVectorReg FASTCALL jitcAllocVectorRegister(int hint)
{
	NativeVectorReg nreg = gJITC.MRUvregs[XMM_SENTINEL];

	if (hint >= XMM_SENTINEL) {
		// strong hint: evict the least recently used register
		nreg = gJITC.LRUvregs[nreg];

		jitcTrashSingleVectorRegister(nreg);
	} else if (hint) {
		// walk past the (hint-1) most recently used registers
		for (int i = 1; i < hint; i++) {
			nreg = gJITC.MRUvregs[nreg];
		}

		jitcTrashSingleVectorRegister(nreg);
	} else {
		jitcClobberSingleVectorRegister(nreg);
	}

	return nreg;
}

/*
 * Returns native vector register that contains value of client
 * register or allocates new vector register which maps to
 * the client register.
 * Marks the register dirty.
 *
 * May produce a store, if no registers are available.
 * Will never produce a load.
 */
NativeVectorReg FASTCALL jitcMapClientVectorRegisterDirty(JitcVectorReg creg, int hint)
{
	NativeVectorReg nreg = gJITC.c2nVectorReg[creg];

	if (nreg == VECTREG_NO) {
		nreg = jitcAllocVectorRegister(hint);

		jitcMapVectorRegister(nreg, creg);
	} else if (hint) {
		jitcDiscardVectorRegister(nreg);
	} else {
		jitcTouchVectorRegister(nreg);
	}

	jitcDirtyVectorRegister(nreg);

	return nreg;
}

/*
 * Returns native vector register that contains the value of the
 * client vector register, or allocates new register, and
 * loads this value into it.
 *
 * May produce a store, if no register are available.
 * May produce a load, if client vector register isn't mapped.
 */
NativeVectorReg FASTCALL jitcGetClientVectorRegister(JitcVectorReg creg, int hint)
{
	NativeVectorReg nreg = gJITC.c2nVectorReg[creg];

	if (nreg == VECTREG_NO) {
		nreg = jitcAllocVectorRegister(hint);
		jitcMapVectorRegister(nreg, creg);

		jitcLoadVectorRegister(nreg);
	} else if (hint) {
		jitcDiscardVectorRegister(nreg);
	} else {
		jitcTouchVectorRegister(nreg);
	}

	return nreg;
}

/*
 * Same as jitcGetClientVectorRegister(), but additionally marks the
 * native vector register as dirty.
 *
 * May produce a store, if no register are available.
 * May produce a load, if client vector register isn't mapped.
 */
NativeVectorReg FASTCALL jitcGetClientVectorRegisterDirty(JitcVectorReg creg, int hint)
{
	NativeVectorReg nreg = jitcGetClientVectorRegister(creg, hint);

	jitcDirtyVectorRegister(nreg);

	return nreg;
}
3086 |
|
3087 |
/* |
3088 |
* Flushes native vector register(s). |
3089 |
* Resets dirty flags. |
3090 |
* Will produce stores, if vector registers are dirty. |
3091 |
*/ |
3092 |
void FASTCALL jitcFlushVectorRegister(int options) |
3093 |
{ |
3094 |
if (options == JITC_VECTOR_REGS_ALL) { |
3095 |
for (unsigned int i = XMM0; i <= XMM7; i++) { |
3096 |
jitcFlushSingleVectorRegister((NativeVectorReg)i); |
3097 |
jitcUndirtyVectorRegister((NativeVectorReg)i); |
3098 |
} |
3099 |
} else if (options & NATIVE_REG) { |
3100 |
NativeVectorReg nreg = (NativeVectorReg)(options & 0xf); |
3101 |
|
3102 |
jitcFlushSingleVectorRegister(nreg); |
3103 |
jitcUndirtyVectorRegister(nreg); |
3104 |
} |
3105 |
} |
3106 |
|
3107 |
/* |
3108 |
* Flushes native vector register(s). |
3109 |
* Doesn't reset dirty flags. |
3110 |
* Will produce stores, if vector registers are dirty. |
3111 |
*/ |
3112 |
void FASTCALL jitcFlushVectorRegisterDirty(int options) |
3113 |
{ |
3114 |
if (options == JITC_VECTOR_REGS_ALL) { |
3115 |
for (unsigned int i = XMM0; i <= XMM7; i++) { |
3116 |
jitcFlushSingleVectorRegister((NativeVectorReg)i); |
3117 |
} |
3118 |
} else if (options & NATIVE_REG) { |
3119 |
NativeVectorReg nreg = (NativeVectorReg)(options & 0xf); |
3120 |
|
3121 |
jitcFlushSingleVectorRegister(nreg); |
3122 |
} |
3123 |
} |
3124 |
|
3125 |
/* |
3126 |
* Clobbers native vector register(s). |
3127 |
* Will produce stores, if vector registers are dirty. |
3128 |
*/ |
3129 |
void FASTCALL jitcClobberVectorRegister(int options) |
3130 |
{ |
3131 |
if (options == JITC_VECTOR_REGS_ALL) { |
3132 |
for (unsigned int i = XMM0; i <= XMM7; i++) { |
3133 |
jitcClobberSingleVectorRegister((NativeVectorReg)i); |
3134 |
} |
3135 |
} else if (options & NATIVE_REG) { |
3136 |
NativeVectorReg nreg = (NativeVectorReg)(options & 0xf); |
3137 |
|
3138 |
jitcClobberSingleVectorRegister(nreg); |
3139 |
} |
3140 |
} |
3141 |
|
3142 |
/* |
3143 |
* Trashes native vector register(s). |
3144 |
* Will produce stores, if vector registers are dirty. |
3145 |
*/ |
3146 |
void FASTCALL jitcTrashVectorRegister(int options) |
3147 |
{ |
3148 |
if (options == JITC_VECTOR_REGS_ALL) { |
3149 |
for (unsigned int i = XMM0; i <= XMM7; i++) { |
3150 |
jitcTrashSingleVectorRegister((NativeVectorReg)i); |
3151 |
} |
3152 |
} else if (options & NATIVE_REG) { |
3153 |
NativeVectorReg nreg = (NativeVectorReg)(options & 0xf); |
3154 |
|
3155 |
jitcTrashSingleVectorRegister(nreg); |
3156 |
} |
3157 |
} |
3158 |
|
3159 |
/* |
3160 |
* Drops native vector register(s). |
3161 |
* Will not produce any code. |
3162 |
*/ |
3163 |
void FASTCALL jitcDropVectorRegister(int options) |
3164 |
{ |
3165 |
if (options == JITC_VECTOR_REGS_ALL) { |
3166 |
for (unsigned int i = XMM0; i <= XMM7; i++) { |
3167 |
jitcDropSingleVectorRegister((NativeVectorReg)i); |
3168 |
} |
3169 |
} else if (options & NATIVE_REG) { |
3170 |
NativeVectorReg nreg = (NativeVectorReg)(options & 0xf); |
3171 |
|
3172 |
jitcDropSingleVectorRegister(nreg); |
3173 |
} |
3174 |
} |
3175 |
|
3176 |
/* Flushes the native register backing client vector register 'creg'
 * (store if dirty) and clears its dirty state. No-op if unmapped. */
void FASTCALL jitcFlushClientVectorRegister(JitcVectorReg creg)
{
	NativeVectorReg mapped = gJITC.c2nVectorReg[creg];

	if (mapped == VECTREG_NO) return;	/* not in the reg pool */

	jitcFlushSingleVectorRegister(mapped);
	jitcUndirtyVectorRegister(mapped);
}
3185 |
|
3186 |
/* Trashes the native register backing client vector register 'creg'.
 * No-op if the client register is not currently mapped. */
void FASTCALL jitcTrashClientVectorRegister(JitcVectorReg creg)
{
	NativeVectorReg mapped = gJITC.c2nVectorReg[creg];

	if (mapped == VECTREG_NO) return;	/* not in the reg pool */

	jitcTrashSingleVectorRegister(mapped);
}
3194 |
|
3195 |
/* Clobbers the native register backing client vector register 'creg'.
 * No-op if the client register is not currently mapped. */
void FASTCALL jitcClobberClientVectorRegister(JitcVectorReg creg)
{
	NativeVectorReg mapped = gJITC.c2nVectorReg[creg];

	if (mapped == VECTREG_NO) return;	/* not in the reg pool */

	jitcClobberSingleVectorRegister(mapped);
}
3203 |
|
3204 |
/* Drops the native register backing client vector register 'creg'
 * without emitting any code. No-op if unmapped. */
void FASTCALL jitcDropClientVectorRegister(JitcVectorReg creg)
{
	NativeVectorReg mapped = gJITC.c2nVectorReg[creg];

	if (mapped == VECTREG_NO) return;	/* not in the reg pool */

	jitcDropSingleVectorRegister(mapped);
}
3212 |
|
3213 |
/*
 * Renames a native vector register to a different client register.
 * Will not emit a load.
 * May emit a reg->reg move, if the vector register was in memory.
 * May emit a store, if the vector register was dirty
 *
 * reg:  native XMM register whose current value becomes 'creg'
 * creg: client vector register the value should be mapped to
 * hint: non-zero marks the result as discardable (cheap to evict);
 *       zero touches it as recently used instead
 * Returns the native register now mapped to 'creg' (marked dirty).
 */
NativeVectorReg FASTCALL jitcRenameVectorRegisterDirty(NativeVectorReg reg, JitcVectorReg creg, int hint)
{
	NativeVectorReg nreg = gJITC.c2nVectorReg[creg];

	if (nreg == reg) {
		/* That's weird... it's already mapped... */
	} else if (nreg != VECTREG_NO) {
		/* It's already in a register, so rather than losing
		 * reg pool depth, just move the value.
		 */
		asmALUPS(X86_MOVAPS, nreg, reg);
	} else {
		/* Otherwise, only the source register is in the reg
		 * pool, so flush it, then remap it.
		 */
		JitcVectorReg reg2 = gJITC.n2cVectorReg[reg];

		/* NOTE(review): reg2 is a JitcVectorReg but is compared
		 * against VECTREG_NO — confirm both "no register"
		 * sentinels share the same value. */
		if (reg2 != VECTREG_NO) {
			/* flush before unmapping so a dirty value is
			 * stored back first */
			jitcFlushSingleVectorRegister(reg);
			jitcUnmapVectorRegister(reg);
		}

		nreg = reg;
		jitcMapVectorRegister(nreg, creg);
	}

	if (hint) jitcDiscardVectorRegister(nreg);
	else jitcTouchVectorRegister(nreg);

	/* the caller is redefining the value: mark it dirty so it is
	 * eventually stored back to the client register */
	jitcDirtyVectorRegister(nreg);

	return nreg;
}
3252 |
|
3253 |
/*
 * Emits "movaps xmm, [disp32]": aligned 16-byte load from an
 * absolute address into a native vector register.
 */
void asmMOVAPS(NativeVectorReg reg, const void *disp)
{
	byte instr[8] = { 0x0f, 0x28 };
	uint32 addr = (uint32)disp;

	/* modrm: mod=00, r/m=101 -> disp32 absolute addressing */
	instr[2] = 0x05 | (reg << 3);
	/* memcpy instead of *(uint32 *)&instr[3]: that store was
	 * misaligned and violated strict aliasing (UB) */
	memcpy(&instr[3], &addr, sizeof addr);

	jitcEmit(instr, 7);
}
3262 |
|
3263 |
void asmMOVAPS(const void *disp, NativeVectorReg reg) |
3264 |
{ |
3265 |
byte instr[8] = { 0x0f, 0x29 }; |
3266 |
|
3267 |
instr[2] = 0x05 | (reg << 3); |
3268 |
*((uint32 *)&instr[3]) = (uint32)disp; |
3269 |
|
3270 |
jitcEmit(instr, 7); |
3271 |
} |
3272 |
|
3273 |
/*
 * Emits "movups xmm, [disp32]": unaligned 16-byte load from an
 * absolute address into a native vector register.
 */
void asmMOVUPS(NativeVectorReg reg, const void *disp)
{
	byte instr[8] = { 0x0f, 0x10 };
	uint32 addr = (uint32)disp;

	/* modrm: mod=00, r/m=101 -> disp32 absolute addressing */
	instr[2] = 0x05 | (reg << 3);
	/* memcpy instead of *(uint32 *)&instr[3]: that store was
	 * misaligned and violated strict aliasing (UB) */
	memcpy(&instr[3], &addr, sizeof addr);

	jitcEmit(instr, 7);
}
3282 |
|
3283 |
void asmMOVUPS(const void *disp, NativeVectorReg reg) |
3284 |
{ |
3285 |
byte instr[8] = { 0x0f, 0x11 }; |
3286 |
|
3287 |
instr[2] = 0x05 | (reg << 3); |
3288 |
*((uint32 *)&instr[3]) = (uint32)disp; |
3289 |
|
3290 |
jitcEmit(instr, 7); |
3291 |
} |
3292 |
|
3293 |
/*
 * Emits "movss xmm, [disp32]": loads one 32-bit scalar from an
 * absolute address into the low dword of a vector register.
 */
void asmMOVSS(NativeVectorReg reg, const void *disp)
{
	byte instr[10] = { 0xf3, 0x0f, 0x10 };
	uint32 addr = (uint32)disp;

	/* modrm: mod=00, r/m=101 -> disp32 absolute addressing */
	instr[3] = 0x05 | (reg << 3);
	/* memcpy instead of *(uint32 *)&instr[4]: that store was
	 * misaligned and violated strict aliasing (UB) */
	memcpy(&instr[4], &addr, sizeof addr);

	jitcEmit(instr, 8);
}
3302 |
|
3303 |
void asmMOVSS(const void *disp, NativeVectorReg reg) |
3304 |
{ |
3305 |
byte instr[10] = { 0xf3, 0x0f, 0x11 }; |
3306 |
|
3307 |
instr[3] = 0x05 | (reg << 3); |
3308 |
*((uint32 *)&instr[4]) = (uint32)disp; |
3309 |
|
3310 |
jitcEmit(instr, 8); |
3311 |
} |
3312 |
|
3313 |
/* Emits a two-byte-opcode packed-single ALU op (0F <opc>) in
 * register-register form. */
void asmALUPS(X86ALUPSopc opc, NativeVectorReg reg1, NativeVectorReg reg2)
{
	byte instr[3];

	instr[0] = 0x0f;
	instr[1] = opc;
	instr[2] = 0xc0 + (reg1 << 3) + reg2;	/* mod=11: reg, reg */

	jitcEmit(instr, sizeof instr);
}
3322 |
|
3323 |
/* Emits a packed-single ALU op (0F <opc>) with a memory operand
 * given as a length-prefixed mod/rm byte string. */
void asmALUPS(X86ALUPSopc opc, NativeVectorReg reg1, modrm_p modrm)
{
	byte instr[16] = { 0x0f };
	int len = *modrm++;		/* first byte is the mod/rm length */

	instr[1] = opc;
	memcpy(instr + 2, modrm, len);
	instr[2] |= reg1 << 3;		/* fold the reg field into modrm */

	jitcEmit(instr, len + 2);
}
3334 |
|
3335 |
/* Emits a packed-integer ALU op (66 0F <opc>) in register-register
 * form. */
void asmPALU(X86PALUopc opc, NativeVectorReg reg1, NativeVectorReg reg2)
{
	byte instr[4];

	instr[0] = 0x66;	/* operand-size prefix selects XMM form */
	instr[1] = 0x0f;
	instr[2] = opc;
	instr[3] = 0xc0 + (reg1 << 3) + reg2;	/* mod=11: reg, reg */

	jitcEmit(instr, sizeof instr);
}
3344 |
|
3345 |
/*
 * Emits a packed-integer ALU op (66 0F <opc>) with a memory operand
 * given as a length-prefixed mod/rm byte string.
 */
void asmPALU(X86PALUopc opc, NativeVectorReg reg1, modrm_p modrm)
{
	/* was byte instr[5]: a mod/rm sequence can be up to 6 bytes
	 * (modrm + sib + disp32), so len+3 overflowed the buffer.
	 * Use 16 bytes like the other modrm emitters in this file. */
	byte instr[16] = { 0x66, 0x0f };
	int len = modrm++[0];

	instr[2] = opc;
	memcpy(&instr[3], modrm, len);
	instr[3] |= (reg1 << 3);

	jitcEmit(instr, len+3);
}
3356 |
|
3357 |
/* Emits "shufps reg1, reg2, order" (register-register form). */
void asmSHUFPS(NativeVectorReg reg1, NativeVectorReg reg2, int order)
{
	byte instr[4];

	instr[0] = 0x0f;
	instr[1] = 0xc6;			/* SHUFPS opcode */
	instr[2] = 0xc0 + (reg1 << 3) + reg2;	/* mod=11: reg, reg */
	instr[3] = order;			/* shuffle-control immediate */

	jitcEmit(instr, sizeof instr);
}
3363 |
|
3364 |
/* Emits "shufps reg1, mem, order" with a memory operand given as a
 * length-prefixed mod/rm byte string. */
void asmSHUFPS(NativeVectorReg reg1, modrm_p modrm, int order)
{
	byte instr[16] = { 0x0f, 0xc6 };
	int len = *modrm++;		/* first byte is the mod/rm length */

	memcpy(instr + 2, modrm, len);
	instr[2] |= reg1 << 3;		/* fold the reg field into modrm */
	instr[2 + len] = order;		/* shuffle-control immediate */

	jitcEmit(instr, len + 3);
}
3375 |
|
3376 |
/* Emits "pshufd reg1, reg2, order" (register-register form). */
void asmPSHUFD(NativeVectorReg reg1, NativeVectorReg reg2, int order)
{
	byte instr[5];

	instr[0] = 0x66;			/* prefix selects PSHUFD */
	instr[1] = 0x0f;
	instr[2] = 0x70;
	instr[3] = 0xc0 + (reg1 << 3) + reg2;	/* mod=11: reg, reg */
	instr[4] = order;			/* shuffle-control immediate */

	jitcEmit(instr, sizeof instr);
}
3382 |
|
3383 |
/*
 * Emits "pshufd reg1, mem, order" with a memory operand given as a
 * length-prefixed mod/rm byte string.
 */
void asmPSHUFD(NativeVectorReg reg1, modrm_p modrm, int order)
{
	/* was byte instr[5]: a mod/rm sequence can be up to 6 bytes
	 * (modrm + sib + disp32), so len+4 overflowed the buffer.
	 * Use 16 bytes like asmSHUFPS/asmALUPS. */
	byte instr[16] = { 0x66, 0x0f, 0x70 };
	int len = modrm++[0];

	memcpy(&instr[3], modrm, len);
	instr[3] |= (reg1 << 3);
	instr[len+3] = order;		/* shuffle-control immediate */

	jitcEmit(instr, len+4);
}