1 |
/* |
2 |
* PearPC |
3 |
* ppc_vec.h |
4 |
* |
5 |
* Copyright (C) 2004 Daniel Foesch (dfoesch@cs.nmsu.edu) |
6 |
* |
7 |
* This program is free software; you can redistribute it and/or modify |
8 |
* it under the terms of the GNU General Public License version 2 as |
9 |
* published by the Free Software Foundation. |
10 |
* |
11 |
* This program is distributed in the hope that it will be useful, |
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
* GNU General Public License for more details. |
15 |
* |
16 |
* You should have received a copy of the GNU General Public License |
17 |
* along with this program; if not, write to the Free Software |
18 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 |
*/ |
20 |
|
21 |
#ifndef __PPC_VEC_H__ |
22 |
#define __PPC_VEC_H__ |
23 |
|
/*
 *	Mask with only bit 10 set (0x400).
 *	NOTE(review): presumably the record (Rc) bit of vector opcodes --
 *	confirm against the opcode decoder.
 */
#define PPC_OPC_VRc	(1<<10)
25 |
|
/*
 *	Rather than writing every function to be endian-independent, these
 *	macros give endian-independent access to ADDRESSED elements of a vector
 *	register. `index` is always given in big-endian element order: the
 *	big-endian branch uses it unchanged, the little-endian branch mirrors
 *	it (15 - index, 7 - index, ...).
 *
 *	Most vector operations can walk the elements in either direction, so
 *	ordinary for-loops should NOT use these macros; doing so only adds
 *	unneeded index arithmetic on little-endian hosts.
 *
 *	NOTE(review): identifiers that are not defined evaluate to 0 inside
 *	#if, so if HOST_ENDIANESS and HOST_ENDIANESS_LE are both undefined at
 *	this point the little-endian branch is selected silently. This file's
 *	own #include lines come after this block -- confirm the configuration
 *	header is always included before this header.
 */
#if HOST_ENDIANESS == HOST_ENDIANESS_LE

/* Element accessors: mirror the big-endian index into host array order. */
#define VECT_B(reg, index)	((reg).b[15 - (index)])
#define VECT_SB(reg, index)	((reg).sb[15 - (index)])
#define VECT_H(reg, index)	((reg).h[7 - (index)])
#define VECT_SH(reg, index)	((reg).sh[7 - (index)])
#define VECT_W(reg, index)	((reg).w[3 - (index)])
#define VECT_SW(reg, index)	((reg).sw[3 - (index)])
#define VECT_D(reg, index)	((reg).d[1 - (index)])
#define VECT_SD(reg, index)	((reg).sd[1 - (index)])

/* Members of pair `index`: even -> 2*index + 1, odd -> 2*index. */
#define VECT_EVEN(index)	(((index) << 1) + 1)
#define VECT_ODD(index)		(((index) << 1) + 0)

/* Four-element initializer list, emitted in reversed order. */
#define VECT_BUILD(x,y,z,w)	w, z, y, x

#elif HOST_ENDIANESS == HOST_ENDIANESS_BE

/* Element accessors: the index is used as-is. */
#define VECT_B(reg, index)	((reg).b[(index)])
#define VECT_SB(reg, index)	((reg).sb[(index)])
#define VECT_H(reg, index)	((reg).h[(index)])
#define VECT_SH(reg, index)	((reg).sh[(index)])
#define VECT_W(reg, index)	((reg).w[(index)])
#define VECT_SW(reg, index)	((reg).sw[(index)])
#define VECT_D(reg, index)	((reg).d[(index)])
#define VECT_SD(reg, index)	((reg).sd[(index)])

/* Members of pair `index`: even -> 2*index, odd -> 2*index + 1. */
#define VECT_EVEN(index)	(((index) << 1) + 0)
#define VECT_ODD(index)		(((index) << 1) + 1)

/* Four-element initializer list, emitted in the order given. */
#define VECT_BUILD(x,y,z,w)	x, y, z, w

#else
#error Endianess not supported!
#endif
68 |
|
/*
 *	Debug hook macro. Exactly one definition of VECTOR_DEBUG should be
 *	active; the active one expands to nothing. The commented-out
 *	alternatives print the current function name to stderr or call
 *	jitcAssertFlushedVectorRegisters().
 */
//#define VECTOR_DEBUG	fprintf(stderr, "[PPC/VEC] %s\n", __FUNCTION__)
//#define VECTOR_DEBUG	jitcAssertFlushedVectorRegisters()
#define VECTOR_DEBUG

/*
 *	VECTOR_DEBUG_COMMON forwards to VECTOR_DEBUG; switch to the empty
 *	definition below to silence it independently.
 */
#define VECTOR_DEBUG_COMMON	VECTOR_DEBUG
//#define VECTOR_DEBUG_COMMON

/*
 *	Define this (uncomment the line below) to turn off the MSR_VEC check
 *	for vector instructions: ppc_opc_gen_check_vec() only emits the check
 *	when __VEC_EXC_OFF__ is NOT defined.
 */
//#define __VEC_EXC_OFF__
78 |
|
79 |
#include "system/types.h" |
80 |
|
81 |
#include "tools/snprintf.h" |
82 |
|
83 |
#include "jitc.h" |
84 |
#include "jitc_asm.h" |
85 |
#include "x86asm.h" |
86 |
#include "ppc_exc.h" |
87 |
|
88 |
static UNUSED void ppc_opc_gen_check_vec() |
89 |
{ |
90 |
#ifndef __VEC_EXC_OFF__ |
91 |
if (!gJITC.checkedVector) { |
92 |
jitcFloatRegisterClobberAll(); |
93 |
jitcFlushVectorRegister(); |
94 |
jitcClobberCarryAndFlags(); |
95 |
|
96 |
NativeReg r1 = jitcGetClientRegister(PPC_MSR); |
97 |
asmALU(X86_TEST, r1, MSR_VEC); |
98 |
NativeAddress fixup = asmJxxFixup(X86_NZ); |
99 |
|
100 |
jitcFlushRegisterDirty(); |
101 |
asmALU(X86_MOV, ESI, gJITC.pc); |
102 |
asmJMP((NativeAddress)ppc_no_vec_exception_asm); |
103 |
|
104 |
asmResolveFixup(fixup); |
105 |
gJITC.checkedVector = true; |
106 |
} |
107 |
#endif |
108 |
} |
109 |
|
/*
 *	Declarations of the ppc_opc_<mnemonic>() handlers, one per vector
 *	opcode mnemonic. All take no arguments and return nothing; the
 *	definitions are not part of this header. Each name is declared exactly
 *	once, and each has a ppc_opc_gen_<mnemonic>() counterpart declared
 *	further down in this header.
 */
void ppc_opc_vperm();
void ppc_opc_vsel();
void ppc_opc_vsrb();
void ppc_opc_vsrh();
void ppc_opc_vsrw();
void ppc_opc_vsrab();
void ppc_opc_vsrah();
void ppc_opc_vsraw();
void ppc_opc_vsr();
void ppc_opc_vsro();
void ppc_opc_vslb();
void ppc_opc_vslh();
void ppc_opc_vslw();
void ppc_opc_vsl();
void ppc_opc_vslo();
void ppc_opc_vsldoi();
void ppc_opc_vrlb();
void ppc_opc_vrlh();
void ppc_opc_vrlw();

void ppc_opc_vmrghb();
void ppc_opc_vmrghh();
void ppc_opc_vmrghw();
void ppc_opc_vmrglb();
void ppc_opc_vmrglh();
void ppc_opc_vmrglw();

void ppc_opc_vspltb();
void ppc_opc_vsplth();
void ppc_opc_vspltw();
void ppc_opc_vspltisb();
void ppc_opc_vspltish();
void ppc_opc_vspltisw();

void ppc_opc_mfvscr();
void ppc_opc_mtvscr();

void ppc_opc_vpkuhum();
void ppc_opc_vpkuwum();
void ppc_opc_vpkpx();
void ppc_opc_vpkuhus();
void ppc_opc_vpkshss();
void ppc_opc_vpkuwus();
void ppc_opc_vpkswss();
void ppc_opc_vpkshus();
void ppc_opc_vpkswus();

void ppc_opc_vupkhsb();
void ppc_opc_vupkhpx();
void ppc_opc_vupkhsh();
void ppc_opc_vupklsb();
void ppc_opc_vupklpx();
void ppc_opc_vupklsh();

void ppc_opc_vaddubm();
void ppc_opc_vadduhm();
void ppc_opc_vadduwm();
void ppc_opc_vaddfp();
void ppc_opc_vaddcuw();
void ppc_opc_vaddubs();
void ppc_opc_vaddsbs();
void ppc_opc_vadduhs();
void ppc_opc_vaddshs();
void ppc_opc_vadduws();
void ppc_opc_vaddsws();

void ppc_opc_vsububm();
void ppc_opc_vsubuhm();
void ppc_opc_vsubuwm();
void ppc_opc_vsubfp();
void ppc_opc_vsubcuw();
void ppc_opc_vsububs();
void ppc_opc_vsubsbs();
void ppc_opc_vsubuhs();
void ppc_opc_vsubshs();
void ppc_opc_vsubuws();
void ppc_opc_vsubsws();

void ppc_opc_vmuleub();
void ppc_opc_vmulesb();
void ppc_opc_vmuleuh();
void ppc_opc_vmulesh();
void ppc_opc_vmuloub();
void ppc_opc_vmulosb();
void ppc_opc_vmulouh();
void ppc_opc_vmulosh();

void ppc_opc_vmaddfp();
void ppc_opc_vmhaddshs();
void ppc_opc_vmladduhm();
void ppc_opc_vmhraddshs();
void ppc_opc_vmsumubm();
void ppc_opc_vmsumuhm();
void ppc_opc_vmsummbm();
void ppc_opc_vmsumshm();
void ppc_opc_vmsumuhs();
void ppc_opc_vmsumshs();

void ppc_opc_vsum4ubs();
void ppc_opc_vsum4sbs();
void ppc_opc_vsum4shs();
void ppc_opc_vsum2sws();
void ppc_opc_vsumsws();

void ppc_opc_vnmsubfp();

void ppc_opc_vavgub();
void ppc_opc_vavgsb();
void ppc_opc_vavguh();
void ppc_opc_vavgsh();
void ppc_opc_vavguw();
void ppc_opc_vavgsw();

void ppc_opc_vmaxub();
void ppc_opc_vmaxsb();
void ppc_opc_vmaxuh();
void ppc_opc_vmaxsh();
void ppc_opc_vmaxuw();
void ppc_opc_vmaxsw();
void ppc_opc_vmaxfp();

void ppc_opc_vminub();
void ppc_opc_vminsb();
void ppc_opc_vminuh();
void ppc_opc_vminsh();
void ppc_opc_vminuw();
void ppc_opc_vminsw();
void ppc_opc_vminfp();

void ppc_opc_vrfin();
void ppc_opc_vrfip();
void ppc_opc_vrfim();
void ppc_opc_vrfiz();
void ppc_opc_vrefp();
void ppc_opc_vrsqrtefp();
void ppc_opc_vlogefp();
void ppc_opc_vexptefp();

void ppc_opc_vcfux();
void ppc_opc_vcfsx();
void ppc_opc_vctsxs();
void ppc_opc_vctuxs();

void ppc_opc_vand();
void ppc_opc_vandc();
void ppc_opc_vor();
void ppc_opc_vnor();
void ppc_opc_vxor();

void ppc_opc_vcmpequbx();
void ppc_opc_vcmpequhx();
void ppc_opc_vcmpequwx();
void ppc_opc_vcmpeqfpx();
void ppc_opc_vcmpgtubx();
void ppc_opc_vcmpgtsbx();
void ppc_opc_vcmpgtuhx();
void ppc_opc_vcmpgtshx();
void ppc_opc_vcmpgtuwx();
void ppc_opc_vcmpgtswx();
void ppc_opc_vcmpgtfpx();
void ppc_opc_vcmpgefpx();
void ppc_opc_vcmpbfpx();
256 |
|
257 |
JITCFlow ppc_opc_gen_vperm(); |
258 |
JITCFlow ppc_opc_gen_vsel(); |
259 |
JITCFlow ppc_opc_gen_vsrb(); |
260 |
JITCFlow ppc_opc_gen_vsrh(); |
261 |
JITCFlow ppc_opc_gen_vsrw(); |
262 |
JITCFlow ppc_opc_gen_vsrab(); |
263 |
JITCFlow ppc_opc_gen_vsrah(); |
264 |
JITCFlow ppc_opc_gen_vsraw(); |
265 |
JITCFlow ppc_opc_gen_vsr(); |
266 |
JITCFlow ppc_opc_gen_vsro(); |
267 |
JITCFlow ppc_opc_gen_vslb(); |
268 |
JITCFlow ppc_opc_gen_vslh(); |
269 |
JITCFlow ppc_opc_gen_vslw(); |
270 |
JITCFlow ppc_opc_gen_vsl(); |
271 |
JITCFlow ppc_opc_gen_vslo(); |
272 |
JITCFlow ppc_opc_gen_vsldoi(); |
273 |
JITCFlow ppc_opc_gen_vrlb(); |
274 |
JITCFlow ppc_opc_gen_vrlh(); |
275 |
JITCFlow ppc_opc_gen_vrlw(); |
276 |
JITCFlow ppc_opc_gen_vmrghb(); |
277 |
JITCFlow ppc_opc_gen_vmrghh(); |
278 |
JITCFlow ppc_opc_gen_vmrghw(); |
279 |
JITCFlow ppc_opc_gen_vmrglb(); |
280 |
JITCFlow ppc_opc_gen_vmrglh(); |
281 |
JITCFlow ppc_opc_gen_vmrglw(); |
282 |
JITCFlow ppc_opc_gen_vspltb(); |
283 |
JITCFlow ppc_opc_gen_vsplth(); |
284 |
JITCFlow ppc_opc_gen_vspltw(); |
285 |
JITCFlow ppc_opc_gen_vspltisb(); |
286 |
JITCFlow ppc_opc_gen_vspltish(); |
287 |
JITCFlow ppc_opc_gen_vspltisw(); |
288 |
JITCFlow ppc_opc_gen_mfvscr(); |
289 |
JITCFlow ppc_opc_gen_mtvscr(); |
290 |
JITCFlow ppc_opc_gen_vpkuhum(); |
291 |
JITCFlow ppc_opc_gen_vpkuwum(); |
292 |
JITCFlow ppc_opc_gen_vpkpx(); |
293 |
JITCFlow ppc_opc_gen_vpkuhus(); |
294 |
JITCFlow ppc_opc_gen_vpkshss(); |
295 |
JITCFlow ppc_opc_gen_vpkuwus(); |
296 |
JITCFlow ppc_opc_gen_vpkswss(); |
297 |
JITCFlow ppc_opc_gen_vpkuhus(); |
298 |
JITCFlow ppc_opc_gen_vpkshus(); |
299 |
JITCFlow ppc_opc_gen_vpkuwus(); |
300 |
JITCFlow ppc_opc_gen_vpkswus(); |
301 |
JITCFlow ppc_opc_gen_vupkhsb(); |
302 |
JITCFlow ppc_opc_gen_vupkhpx(); |
303 |
JITCFlow ppc_opc_gen_vupkhsh(); |
304 |
JITCFlow ppc_opc_gen_vupklsb(); |
305 |
JITCFlow ppc_opc_gen_vupklpx(); |
306 |
JITCFlow ppc_opc_gen_vupklsh(); |
307 |
JITCFlow ppc_opc_gen_vaddubm(); |
308 |
JITCFlow ppc_opc_gen_vadduhm(); |
309 |
JITCFlow ppc_opc_gen_vadduwm(); |
310 |
JITCFlow ppc_opc_gen_vaddfp(); |
311 |
JITCFlow ppc_opc_gen_vaddcuw(); |
312 |
JITCFlow ppc_opc_gen_vaddubs(); |
313 |
JITCFlow ppc_opc_gen_vaddsbs(); |
314 |
JITCFlow ppc_opc_gen_vadduhs(); |
315 |
JITCFlow ppc_opc_gen_vaddshs(); |
316 |
JITCFlow ppc_opc_gen_vadduws(); |
317 |
JITCFlow ppc_opc_gen_vaddsws(); |
318 |
JITCFlow ppc_opc_gen_vsububm(); |
319 |
JITCFlow ppc_opc_gen_vsubuhm(); |
320 |
JITCFlow ppc_opc_gen_vsubuwm(); |
321 |
JITCFlow ppc_opc_gen_vsubfp(); |
322 |
JITCFlow ppc_opc_gen_vsubcuw(); |
323 |
JITCFlow ppc_opc_gen_vsububs(); |
324 |
JITCFlow ppc_opc_gen_vsubsbs(); |
325 |
JITCFlow ppc_opc_gen_vsubuhs(); |
326 |
JITCFlow ppc_opc_gen_vsubshs(); |
327 |
JITCFlow ppc_opc_gen_vsubuws(); |
328 |
JITCFlow ppc_opc_gen_vsubsws(); |
329 |
JITCFlow ppc_opc_gen_vmuleub(); |
330 |
JITCFlow ppc_opc_gen_vmulesb(); |
331 |
JITCFlow ppc_opc_gen_vmuleuh(); |
332 |
JITCFlow ppc_opc_gen_vmulesh(); |
333 |
JITCFlow ppc_opc_gen_vmuloub(); |
334 |
JITCFlow ppc_opc_gen_vmulosb(); |
335 |
JITCFlow ppc_opc_gen_vmulouh(); |
336 |
JITCFlow ppc_opc_gen_vmulosh(); |
337 |
JITCFlow ppc_opc_gen_vmaddfp(); |
338 |
JITCFlow ppc_opc_gen_vmhaddshs(); |
339 |
JITCFlow ppc_opc_gen_vmladduhm(); |
340 |
JITCFlow ppc_opc_gen_vmhraddshs(); |
341 |
JITCFlow ppc_opc_gen_vmsumubm(); |
342 |
JITCFlow ppc_opc_gen_vmsumuhm(); |
343 |
JITCFlow ppc_opc_gen_vmsummbm(); |
344 |
JITCFlow ppc_opc_gen_vmsumshm(); |
345 |
JITCFlow ppc_opc_gen_vmsumuhs(); |
346 |
JITCFlow ppc_opc_gen_vmsumshs(); |
347 |
JITCFlow ppc_opc_gen_vsum4ubs(); |
348 |
JITCFlow ppc_opc_gen_vsum4sbs(); |
349 |
JITCFlow ppc_opc_gen_vsum4shs(); |
350 |
JITCFlow ppc_opc_gen_vsum2sws(); |
351 |
JITCFlow ppc_opc_gen_vsumsws(); |
352 |
JITCFlow ppc_opc_gen_vnmsubfp(); |
353 |
JITCFlow ppc_opc_gen_vavgub(); |
354 |
JITCFlow ppc_opc_gen_vavgsb(); |
355 |
JITCFlow ppc_opc_gen_vavguh(); |
356 |
JITCFlow ppc_opc_gen_vavgsh(); |
357 |
JITCFlow ppc_opc_gen_vavguw(); |
358 |
JITCFlow ppc_opc_gen_vavgsw(); |
359 |
JITCFlow ppc_opc_gen_vmaxub(); |
360 |
JITCFlow ppc_opc_gen_vmaxsb(); |
361 |
JITCFlow ppc_opc_gen_vmaxuh(); |
362 |
JITCFlow ppc_opc_gen_vmaxsh(); |
363 |
JITCFlow ppc_opc_gen_vmaxuw(); |
364 |
JITCFlow ppc_opc_gen_vmaxsw(); |
365 |
JITCFlow ppc_opc_gen_vmaxfp(); |
366 |
JITCFlow ppc_opc_gen_vminub(); |
367 |
JITCFlow ppc_opc_gen_vminsb(); |
368 |
JITCFlow ppc_opc_gen_vminuh(); |
369 |
JITCFlow ppc_opc_gen_vminsh(); |
370 |
JITCFlow ppc_opc_gen_vminuw(); |
371 |
JITCFlow ppc_opc_gen_vminsw(); |
372 |
JITCFlow ppc_opc_gen_vminfp(); |
373 |
JITCFlow ppc_opc_gen_vrfin(); |
374 |
JITCFlow ppc_opc_gen_vrfip(); |
375 |
JITCFlow ppc_opc_gen_vrfim(); |
376 |
JITCFlow ppc_opc_gen_vrfiz(); |
377 |
JITCFlow ppc_opc_gen_vrefp(); |
378 |
JITCFlow ppc_opc_gen_vrsqrtefp(); |
379 |
JITCFlow ppc_opc_gen_vlogefp(); |
380 |
JITCFlow ppc_opc_gen_vexptefp(); |
381 |
JITCFlow ppc_opc_gen_vcfux(); |
382 |
JITCFlow ppc_opc_gen_vcfsx(); |
383 |
JITCFlow ppc_opc_gen_vctsxs(); |
384 |
JITCFlow ppc_opc_gen_vctuxs(); |
385 |
JITCFlow ppc_opc_gen_vand(); |
386 |
JITCFlow ppc_opc_gen_vandc(); |
387 |
JITCFlow ppc_opc_gen_vor(); |
388 |
JITCFlow ppc_opc_gen_vnor(); |
389 |
JITCFlow ppc_opc_gen_vxor(); |
390 |
JITCFlow ppc_opc_gen_vcmpequbx(); |
391 |
JITCFlow ppc_opc_gen_vcmpequhx(); |
392 |
JITCFlow ppc_opc_gen_vcmpequwx(); |
393 |
JITCFlow ppc_opc_gen_vcmpeqfpx(); |
394 |
JITCFlow ppc_opc_gen_vcmpgtubx(); |
395 |
JITCFlow ppc_opc_gen_vcmpgtsbx(); |
396 |
JITCFlow ppc_opc_gen_vcmpgtuhx(); |
397 |
JITCFlow ppc_opc_gen_vcmpgtshx(); |
398 |
JITCFlow ppc_opc_gen_vcmpgtuwx(); |
399 |
JITCFlow ppc_opc_gen_vcmpgtswx(); |
400 |
JITCFlow ppc_opc_gen_vcmpgtfpx(); |
401 |
JITCFlow ppc_opc_gen_vcmpgefpx(); |
402 |
JITCFlow ppc_opc_gen_vcmpbfpx(); |
403 |
|
404 |
#endif |