/[rdesktop]/sourceforge.net/trunk/rdesktop/crypto/bn_asm.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /sourceforge.net/trunk/rdesktop/crypto/bn_asm.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 32 - (hide annotations)
Sat Sep 15 09:37:17 2001 UTC (22 years, 9 months ago) by matty
File MIME type: text/plain
File size: 18347 byte(s)
Synced crypto/ with latest OpenSSL.
Moved to OpenSSL big number routines to resolve licensing issues
with current code (although they add more bloat).

1 matty 32 /* crypto/bn/bn_asm.c */
2     /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3     * All rights reserved.
4     *
5     * This package is an SSL implementation written
6     * by Eric Young (eay@cryptsoft.com).
7     * The implementation was written so as to conform with Netscapes SSL.
8     *
9     * This library is free for commercial and non-commercial use as long as
10     * the following conditions are aheared to. The following conditions
11     * apply to all code found in this distribution, be it the RC4, RSA,
12     * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13     * included with this distribution is covered by the same copyright terms
14     * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15     *
16     * Copyright remains Eric Young's, and as such any Copyright notices in
17     * the code are not to be removed.
18     * If this package is used in a product, Eric Young should be given attribution
19     * as the author of the parts of the library used.
20     * This can be in the form of a textual message at program startup or
21     * in documentation (online or textual) provided with the package.
22     *
23     * Redistribution and use in source and binary forms, with or without
24     * modification, are permitted provided that the following conditions
25     * are met:
26     * 1. Redistributions of source code must retain the copyright
27     * notice, this list of conditions and the following disclaimer.
28     * 2. Redistributions in binary form must reproduce the above copyright
29     * notice, this list of conditions and the following disclaimer in the
30     * documentation and/or other materials provided with the distribution.
31     * 3. All advertising materials mentioning features or use of this software
32     * must display the following acknowledgement:
33     * "This product includes cryptographic software written by
34     * Eric Young (eay@cryptsoft.com)"
35     * The word 'cryptographic' can be left out if the rouines from the library
36     * being used are not cryptographic related :-).
37     * 4. If you include any Windows specific code (or a derivative thereof) from
38     * the apps directory (application code) you must include an acknowledgement:
39     * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40     *
41     * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42     * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44     * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45     * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46     * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47     * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49     * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50     * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51     * SUCH DAMAGE.
52     *
53     * The licence and distribution terms for any publically available version or
54     * derivative of this code cannot be changed. i.e. this code cannot simply be
55     * copied and put under another distribution licence
56     * [including the GNU Public Licence.]
57     */
58    
59     #ifndef BN_DEBUG
60     # undef NDEBUG /* avoid conflicting definitions */
61     # define NDEBUG
62     #endif
63    
64     #include <stdio.h>
65     #include <assert.h>
66     #include "bn_lcl.h"
67    
68     #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
69    
70     BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
71     {
72     BN_ULONG c1=0;
73    
74     assert(num >= 0);
75     if (num <= 0) return(c1);
76    
77     while (num&~3)
78     {
79     mul_add(rp[0],ap[0],w,c1);
80     mul_add(rp[1],ap[1],w,c1);
81     mul_add(rp[2],ap[2],w,c1);
82     mul_add(rp[3],ap[3],w,c1);
83     ap+=4; rp+=4; num-=4;
84     }
85     if (num)
86     {
87     mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
88     mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
89     mul_add(rp[2],ap[2],w,c1); return c1;
90     }
91    
92     return(c1);
93     }
94    
95     BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
96     {
97     BN_ULONG c1=0;
98    
99     assert(num >= 0);
100     if (num <= 0) return(c1);
101    
102     while (num&~3)
103     {
104     mul(rp[0],ap[0],w,c1);
105     mul(rp[1],ap[1],w,c1);
106     mul(rp[2],ap[2],w,c1);
107     mul(rp[3],ap[3],w,c1);
108     ap+=4; rp+=4; num-=4;
109     }
110     if (num)
111     {
112     mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
113     mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
114     mul(rp[2],ap[2],w,c1);
115     }
116     return(c1);
117     }
118    
119     void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
120     {
121     assert(n >= 0);
122     if (n <= 0) return;
123     while (n&~3)
124     {
125     sqr(r[0],r[1],a[0]);
126     sqr(r[2],r[3],a[1]);
127     sqr(r[4],r[5],a[2]);
128     sqr(r[6],r[7],a[3]);
129     a+=4; r+=8; n-=4;
130     }
131     if (n)
132     {
133     sqr(r[0],r[1],a[0]); if (--n == 0) return;
134     sqr(r[2],r[3],a[1]); if (--n == 0) return;
135     sqr(r[4],r[5],a[2]);
136     }
137     }
138    
139     #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
140    
141     BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
142     {
143     BN_ULONG c=0;
144     BN_ULONG bl,bh;
145    
146     assert(num >= 0);
147     if (num <= 0) return((BN_ULONG)0);
148    
149     bl=LBITS(w);
150     bh=HBITS(w);
151    
152     for (;;)
153     {
154     mul_add(rp[0],ap[0],bl,bh,c);
155     if (--num == 0) break;
156     mul_add(rp[1],ap[1],bl,bh,c);
157     if (--num == 0) break;
158     mul_add(rp[2],ap[2],bl,bh,c);
159     if (--num == 0) break;
160     mul_add(rp[3],ap[3],bl,bh,c);
161     if (--num == 0) break;
162     ap+=4;
163     rp+=4;
164     }
165     return(c);
166     }
167    
168     BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
169     {
170     BN_ULONG carry=0;
171     BN_ULONG bl,bh;
172    
173     assert(num >= 0);
174     if (num <= 0) return((BN_ULONG)0);
175    
176     bl=LBITS(w);
177     bh=HBITS(w);
178    
179     for (;;)
180     {
181     mul(rp[0],ap[0],bl,bh,carry);
182     if (--num == 0) break;
183     mul(rp[1],ap[1],bl,bh,carry);
184     if (--num == 0) break;
185     mul(rp[2],ap[2],bl,bh,carry);
186     if (--num == 0) break;
187     mul(rp[3],ap[3],bl,bh,carry);
188     if (--num == 0) break;
189     ap+=4;
190     rp+=4;
191     }
192     return(carry);
193     }
194    
195     void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
196     {
197     assert(n >= 0);
198     if (n <= 0) return;
199     for (;;)
200     {
201     sqr64(r[0],r[1],a[0]);
202     if (--n == 0) break;
203    
204     sqr64(r[2],r[3],a[1]);
205     if (--n == 0) break;
206    
207     sqr64(r[4],r[5],a[2]);
208     if (--n == 0) break;
209    
210     sqr64(r[6],r[7],a[3]);
211     if (--n == 0) break;
212    
213     a+=4;
214     r+=8;
215     }
216     }
217    
218     #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
219    
220     #if defined(BN_LLONG) && defined(BN_DIV2W)
221    
222     BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
223     {
224     return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d));
225     }
226    
227     #else
228    
229     /* Divide h,l by d and return the result. */
230     /* I need to test this some more :-( */
231     BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
232     {
233     BN_ULONG dh,dl,q,ret=0,th,tl,t;
234     int i,count=2;
235    
236     if (d == 0) return(BN_MASK2);
237    
238     i=BN_num_bits_word(d);
239     assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i));
240    
241     i=BN_BITS2-i;
242     if (h >= d) h-=d;
243    
244     if (i)
245     {
246     d<<=i;
247     h=(h<<i)|(l>>(BN_BITS2-i));
248     l<<=i;
249     }
250     dh=(d&BN_MASK2h)>>BN_BITS4;
251     dl=(d&BN_MASK2l);
252     for (;;)
253     {
254     if ((h>>BN_BITS4) == dh)
255     q=BN_MASK2l;
256     else
257     q=h/dh;
258    
259     th=q*dh;
260     tl=dl*q;
261     for (;;)
262     {
263     t=h-th;
264     if ((t&BN_MASK2h) ||
265     ((tl) <= (
266     (t<<BN_BITS4)|
267     ((l&BN_MASK2h)>>BN_BITS4))))
268     break;
269     q--;
270     th-=dh;
271     tl-=dl;
272     }
273     t=(tl>>BN_BITS4);
274     tl=(tl<<BN_BITS4)&BN_MASK2h;
275     th+=t;
276    
277     if (l < tl) th++;
278     l-=tl;
279     if (h < th)
280     {
281     h+=d;
282     q--;
283     }
284     h-=th;
285    
286     if (--count == 0) break;
287    
288     ret=q<<BN_BITS4;
289     h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
290     l=(l&BN_MASK2l)<<BN_BITS4;
291     }
292     ret|=q;
293     return(ret);
294     }
295     #endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
296    
297     #ifdef BN_LLONG
298     BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
299     {
300     BN_ULLONG ll=0;
301    
302     assert(n >= 0);
303     if (n <= 0) return((BN_ULONG)0);
304    
305     for (;;)
306     {
307     ll+=(BN_ULLONG)a[0]+b[0];
308     r[0]=(BN_ULONG)ll&BN_MASK2;
309     ll>>=BN_BITS2;
310     if (--n <= 0) break;
311    
312     ll+=(BN_ULLONG)a[1]+b[1];
313     r[1]=(BN_ULONG)ll&BN_MASK2;
314     ll>>=BN_BITS2;
315     if (--n <= 0) break;
316    
317     ll+=(BN_ULLONG)a[2]+b[2];
318     r[2]=(BN_ULONG)ll&BN_MASK2;
319     ll>>=BN_BITS2;
320     if (--n <= 0) break;
321    
322     ll+=(BN_ULLONG)a[3]+b[3];
323     r[3]=(BN_ULONG)ll&BN_MASK2;
324     ll>>=BN_BITS2;
325     if (--n <= 0) break;
326    
327     a+=4;
328     b+=4;
329     r+=4;
330     }
331     return((BN_ULONG)ll);
332     }
333     #else /* !BN_LLONG */
334     BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
335     {
336     BN_ULONG c,l,t;
337    
338     assert(n >= 0);
339     if (n <= 0) return((BN_ULONG)0);
340    
341     c=0;
342     for (;;)
343     {
344     t=a[0];
345     t=(t+c)&BN_MASK2;
346     c=(t < c);
347     l=(t+b[0])&BN_MASK2;
348     c+=(l < t);
349     r[0]=l;
350     if (--n <= 0) break;
351    
352     t=a[1];
353     t=(t+c)&BN_MASK2;
354     c=(t < c);
355     l=(t+b[1])&BN_MASK2;
356     c+=(l < t);
357     r[1]=l;
358     if (--n <= 0) break;
359    
360     t=a[2];
361     t=(t+c)&BN_MASK2;
362     c=(t < c);
363     l=(t+b[2])&BN_MASK2;
364     c+=(l < t);
365     r[2]=l;
366     if (--n <= 0) break;
367    
368     t=a[3];
369     t=(t+c)&BN_MASK2;
370     c=(t < c);
371     l=(t+b[3])&BN_MASK2;
372     c+=(l < t);
373     r[3]=l;
374     if (--n <= 0) break;
375    
376     a+=4;
377     b+=4;
378     r+=4;
379     }
380     return((BN_ULONG)c);
381     }
382     #endif /* !BN_LLONG */
383    
384     #if 0
385     BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
386     {
387     BN_ULONG t1,t2;
388     int c=0;
389    
390     assert(n >= 0);
391     if (n <= 0) return((BN_ULONG)0);
392    
393     for (;;)
394     {
395     t1=a[0]; t2=b[0];
396     r[0]=(t1-t2-c)&BN_MASK2;
397     if (t1 != t2) c=(t1 < t2);
398     if (--n <= 0) break;
399    
400     t1=a[1]; t2=b[1];
401     r[1]=(t1-t2-c)&BN_MASK2;
402     if (t1 != t2) c=(t1 < t2);
403     if (--n <= 0) break;
404    
405     t1=a[2]; t2=b[2];
406     r[2]=(t1-t2-c)&BN_MASK2;
407     if (t1 != t2) c=(t1 < t2);
408     if (--n <= 0) break;
409    
410     t1=a[3]; t2=b[3];
411     r[3]=(t1-t2-c)&BN_MASK2;
412     if (t1 != t2) c=(t1 < t2);
413     if (--n <= 0) break;
414    
415     a+=4;
416     b+=4;
417     r+=4;
418     }
419     return(c);
420     }
421     #endif
422    
423     #ifdef BN_MUL_COMBA
424    
425     #undef bn_mul_comba8
426     #undef bn_mul_comba4
427     #undef bn_sqr_comba8
428     #undef bn_sqr_comba4
429    
430     /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
431     /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
432     /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
433     /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
434    
435     #ifdef BN_LLONG
/*
 * BN_LLONG variants of the column-accumulator macros.  (c2,c1,c0) is a
 * three-word accumulator:
 *   mul_add_c(a,b,c0,c1,c2)      c += a*b
 *   mul_add_c2(a,b,c0,c1,c2)     c += 2*a*b
 *   sqr_add_c(a,i,c0,c1,c2)      c += a[i]^2
 *   sqr_add_c2(a,i,j,c0,c1,c2)   c += 2*a[i]*a[j]
 *
 * The enclosing function must declare the scratch variables the macros
 * assign to: BN_ULLONG t (plus tt for the *_c2 forms) and BN_ULONG t1, t2.
 *
 * Each macro is wrapped in do { } while (0) so it behaves as one statement
 * (safe under an un-braced if/else), and the operands are parenthesised so
 * that expression arguments multiply as a whole.
 */
#define mul_add_c(a,b,c0,c1,c2) do { \
    t=(BN_ULLONG)(a)*(b); \
    t1=(BN_ULONG)Lw(t); \
    t2=(BN_ULONG)Hw(t); \
    (c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)

#define mul_add_c2(a,b,c0,c1,c2) do { \
    t=(BN_ULLONG)(a)*(b); \
    tt=(t+t)&BN_MASK; \
    if (tt < t) (c2)++; \
    t1=(BN_ULONG)Lw(tt); \
    t2=(BN_ULONG)Hw(tt); \
    (c0)=((c0)+t1)&BN_MASK2; \
    if (((c0) < t1) && (((++t2)&BN_MASK2) == 0)) (c2)++; \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)

#define sqr_add_c(a,i,c0,c1,c2) do { \
    t=(BN_ULLONG)(a)[i]*(a)[i]; \
    t1=(BN_ULONG)Lw(t); \
    t2=(BN_ULONG)Hw(t); \
    (c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)

#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i],(a)[j],c0,c1,c2)
462    
463     #elif defined(BN_UMULT_HIGH)
464    
/*
 * BN_UMULT_HIGH variants of the column-accumulator macros.  (c2,c1,c0) is
 * a three-word accumulator:
 *   mul_add_c(a,b,c0,c1,c2)      c += a*b
 *   mul_add_c2(a,b,c0,c1,c2)     c += 2*a*b
 *   sqr_add_c(a,i,c0,c1,c2)      c += a[i]^2
 *   sqr_add_c2(a,i,j,c0,c1,c2)   c += 2*a[i]*a[j]
 *
 * The enclosing function must declare the BN_ULONG scratch variables t1
 * and t2 that the macros assign to.
 *
 * In mul_add_c2 the doubled high word t2 can be all ones after absorbing
 * the carry out of the doubled low word; adding the carry out of c0 then
 * wraps t2 to zero.  That wrap is a carry into c2 and must be counted
 * explicitly, as the other two variants of this macro already do.
 * Previously it was silently dropped.
 *
 * Each macro is wrapped in do { } while (0) so it behaves as one statement
 * (safe under an un-braced if/else).
 */
#define mul_add_c(a,b,c0,c1,c2) do { \
    BN_ULONG ta=(a),tb=(b); \
    t1 = ta * tb; \
    t2 = BN_UMULT_HIGH(ta,tb); \
    (c0) += t1; t2 += ((c0)<t1)?1:0; \
    (c1) += t2; (c2) += ((c1)<t2)?1:0; \
    } while (0)

#define mul_add_c2(a,b,c0,c1,c2) do { \
    BN_ULONG ta=(a),tb=(b),t0; \
    t1 = BN_UMULT_HIGH(ta,tb); \
    t0 = ta * tb; \
    t2 = t1+t1; (c2) += (t2<t1)?1:0; \
    t1 = t0+t0; t2 += (t1<t0)?1:0; \
    (c0) += t1; \
    if (((c0) < t1) && (++t2 == 0)) (c2)++; \
    (c1) += t2; (c2) += ((c1)<t2)?1:0; \
    } while (0)

#define sqr_add_c(a,i,c0,c1,c2) do { \
    BN_ULONG ta=(a)[i]; \
    t1 = ta * ta; \
    t2 = BN_UMULT_HIGH(ta,ta); \
    (c0) += t1; t2 += ((c0)<t1)?1:0; \
    (c1) += t2; (c2) += ((c1)<t2)?1:0; \
    } while (0)

#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i],(a)[j],c0,c1,c2)
493    
494     #else /* !BN_LLONG */
/*
 * Portable variants (neither BN_LLONG nor BN_UMULT_HIGH) of the
 * column-accumulator macros.  (c2,c1,c0) is a three-word accumulator:
 *   mul_add_c(a,b,c0,c1,c2)      c += a*b
 *   mul_add_c2(a,b,c0,c1,c2)     c += 2*a*b
 *   sqr_add_c(a,i,c0,c1,c2)      c += a[i]^2
 *   sqr_add_c2(a,i,j,c0,c1,c2)   c += 2*a[i]*a[j]
 *
 * The enclosing function must declare the BN_ULONG scratch variables t1,
 * t2, bl and bh that the macros assign to.
 *
 * NOTE(review): LBITS(), HBITS(), mul64() and sqr64() come from bn_lcl.h
 * (not visible here); the code below relies on mul64(t1,t2,bl,bh) and
 * sqr64(t1,t2,x) leaving the low word of the product in t1 and the high
 * word in t2 -- confirm there.
 *
 * Each macro is wrapped in do { } while (0) so it behaves as one statement
 * (safe under an un-braced if/else).
 */
#define mul_add_c(a,b,c0,c1,c2) do { \
    t1=LBITS(a); t2=HBITS(a); \
    bl=LBITS(b); bh=HBITS(b); \
    mul64(t1,t2,bl,bh); \
    (c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)

#define mul_add_c2(a,b,c0,c1,c2) do { \
    t1=LBITS(a); t2=HBITS(a); \
    bl=LBITS(b); bh=HBITS(b); \
    mul64(t1,t2,bl,bh); \
    if (t2 & BN_TBIT) (c2)++; \
    t2=(t2+t2)&BN_MASK2; \
    if (t1 & BN_TBIT) t2++; \
    t1=(t1+t1)&BN_MASK2; \
    (c0)=((c0)+t1)&BN_MASK2; \
    if (((c0) < t1) && (((++t2)&BN_MASK2) == 0)) (c2)++; \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)

#define sqr_add_c(a,i,c0,c1,c2) do { \
    sqr64(t1,t2,(a)[i]); \
    (c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)

#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i],(a)[j],c0,c1,c2)
521     #endif /* !BN_LLONG */
522    
523     void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
524     {
525     #ifdef BN_LLONG
526     BN_ULLONG t;
527     #else
528     BN_ULONG bl,bh;
529     #endif
530     BN_ULONG t1,t2;
531     BN_ULONG c1,c2,c3;
532    
533     c1=0;
534     c2=0;
535     c3=0;
536     mul_add_c(a[0],b[0],c1,c2,c3);
537     r[0]=c1;
538     c1=0;
539     mul_add_c(a[0],b[1],c2,c3,c1);
540     mul_add_c(a[1],b[0],c2,c3,c1);
541     r[1]=c2;
542     c2=0;
543     mul_add_c(a[2],b[0],c3,c1,c2);
544     mul_add_c(a[1],b[1],c3,c1,c2);
545     mul_add_c(a[0],b[2],c3,c1,c2);
546     r[2]=c3;
547     c3=0;
548     mul_add_c(a[0],b[3],c1,c2,c3);
549     mul_add_c(a[1],b[2],c1,c2,c3);
550     mul_add_c(a[2],b[1],c1,c2,c3);
551     mul_add_c(a[3],b[0],c1,c2,c3);
552     r[3]=c1;
553     c1=0;
554     mul_add_c(a[4],b[0],c2,c3,c1);
555     mul_add_c(a[3],b[1],c2,c3,c1);
556     mul_add_c(a[2],b[2],c2,c3,c1);
557     mul_add_c(a[1],b[3],c2,c3,c1);
558     mul_add_c(a[0],b[4],c2,c3,c1);
559     r[4]=c2;
560     c2=0;
561     mul_add_c(a[0],b[5],c3,c1,c2);
562     mul_add_c(a[1],b[4],c3,c1,c2);
563     mul_add_c(a[2],b[3],c3,c1,c2);
564     mul_add_c(a[3],b[2],c3,c1,c2);
565     mul_add_c(a[4],b[1],c3,c1,c2);
566     mul_add_c(a[5],b[0],c3,c1,c2);
567     r[5]=c3;
568     c3=0;
569     mul_add_c(a[6],b[0],c1,c2,c3);
570     mul_add_c(a[5],b[1],c1,c2,c3);
571     mul_add_c(a[4],b[2],c1,c2,c3);
572     mul_add_c(a[3],b[3],c1,c2,c3);
573     mul_add_c(a[2],b[4],c1,c2,c3);
574     mul_add_c(a[1],b[5],c1,c2,c3);
575     mul_add_c(a[0],b[6],c1,c2,c3);
576     r[6]=c1;
577     c1=0;
578     mul_add_c(a[0],b[7],c2,c3,c1);
579     mul_add_c(a[1],b[6],c2,c3,c1);
580     mul_add_c(a[2],b[5],c2,c3,c1);
581     mul_add_c(a[3],b[4],c2,c3,c1);
582     mul_add_c(a[4],b[3],c2,c3,c1);
583     mul_add_c(a[5],b[2],c2,c3,c1);
584     mul_add_c(a[6],b[1],c2,c3,c1);
585     mul_add_c(a[7],b[0],c2,c3,c1);
586     r[7]=c2;
587     c2=0;
588     mul_add_c(a[7],b[1],c3,c1,c2);
589     mul_add_c(a[6],b[2],c3,c1,c2);
590     mul_add_c(a[5],b[3],c3,c1,c2);
591     mul_add_c(a[4],b[4],c3,c1,c2);
592     mul_add_c(a[3],b[5],c3,c1,c2);
593     mul_add_c(a[2],b[6],c3,c1,c2);
594     mul_add_c(a[1],b[7],c3,c1,c2);
595     r[8]=c3;
596     c3=0;
597     mul_add_c(a[2],b[7],c1,c2,c3);
598     mul_add_c(a[3],b[6],c1,c2,c3);
599     mul_add_c(a[4],b[5],c1,c2,c3);
600     mul_add_c(a[5],b[4],c1,c2,c3);
601     mul_add_c(a[6],b[3],c1,c2,c3);
602     mul_add_c(a[7],b[2],c1,c2,c3);
603     r[9]=c1;
604     c1=0;
605     mul_add_c(a[7],b[3],c2,c3,c1);
606     mul_add_c(a[6],b[4],c2,c3,c1);
607     mul_add_c(a[5],b[5],c2,c3,c1);
608     mul_add_c(a[4],b[6],c2,c3,c1);
609     mul_add_c(a[3],b[7],c2,c3,c1);
610     r[10]=c2;
611     c2=0;
612     mul_add_c(a[4],b[7],c3,c1,c2);
613     mul_add_c(a[5],b[6],c3,c1,c2);
614     mul_add_c(a[6],b[5],c3,c1,c2);
615     mul_add_c(a[7],b[4],c3,c1,c2);
616     r[11]=c3;
617     c3=0;
618     mul_add_c(a[7],b[5],c1,c2,c3);
619     mul_add_c(a[6],b[6],c1,c2,c3);
620     mul_add_c(a[5],b[7],c1,c2,c3);
621     r[12]=c1;
622     c1=0;
623     mul_add_c(a[6],b[7],c2,c3,c1);
624     mul_add_c(a[7],b[6],c2,c3,c1);
625     r[13]=c2;
626     c2=0;
627     mul_add_c(a[7],b[7],c3,c1,c2);
628     r[14]=c3;
629     r[15]=c1;
630     }
631    
632     void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
633     {
634     #ifdef BN_LLONG
635     BN_ULLONG t;
636     #else
637     BN_ULONG bl,bh;
638     #endif
639     BN_ULONG t1,t2;
640     BN_ULONG c1,c2,c3;
641    
642     c1=0;
643     c2=0;
644     c3=0;
645     mul_add_c(a[0],b[0],c1,c2,c3);
646     r[0]=c1;
647     c1=0;
648     mul_add_c(a[0],b[1],c2,c3,c1);
649     mul_add_c(a[1],b[0],c2,c3,c1);
650     r[1]=c2;
651     c2=0;
652     mul_add_c(a[2],b[0],c3,c1,c2);
653     mul_add_c(a[1],b[1],c3,c1,c2);
654     mul_add_c(a[0],b[2],c3,c1,c2);
655     r[2]=c3;
656     c3=0;
657     mul_add_c(a[0],b[3],c1,c2,c3);
658     mul_add_c(a[1],b[2],c1,c2,c3);
659     mul_add_c(a[2],b[1],c1,c2,c3);
660     mul_add_c(a[3],b[0],c1,c2,c3);
661     r[3]=c1;
662     c1=0;
663     mul_add_c(a[3],b[1],c2,c3,c1);
664     mul_add_c(a[2],b[2],c2,c3,c1);
665     mul_add_c(a[1],b[3],c2,c3,c1);
666     r[4]=c2;
667     c2=0;
668     mul_add_c(a[2],b[3],c3,c1,c2);
669     mul_add_c(a[3],b[2],c3,c1,c2);
670     r[5]=c3;
671     c3=0;
672     mul_add_c(a[3],b[3],c1,c2,c3);
673     r[6]=c1;
674     r[7]=c2;
675     }
676    
677     void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
678     {
679     #ifdef BN_LLONG
680     BN_ULLONG t,tt;
681     #else
682     BN_ULONG bl,bh;
683     #endif
684     BN_ULONG t1,t2;
685     BN_ULONG c1,c2,c3;
686    
687     c1=0;
688     c2=0;
689     c3=0;
690     sqr_add_c(a,0,c1,c2,c3);
691     r[0]=c1;
692     c1=0;
693     sqr_add_c2(a,1,0,c2,c3,c1);
694     r[1]=c2;
695     c2=0;
696     sqr_add_c(a,1,c3,c1,c2);
697     sqr_add_c2(a,2,0,c3,c1,c2);
698     r[2]=c3;
699     c3=0;
700     sqr_add_c2(a,3,0,c1,c2,c3);
701     sqr_add_c2(a,2,1,c1,c2,c3);
702     r[3]=c1;
703     c1=0;
704     sqr_add_c(a,2,c2,c3,c1);
705     sqr_add_c2(a,3,1,c2,c3,c1);
706     sqr_add_c2(a,4,0,c2,c3,c1);
707     r[4]=c2;
708     c2=0;
709     sqr_add_c2(a,5,0,c3,c1,c2);
710     sqr_add_c2(a,4,1,c3,c1,c2);
711     sqr_add_c2(a,3,2,c3,c1,c2);
712     r[5]=c3;
713     c3=0;
714     sqr_add_c(a,3,c1,c2,c3);
715     sqr_add_c2(a,4,2,c1,c2,c3);
716     sqr_add_c2(a,5,1,c1,c2,c3);
717     sqr_add_c2(a,6,0,c1,c2,c3);
718     r[6]=c1;
719     c1=0;
720     sqr_add_c2(a,7,0,c2,c3,c1);
721     sqr_add_c2(a,6,1,c2,c3,c1);
722     sqr_add_c2(a,5,2,c2,c3,c1);
723     sqr_add_c2(a,4,3,c2,c3,c1);
724     r[7]=c2;
725     c2=0;
726     sqr_add_c(a,4,c3,c1,c2);
727     sqr_add_c2(a,5,3,c3,c1,c2);
728     sqr_add_c2(a,6,2,c3,c1,c2);
729     sqr_add_c2(a,7,1,c3,c1,c2);
730     r[8]=c3;
731     c3=0;
732     sqr_add_c2(a,7,2,c1,c2,c3);
733     sqr_add_c2(a,6,3,c1,c2,c3);
734     sqr_add_c2(a,5,4,c1,c2,c3);
735     r[9]=c1;
736     c1=0;
737     sqr_add_c(a,5,c2,c3,c1);
738     sqr_add_c2(a,6,4,c2,c3,c1);
739     sqr_add_c2(a,7,3,c2,c3,c1);
740     r[10]=c2;
741     c2=0;
742     sqr_add_c2(a,7,4,c3,c1,c2);
743     sqr_add_c2(a,6,5,c3,c1,c2);
744     r[11]=c3;
745     c3=0;
746     sqr_add_c(a,6,c1,c2,c3);
747     sqr_add_c2(a,7,5,c1,c2,c3);
748     r[12]=c1;
749     c1=0;
750     sqr_add_c2(a,7,6,c2,c3,c1);
751     r[13]=c2;
752     c2=0;
753     sqr_add_c(a,7,c3,c1,c2);
754     r[14]=c3;
755     r[15]=c1;
756     }
757    
758     void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
759     {
760     #ifdef BN_LLONG
761     BN_ULLONG t,tt;
762     #else
763     BN_ULONG bl,bh;
764     #endif
765     BN_ULONG t1,t2;
766     BN_ULONG c1,c2,c3;
767    
768     c1=0;
769     c2=0;
770     c3=0;
771     sqr_add_c(a,0,c1,c2,c3);
772     r[0]=c1;
773     c1=0;
774     sqr_add_c2(a,1,0,c2,c3,c1);
775     r[1]=c2;
776     c2=0;
777     sqr_add_c(a,1,c3,c1,c2);
778     sqr_add_c2(a,2,0,c3,c1,c2);
779     r[2]=c3;
780     c3=0;
781     sqr_add_c2(a,3,0,c1,c2,c3);
782     sqr_add_c2(a,2,1,c1,c2,c3);
783     r[3]=c1;
784     c1=0;
785     sqr_add_c(a,2,c2,c3,c1);
786     sqr_add_c2(a,3,1,c2,c3,c1);
787     r[4]=c2;
788     c2=0;
789     sqr_add_c2(a,3,2,c3,c1,c2);
790     r[5]=c3;
791     c3=0;
792     sqr_add_c(a,3,c1,c2,c3);
793     r[6]=c1;
794     r[7]=c2;
795     }
796     #else /* !BN_MUL_COMBA */
797    
798     /* hmm... is it faster just to do a multiply? */
799     #undef bn_sqr_comba4
800     void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
801     {
802     BN_ULONG t[8];
803     bn_sqr_normal(r,a,4,t);
804     }
805    
806     #undef bn_sqr_comba8
807     void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
808     {
809     BN_ULONG t[16];
810     bn_sqr_normal(r,a,8,t);
811     }
812    
813     void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
814     {
815     r[4]=bn_mul_words( &(r[0]),a,4,b[0]);
816     r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]);
817     r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]);
818     r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]);
819     }
820    
821     void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
822     {
823     r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
824     r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
825     r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
826     r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
827     r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
828     r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
829     r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
830     r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
831     }
832    
833     #endif /* !BN_MUL_COMBA */

  ViewVC Help
Powered by ViewVC 1.1.26