1 |
/* crypto/rc4/rc4_enc.org */ |
/* crypto/rc4/rc4_enc.c */ |
2 |
/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) |
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 |
* All rights reserved. |
* All rights reserved. |
4 |
* |
* |
5 |
* This package is an SSL implementation written |
* This package is an SSL implementation written |
57 |
*/ |
*/ |
58 |
|
|
59 |
#include "rc4.h" |
#include "rc4.h" |
|
#include "rc4_locl.h" |
|
60 |
|
|
61 |
/* RC4 as implemented from a posting from |
/* RC4 as implemented from a posting from |
62 |
* Newsgroups: sci.crypt |
* Newsgroups: sci.crypt |
66 |
* Date: Wed, 14 Sep 1994 06:35:31 GMT |
* Date: Wed, 14 Sep 1994 06:35:31 GMT |
67 |
*/ |
*/ |
68 |
|
|
69 |
void RC4(key, len, indata, outdata) |
void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata, |
70 |
RC4_KEY *key; |
unsigned char *outdata) |
|
unsigned long len; |
|
|
unsigned char *indata; |
|
|
unsigned char *outdata; |
|
71 |
{ |
{ |
72 |
register RC4_INT *d; |
register RC4_INT *d; |
73 |
register RC4_INT x,y,tx,ty; |
register RC4_INT x,y,tx,ty; |
77 |
y=key->y; |
y=key->y; |
78 |
d=key->data; |
d=key->data; |
79 |
|
|
80 |
|
#if defined(RC4_CHUNK) |
81 |
|
/* |
82 |
|
* The original reason for implementing this(*) was the fact that |
83 |
|
* pre-21164a Alpha CPUs don't have byte load/store instructions |
84 |
|
* and e.g. a byte store has to be done with 64-bit load, shift, |
85 |
|
* and, or and finally 64-bit store. Peeking at data and operating |
86 |
|
* at natural word size made it possible to reduce amount of |
87 |
|
* instructions as well as to perform early read-ahead without |
88 |
|
* suffering from RAW (read-after-write) hazard. This resulted |
89 |
|
* in ~40%(**) performance improvement on 21064 box with gcc. |
90 |
|
* But it's not only Alpha users who win here:-) Thanks to the |
91 |
|
* early-n-wide read-ahead this implementation also exhibits |
92 |
|
* >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending |
93 |
|
* on sizeof(RC4_INT)). |
94 |
|
* |
95 |
|
* (*) "this" means code which recognizes the case when input |
96 |
|
* and output pointers appear to be aligned at natural CPU |
97 |
|
* word boundary |
98 |
|
* (**) i.e. according to 'apps/openssl speed rc4' benchmark, |
99 |
|
* crypto/rc4/rc4speed.c exhibits almost 70% speed-up... |
100 |
|
* |
101 |
|
* Caveats. |
102 |
|
* |
103 |
|
* - RC4_CHUNK="unsigned long long" should be a #1 choice for |
104 |
|
* UltraSPARC. Unfortunately gcc generates very slow code |
105 |
|
* (2.5-3 times slower than one generated by Sun's WorkShop |
106 |
|
* C) and therefore gcc (at least 2.95 and earlier) should |
107 |
|
* always be told that RC4_CHUNK="unsigned long". |
108 |
|
* |
109 |
|
* <appro@fy.chalmers.se> |
110 |
|
*/ |
111 |
|
|
112 |
|
# define RC4_STEP ( \ |
113 |
|
x=(x+1) &0xff, \ |
114 |
|
tx=d[x], \ |
115 |
|
y=(tx+y)&0xff, \ |
116 |
|
ty=d[y], \ |
117 |
|
d[y]=tx, \ |
118 |
|
d[x]=ty, \ |
119 |
|
(RC4_CHUNK)d[(tx+ty)&0xff]\ |
120 |
|
) |
121 |
|
|
122 |
|
if ( ( ((unsigned long)indata & (sizeof(RC4_CHUNK)-1)) | |
123 |
|
((unsigned long)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 ) |
124 |
|
{ |
125 |
|
RC4_CHUNK ichunk,otp; |
126 |
|
const union { long one; char little; } is_endian = {1}; |
127 |
|
|
128 |
|
/* |
129 |
|
* I reckon we can afford to implement both endian |
130 |
|
* cases and to decide which way to take at run-time |
131 |
|
* because the machine code appears to be very compact |
132 |
|
* and redundant 1-2KB is perfectly tolerable (i.e. |
133 |
|
* in case the compiler fails to eliminate it:-). By |
134 |
|
* suggestion from Terrel Larson <terr@terralogic.net> |
135 |
|
* who also stands for the is_endian union:-) |
136 |
|
* |
137 |
|
* Special notes. |
138 |
|
* |
139 |
|
* - is_endian is declared automatic as doing otherwise |
140 |
|
* (declaring static) prevents gcc from eliminating |
141 |
|
* the redundant code; |
142 |
|
* - compilers (those I've tried) don't seem to have |
143 |
|
* problems eliminating either the operators guarded |
144 |
|
* by "if (sizeof(RC4_CHUNK)==8)" or the condition |
145 |
|
* expressions themselves so I've got 'em to replace |
146 |
|
* corresponding #ifdefs from the previous version; |
147 |
|
* - I chose to let the redundant switch cases when |
148 |
|
* sizeof(RC4_CHUNK)!=8 be (were also #ifdefed |
149 |
|
* before); |
150 |
|
* - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in |
151 |
|
* [LB]ESHFT guards against "shift is out of range" |
152 |
|
* warnings when sizeof(RC4_CHUNK)!=8 |
153 |
|
* |
154 |
|
* <appro@fy.chalmers.se> |
155 |
|
*/ |
156 |
|
if (!is_endian.little) |
157 |
|
{ /* BIG-ENDIAN CASE */ |
158 |
|
# define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1)) |
159 |
|
for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK)) |
160 |
|
{ |
161 |
|
ichunk = *(RC4_CHUNK *)indata; |
162 |
|
otp = RC4_STEP<<BESHFT(0); |
163 |
|
otp |= RC4_STEP<<BESHFT(1); |
164 |
|
otp |= RC4_STEP<<BESHFT(2); |
165 |
|
otp |= RC4_STEP<<BESHFT(3); |
166 |
|
if (sizeof(RC4_CHUNK)==8) |
167 |
|
{ |
168 |
|
otp |= RC4_STEP<<BESHFT(4); |
169 |
|
otp |= RC4_STEP<<BESHFT(5); |
170 |
|
otp |= RC4_STEP<<BESHFT(6); |
171 |
|
otp |= RC4_STEP<<BESHFT(7); |
172 |
|
} |
173 |
|
*(RC4_CHUNK *)outdata = otp^ichunk; |
174 |
|
indata += sizeof(RC4_CHUNK); |
175 |
|
outdata += sizeof(RC4_CHUNK); |
176 |
|
} |
177 |
|
if (len) |
178 |
|
{ |
179 |
|
RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk; |
180 |
|
|
181 |
|
ichunk = *(RC4_CHUNK *)indata; |
182 |
|
ochunk = *(RC4_CHUNK *)outdata; |
183 |
|
otp = 0; |
184 |
|
i = BESHFT(0); |
185 |
|
mask <<= (sizeof(RC4_CHUNK)-len)<<3; |
186 |
|
switch (len&(sizeof(RC4_CHUNK)-1)) |
187 |
|
{ |
188 |
|
case 7: otp = RC4_STEP<<i, i-=8; |
189 |
|
case 6: otp |= RC4_STEP<<i, i-=8; |
190 |
|
case 5: otp |= RC4_STEP<<i, i-=8; |
191 |
|
case 4: otp |= RC4_STEP<<i, i-=8; |
192 |
|
case 3: otp |= RC4_STEP<<i, i-=8; |
193 |
|
case 2: otp |= RC4_STEP<<i, i-=8; |
194 |
|
case 1: otp |= RC4_STEP<<i, i-=8; |
195 |
|
case 0: ; /* |
196 |
|
* it's never the case, |
197 |
|
* but it has to be here |
198 |
|
* for ultrix? |
199 |
|
*/ |
200 |
|
} |
201 |
|
ochunk &= ~mask; |
202 |
|
ochunk |= (otp^ichunk) & mask; |
203 |
|
*(RC4_CHUNK *)outdata = ochunk; |
204 |
|
} |
205 |
|
key->x=x; |
206 |
|
key->y=y; |
207 |
|
return; |
208 |
|
} |
209 |
|
else |
210 |
|
{ /* LITTLE-ENDIAN CASE */ |
211 |
|
# define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1)) |
212 |
|
for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK)) |
213 |
|
{ |
214 |
|
ichunk = *(RC4_CHUNK *)indata; |
215 |
|
otp = RC4_STEP; |
216 |
|
otp |= RC4_STEP<<8; |
217 |
|
otp |= RC4_STEP<<16; |
218 |
|
otp |= RC4_STEP<<24; |
219 |
|
if (sizeof(RC4_CHUNK)==8) |
220 |
|
{ |
221 |
|
otp |= RC4_STEP<<LESHFT(4); |
222 |
|
otp |= RC4_STEP<<LESHFT(5); |
223 |
|
otp |= RC4_STEP<<LESHFT(6); |
224 |
|
otp |= RC4_STEP<<LESHFT(7); |
225 |
|
} |
226 |
|
*(RC4_CHUNK *)outdata = otp^ichunk; |
227 |
|
indata += sizeof(RC4_CHUNK); |
228 |
|
outdata += sizeof(RC4_CHUNK); |
229 |
|
} |
230 |
|
if (len) |
231 |
|
{ |
232 |
|
RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk; |
233 |
|
|
234 |
|
ichunk = *(RC4_CHUNK *)indata; |
235 |
|
ochunk = *(RC4_CHUNK *)outdata; |
236 |
|
otp = 0; |
237 |
|
i = 0; |
238 |
|
mask >>= (sizeof(RC4_CHUNK)-len)<<3; |
239 |
|
switch (len&(sizeof(RC4_CHUNK)-1)) |
240 |
|
{ |
241 |
|
case 7: otp = RC4_STEP, i+=8; |
242 |
|
case 6: otp |= RC4_STEP<<i, i+=8; |
243 |
|
case 5: otp |= RC4_STEP<<i, i+=8; |
244 |
|
case 4: otp |= RC4_STEP<<i, i+=8; |
245 |
|
case 3: otp |= RC4_STEP<<i, i+=8; |
246 |
|
case 2: otp |= RC4_STEP<<i, i+=8; |
247 |
|
case 1: otp |= RC4_STEP<<i, i+=8; |
248 |
|
case 0: ; /* |
249 |
|
* it's never the case, |
250 |
|
* but it has to be here |
251 |
|
* for ultrix? |
252 |
|
*/ |
253 |
|
} |
254 |
|
ochunk &= ~mask; |
255 |
|
ochunk |= (otp^ichunk) & mask; |
256 |
|
*(RC4_CHUNK *)outdata = ochunk; |
257 |
|
} |
258 |
|
key->x=x; |
259 |
|
key->y=y; |
260 |
|
return; |
261 |
|
} |
262 |
|
} |
263 |
|
#endif |
264 |
#define LOOP(in,out) \ |
#define LOOP(in,out) \ |
265 |
x=((x+1)&0xff); \ |
x=((x+1)&0xff); \ |
266 |
tx=d[x]; \ |
tx=d[x]; \ |
275 |
#define RC4_LOOP(a,b,i) LOOP(a[i],b[i]) |
#define RC4_LOOP(a,b,i) LOOP(a[i],b[i]) |
276 |
#endif |
#endif |
277 |
|
|
|
i= -(int)len; |
|
278 |
i=(int)(len>>3L); |
i=(int)(len>>3L); |
279 |
if (i) |
if (i) |
280 |
{ |
{ |