1 |
matty |
32 |
/* crypto/rc4/rc4_enc.c */ |
2 |
|
|
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 |
matty |
11 |
* All rights reserved. |
4 |
|
|
* |
5 |
|
|
* This package is an SSL implementation written |
6 |
|
|
* by Eric Young (eay@cryptsoft.com). |
7 |
|
|
* The implementation was written so as to conform with Netscapes SSL. |
8 |
|
|
* |
9 |
|
|
* This library is free for commercial and non-commercial use as long as |
10 |
|
|
* the following conditions are aheared to. The following conditions |
11 |
|
|
* apply to all code found in this distribution, be it the RC4, RSA, |
12 |
|
|
* lhash, DES, etc., code; not just the SSL code. The SSL documentation |
13 |
|
|
* included with this distribution is covered by the same copyright terms |
14 |
|
|
* except that the holder is Tim Hudson (tjh@cryptsoft.com). |
15 |
|
|
* |
16 |
|
|
* Copyright remains Eric Young's, and as such any Copyright notices in |
17 |
|
|
* the code are not to be removed. |
18 |
|
|
* If this package is used in a product, Eric Young should be given attribution |
19 |
|
|
* as the author of the parts of the library used. |
20 |
|
|
* This can be in the form of a textual message at program startup or |
21 |
|
|
* in documentation (online or textual) provided with the package. |
22 |
|
|
* |
23 |
|
|
* Redistribution and use in source and binary forms, with or without |
24 |
|
|
* modification, are permitted provided that the following conditions |
25 |
|
|
* are met: |
26 |
|
|
* 1. Redistributions of source code must retain the copyright |
27 |
|
|
* notice, this list of conditions and the following disclaimer. |
28 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
29 |
|
|
* notice, this list of conditions and the following disclaimer in the |
30 |
|
|
* documentation and/or other materials provided with the distribution. |
31 |
|
|
* 3. All advertising materials mentioning features or use of this software |
32 |
|
|
* must display the following acknowledgement: |
33 |
|
|
* "This product includes cryptographic software written by |
34 |
|
|
* Eric Young (eay@cryptsoft.com)" |
35 |
|
|
* The word 'cryptographic' can be left out if the rouines from the library |
36 |
|
|
* being used are not cryptographic related :-). |
37 |
|
|
* 4. If you include any Windows specific code (or a derivative thereof) from |
38 |
|
|
* the apps directory (application code) you must include an acknowledgement: |
39 |
|
|
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" |
40 |
|
|
* |
41 |
|
|
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND |
42 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
43 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
44 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
45 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
46 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
47 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
48 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
49 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
50 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
51 |
|
|
* SUCH DAMAGE. |
52 |
|
|
* |
53 |
|
|
* The licence and distribution terms for any publically available version or |
54 |
|
|
* derivative of this code cannot be changed. i.e. this code cannot simply be |
55 |
|
|
* copied and put under another distribution licence |
56 |
|
|
* [including the GNU Public Licence.] |
57 |
|
|
*/ |
58 |
|
|
|
59 |
|
|
#include "rc4.h" |
60 |
|
|
|
61 |
|
|
/* RC4 as implemented from a posting from |
62 |
|
|
* Newsgroups: sci.crypt |
63 |
|
|
* From: sterndark@netcom.com (David Sterndark) |
64 |
|
|
* Subject: RC4 Algorithm revealed. |
65 |
|
|
* Message-ID: <sternCvKL4B.Hyy@netcom.com> |
66 |
|
|
* Date: Wed, 14 Sep 1994 06:35:31 GMT |
67 |
|
|
*/ |
68 |
|
|
|
69 |
matty |
32 |
/*
 * RC4 - apply the RC4 keystream to a buffer.
 *
 * key      cipher state; key->x, key->y and the table key->data are read
 *          on entry, the table is permuted in place, and key->x / key->y
 *          are written back before returning so that a later call
 *          continues the same keystream.
 * len      number of bytes to process.
 * indata   input bytes.
 * outdata  receives, for each position, the input byte XORed with the
 *          next keystream byte.
 *
 * When RC4_CHUNK is defined and both indata and outdata are aligned to
 * sizeof(RC4_CHUNK), the data is processed one RC4_CHUNK word at a time;
 * otherwise (or when RC4_CHUNK is not defined) a byte-wise loop, unrolled
 * eight times, is used.
 */
void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata,
	     unsigned char *outdata)
	{
	register RC4_INT *d;
	register RC4_INT x,y,tx,ty;
	/*
	 * Block/byte counter for the byte-wise path.  It has the same type
	 * as len: with a plain int, (int)(len>>3) is truncated once
	 * len>>3 exceeds INT_MAX, and a count that truncates to 0 makes
	 * the unrolled loop be skipped altogether.
	 */
	unsigned long i;

	x=key->x;
	y=key->y;
	d=key->data;

#if defined(RC4_CHUNK)
	/*
	 * The original reason for implementing this(*) was the fact that
	 * pre-21164a Alpha CPUs don't have byte load/store instructions
	 * and e.g. a byte store has to be done with 64-bit load, shift,
	 * and, or and finally 64-bit store. Peeking data and operating
	 * at natural word size made it possible to reduce amount of
	 * instructions as well as to perform early read-ahead without
	 * suffering from RAW (read-after-write) hazard. This resulted
	 * in ~40%(**) performance improvement on 21064 box with gcc.
	 * But it's not only Alpha users who win here:-) Thanks to the
	 * early-n-wide read-ahead this implementation also exhibits
	 * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
	 * on sizeof(RC4_INT)).
	 *
	 * (*)	"this" means code which recognizes the case when input
	 *	and output pointers appear to be aligned at natural CPU
	 *	word boundary
	 * (**)	i.e. according to 'apps/openssl speed rc4' benchmark,
	 *	crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
	 *
	 * Caveats.
	 *
	 * - RC4_CHUNK="unsigned long long" should be a #1 choice for
	 *   UltraSPARC. Unfortunately gcc generates very slow code
	 *   (2.5-3 times slower than one generated by Sun's WorkShop
	 *   C) and therefore gcc (at least 2.95 and earlier) should
	 *   always be told that RC4_CHUNK="unsigned long".
	 *
	 *					<appro@fy.chalmers.se>
	 */

	/*
	 * One RC4 step as a comma expression: advances x and y, swaps
	 * d[x] and d[y], and yields the keystream byte widened to
	 * RC4_CHUNK so it can be shifted into position.
	 */
# define RC4_STEP	( \
			x=(x+1) &0xff,	\
			tx=d[x],	\
			y=(tx+y)&0xff,	\
			ty=d[y],	\
			d[y]=tx,	\
			d[x]=ty,	\
			(RC4_CHUNK)d[(tx+ty)&0xff]\
			)

	/* Take the word path only if both pointers are RC4_CHUNK-aligned. */
	if ( ( ((unsigned long)indata  & (sizeof(RC4_CHUNK)-1)) |
	       ((unsigned long)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 )
		{
		RC4_CHUNK ichunk,otp;
		const union { long one; char little; } is_endian = {1};

		/*
		 * I reckon we can afford to implement both endian
		 * cases and to decide which way to take at run-time
		 * because the machine code appears to be very compact
		 * and redundant 1-2KB is perfectly tolerable (i.e.
		 * in case the compiler fails to eliminate it:-). By
		 * suggestion from Terrel Larson <terr@terralogic.net>
		 * who also stands for the is_endian union:-)
		 *
		 * Special notes.
		 *
		 * - is_endian is declared automatic as doing otherwise
		 *   (declaring static) prevents gcc from eliminating
		 *   the redundant code;
		 * - compilers (those I've tried) don't seem to have
		 *   problems eliminating either the operators guarded
		 *   by "if (sizeof(RC4_CHUNK)==8)" or the condition
		 *   expressions themselves so I've got 'em to replace
		 *   corresponding #ifdefs from the previous version;
		 * - I chose to let the redundant switch cases when
		 *   sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
		 *   before);
		 * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
		 *   [LB]ESHFT guards against "shift is out of range"
		 *   warnings when sizeof(RC4_CHUNK)!=8
		 *
		 *			<appro@fy.chalmers.se>
		 */
		if (!is_endian.little)
			{	/* BIG-ENDIAN CASE */
# define BESHFT(c)	(((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
			/* Whole words: loop while at least one full chunk remains. */
			for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK))
				{
				ichunk  = *(RC4_CHUNK *)indata;
				otp  = RC4_STEP<<BESHFT(0);
				otp |= RC4_STEP<<BESHFT(1);
				otp |= RC4_STEP<<BESHFT(2);
				otp |= RC4_STEP<<BESHFT(3);
				if (sizeof(RC4_CHUNK)==8)
					{
					otp |= RC4_STEP<<BESHFT(4);
					otp |= RC4_STEP<<BESHFT(5);
					otp |= RC4_STEP<<BESHFT(6);
					otp |= RC4_STEP<<BESHFT(7);
					}
				*(RC4_CHUNK *)outdata = otp^ichunk;
				indata  += sizeof(RC4_CHUNK);
				outdata += sizeof(RC4_CHUNK);
				}
			if (len)
				{
				/*
				 * Tail of 1..sizeof(RC4_CHUNK)-1 bytes.
				 * NOTE(review): a whole RC4_CHUNK is loaded
				 * from indata and loaded/stored at outdata
				 * even though only len bytes remain; the
				 * bytes outside mask are written back
				 * unchanged.
				 */
				RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk;
				int shift;

				ichunk = *(RC4_CHUNK *)indata;
				ochunk = *(RC4_CHUNK *)outdata;
				otp = 0;
				shift = BESHFT(0);
				mask <<= (sizeof(RC4_CHUNK)-len)<<3;
				switch (len&(sizeof(RC4_CHUNK)-1))
					{
					case 7:	otp  = RC4_STEP<<shift, shift-=8;
						/* fall through */
					case 6:	otp |= RC4_STEP<<shift, shift-=8;
						/* fall through */
					case 5:	otp |= RC4_STEP<<shift, shift-=8;
						/* fall through */
					case 4:	otp |= RC4_STEP<<shift, shift-=8;
						/* fall through */
					case 3:	otp |= RC4_STEP<<shift, shift-=8;
						/* fall through */
					case 2:	otp |= RC4_STEP<<shift, shift-=8;
						/* fall through */
					case 1:	otp |= RC4_STEP<<shift, shift-=8;
						/* fall through */
					case 0: ; /*
						   * it's never the case,
						   * but it has to be here
						   * for ultrix?
						   */
					}
				ochunk &= ~mask;
				ochunk |= (otp^ichunk) & mask;
				*(RC4_CHUNK *)outdata = ochunk;
				}
			key->x=x;
			key->y=y;
			return;
			}
		else
			{	/* LITTLE-ENDIAN CASE */
# define LESHFT(c)	(((c)*8)&(sizeof(RC4_CHUNK)*8-1))
			/* Whole words: loop while at least one full chunk remains. */
			for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK))
				{
				ichunk  = *(RC4_CHUNK *)indata;
				otp  = RC4_STEP;
				otp |= RC4_STEP<<8;
				otp |= RC4_STEP<<16;
				otp |= RC4_STEP<<24;
				if (sizeof(RC4_CHUNK)==8)
					{
					otp |= RC4_STEP<<LESHFT(4);
					otp |= RC4_STEP<<LESHFT(5);
					otp |= RC4_STEP<<LESHFT(6);
					otp |= RC4_STEP<<LESHFT(7);
					}
				*(RC4_CHUNK *)outdata = otp^ichunk;
				indata  += sizeof(RC4_CHUNK);
				outdata += sizeof(RC4_CHUNK);
				}
			if (len)
				{
				/*
				 * Tail of 1..sizeof(RC4_CHUNK)-1 bytes.
				 * NOTE(review): a whole RC4_CHUNK is loaded
				 * from indata and loaded/stored at outdata
				 * even though only len bytes remain; the
				 * bytes outside mask are written back
				 * unchanged.
				 */
				RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk;
				int shift;

				ichunk = *(RC4_CHUNK *)indata;
				ochunk = *(RC4_CHUNK *)outdata;
				otp = 0;
				shift = 0;
				mask >>= (sizeof(RC4_CHUNK)-len)<<3;
				switch (len&(sizeof(RC4_CHUNK)-1))
					{
					case 7:	otp  = RC4_STEP,        shift+=8;
						/* fall through */
					case 6:	otp |= RC4_STEP<<shift, shift+=8;
						/* fall through */
					case 5:	otp |= RC4_STEP<<shift, shift+=8;
						/* fall through */
					case 4:	otp |= RC4_STEP<<shift, shift+=8;
						/* fall through */
					case 3:	otp |= RC4_STEP<<shift, shift+=8;
						/* fall through */
					case 2:	otp |= RC4_STEP<<shift, shift+=8;
						/* fall through */
					case 1:	otp |= RC4_STEP<<shift, shift+=8;
						/* fall through */
					case 0: ; /*
						   * it's never the case,
						   * but it has to be here
						   * for ultrix?
						   */
					}
				ochunk &= ~mask;
				ochunk |= (otp^ichunk) & mask;
				*(RC4_CHUNK *)outdata = ochunk;
				}
			key->x=x;
			key->y=y;
			return;
			}
		}
#endif
	/* One byte-wise RC4 step: (out) = keystream byte XOR (in). */
#define LOOP(in,out) \
		x=((x+1)&0xff); \
		tx=d[x]; \
		y=(tx+y)&0xff; \
		d[x]=ty=d[y]; \
		d[y]=tx; \
		(out) = d[(tx+ty)&0xff]^ (in);

	/*
	 * Without RC4_INDEX the pointers are post-incremented on every step;
	 * with RC4_INDEX they are indexed and advanced once per block.
	 */
#ifndef RC4_INDEX
#define RC4_LOOP(a,b,i)	LOOP(*((a)++),*((b)++))
#else
#define RC4_LOOP(a,b,i)	LOOP(a[i],b[i])
#endif

	/* Full blocks of eight bytes. */
	i=len>>3;
	if (i)
		{
		for (;;)
			{
			RC4_LOOP(indata,outdata,0);
			RC4_LOOP(indata,outdata,1);
			RC4_LOOP(indata,outdata,2);
			RC4_LOOP(indata,outdata,3);
			RC4_LOOP(indata,outdata,4);
			RC4_LOOP(indata,outdata,5);
			RC4_LOOP(indata,outdata,6);
			RC4_LOOP(indata,outdata,7);
#ifdef RC4_INDEX
			indata+=8;
			outdata+=8;
#endif
			if (--i == 0) break;
			}
		}
	/* Remaining 0..7 bytes. */
	i=len&0x07;
	if (i)
		{
		for (;;)
			{
			RC4_LOOP(indata,outdata,0); if (--i == 0) break;
			RC4_LOOP(indata,outdata,1); if (--i == 0) break;
			RC4_LOOP(indata,outdata,2); if (--i == 0) break;
			RC4_LOOP(indata,outdata,3); if (--i == 0) break;
			RC4_LOOP(indata,outdata,4); if (--i == 0) break;
			RC4_LOOP(indata,outdata,5); if (--i == 0) break;
			RC4_LOOP(indata,outdata,6); if (--i == 0) break;
			}
		}
	key->x=x;
	key->y=y;
	}