aboutsummaryrefslogtreecommitdiffstats
path: root/crypto/rc4/rc4_enc.c
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/rc4/rc4_enc.c')
-rw-r--r--crypto/rc4/rc4_enc.c491
1 files changed, 255 insertions, 236 deletions
diff --git a/crypto/rc4/rc4_enc.c b/crypto/rc4/rc4_enc.c
index 8c4fc6c7a3da..6ebd54d46cad 100644
--- a/crypto/rc4/rc4_enc.c
+++ b/crypto/rc4/rc4_enc.c
@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
- *
+ *
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
- *
+ *
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
+ * 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- *
+ *
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
+ *
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -59,7 +59,8 @@
#include <openssl/rc4.h>
#include "rc4_locl.h"
-/* RC4 as implemented from a posting from
+/*-
+ * RC4 as implemented from a posting from
* Newsgroups: sci.crypt
* From: sterndark@netcom.com (David Sterndark)
* Subject: RC4 Algorithm revealed.
@@ -68,248 +69,266 @@
*/
void RC4(RC4_KEY *key, size_t len, const unsigned char *indata,
- unsigned char *outdata)
- {
- register RC4_INT *d;
- register RC4_INT x,y,tx,ty;
- size_t i;
-
- x=key->x;
- y=key->y;
- d=key->data;
+ unsigned char *outdata)
+{
+ register RC4_INT *d;
+ register RC4_INT x, y, tx, ty;
+ size_t i;
+
+ x = key->x;
+ y = key->y;
+ d = key->data;
#if defined(RC4_CHUNK)
- /*
- * The original reason for implementing this(*) was the fact that
- * pre-21164a Alpha CPUs don't have byte load/store instructions
- * and e.g. a byte store has to be done with 64-bit load, shift,
- * and, or and finally 64-bit store. Peaking data and operating
- * at natural word size made it possible to reduce amount of
- * instructions as well as to perform early read-ahead without
- * suffering from RAW (read-after-write) hazard. This resulted
- * in ~40%(**) performance improvement on 21064 box with gcc.
- * But it's not only Alpha users who win here:-) Thanks to the
- * early-n-wide read-ahead this implementation also exhibits
- * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
- * on sizeof(RC4_INT)).
- *
- * (*) "this" means code which recognizes the case when input
- * and output pointers appear to be aligned at natural CPU
- * word boundary
- * (**) i.e. according to 'apps/openssl speed rc4' benchmark,
- * crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
- *
- * Cavets.
- *
- * - RC4_CHUNK="unsigned long long" should be a #1 choice for
- * UltraSPARC. Unfortunately gcc generates very slow code
- * (2.5-3 times slower than one generated by Sun's WorkShop
- * C) and therefore gcc (at least 2.95 and earlier) should
- * always be told that RC4_CHUNK="unsigned long".
- *
- * <appro@fy.chalmers.se>
- */
+ /*-
+ * The original reason for implementing this(*) was the fact that
+ * pre-21164a Alpha CPUs don't have byte load/store instructions
+ * and e.g. a byte store has to be done with 64-bit load, shift,
+ * and, or and finally 64-bit store. Peaking data and operating
+ * at natural word size made it possible to reduce amount of
+ * instructions as well as to perform early read-ahead without
+ * suffering from RAW (read-after-write) hazard. This resulted
+ * in ~40%(**) performance improvement on 21064 box with gcc.
+ * But it's not only Alpha users who win here:-) Thanks to the
+ * early-n-wide read-ahead this implementation also exhibits
+ * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
+ * on sizeof(RC4_INT)).
+ *
+ * (*) "this" means code which recognizes the case when input
+ * and output pointers appear to be aligned at natural CPU
+ * word boundary
+ * (**) i.e. according to 'apps/openssl speed rc4' benchmark,
+ * crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
+ *
+ * Cavets.
+ *
+ * - RC4_CHUNK="unsigned long long" should be a #1 choice for
+ * UltraSPARC. Unfortunately gcc generates very slow code
+ * (2.5-3 times slower than one generated by Sun's WorkShop
+ * C) and therefore gcc (at least 2.95 and earlier) should
+ * always be told that RC4_CHUNK="unsigned long".
+ *
+ * <appro@fy.chalmers.se>
+ */
-# define RC4_STEP ( \
- x=(x+1) &0xff, \
- tx=d[x], \
- y=(tx+y)&0xff, \
- ty=d[y], \
- d[y]=tx, \
- d[x]=ty, \
- (RC4_CHUNK)d[(tx+ty)&0xff]\
- )
+# define RC4_STEP ( \
+ x=(x+1) &0xff, \
+ tx=d[x], \
+ y=(tx+y)&0xff, \
+ ty=d[y], \
+ d[y]=tx, \
+ d[x]=ty, \
+ (RC4_CHUNK)d[(tx+ty)&0xff]\
+ )
- if ( ( ((size_t)indata & (sizeof(RC4_CHUNK)-1)) |
- ((size_t)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 )
- {
- RC4_CHUNK ichunk,otp;
- const union { long one; char little; } is_endian = {1};
+ if ((((size_t)indata & (sizeof(RC4_CHUNK) - 1)) |
+ ((size_t)outdata & (sizeof(RC4_CHUNK) - 1))) == 0) {
+ RC4_CHUNK ichunk, otp;
+ const union {
+ long one;
+ char little;
+ } is_endian = {
+ 1
+ };
- /*
- * I reckon we can afford to implement both endian
- * cases and to decide which way to take at run-time
- * because the machine code appears to be very compact
- * and redundant 1-2KB is perfectly tolerable (i.e.
- * in case the compiler fails to eliminate it:-). By
- * suggestion from Terrel Larson <terr@terralogic.net>
- * who also stands for the is_endian union:-)
- *
- * Special notes.
- *
- * - is_endian is declared automatic as doing otherwise
- * (declaring static) prevents gcc from eliminating
- * the redundant code;
- * - compilers (those I've tried) don't seem to have
- * problems eliminating either the operators guarded
- * by "if (sizeof(RC4_CHUNK)==8)" or the condition
- * expressions themselves so I've got 'em to replace
- * corresponding #ifdefs from the previous version;
- * - I chose to let the redundant switch cases when
- * sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
- * before);
- * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
- * [LB]ESHFT guards against "shift is out of range"
- * warnings when sizeof(RC4_CHUNK)!=8
- *
- * <appro@fy.chalmers.se>
- */
- if (!is_endian.little)
- { /* BIG-ENDIAN CASE */
-# define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
- for (;len&(0-sizeof(RC4_CHUNK));len-=sizeof(RC4_CHUNK))
- {
- ichunk = *(RC4_CHUNK *)indata;
- otp = RC4_STEP<<BESHFT(0);
- otp |= RC4_STEP<<BESHFT(1);
- otp |= RC4_STEP<<BESHFT(2);
- otp |= RC4_STEP<<BESHFT(3);
- if (sizeof(RC4_CHUNK)==8)
- {
- otp |= RC4_STEP<<BESHFT(4);
- otp |= RC4_STEP<<BESHFT(5);
- otp |= RC4_STEP<<BESHFT(6);
- otp |= RC4_STEP<<BESHFT(7);
- }
- *(RC4_CHUNK *)outdata = otp^ichunk;
- indata += sizeof(RC4_CHUNK);
- outdata += sizeof(RC4_CHUNK);
- }
- if (len)
- {
- RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk;
+ /*-
+ * I reckon we can afford to implement both endian
+ * cases and to decide which way to take at run-time
+ * because the machine code appears to be very compact
+ * and redundant 1-2KB is perfectly tolerable (i.e.
+ * in case the compiler fails to eliminate it:-). By
+ * suggestion from Terrel Larson <terr@terralogic.net>
+ * who also stands for the is_endian union:-)
+ *
+ * Special notes.
+ *
+ * - is_endian is declared automatic as doing otherwise
+ * (declaring static) prevents gcc from eliminating
+ * the redundant code;
+ * - compilers (those I've tried) don't seem to have
+ * problems eliminating either the operators guarded
+ * by "if (sizeof(RC4_CHUNK)==8)" or the condition
+ * expressions themselves so I've got 'em to replace
+ * corresponding #ifdefs from the previous version;
+ * - I chose to let the redundant switch cases when
+ * sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
+ * before);
+ * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
+ * [LB]ESHFT guards against "shift is out of range"
+ * warnings when sizeof(RC4_CHUNK)!=8
+ *
+ * <appro@fy.chalmers.se>
+ */
+ if (!is_endian.little) { /* BIG-ENDIAN CASE */
+# define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
+ for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
+ ichunk = *(RC4_CHUNK *) indata;
+ otp = RC4_STEP << BESHFT(0);
+ otp |= RC4_STEP << BESHFT(1);
+ otp |= RC4_STEP << BESHFT(2);
+ otp |= RC4_STEP << BESHFT(3);
+ if (sizeof(RC4_CHUNK) == 8) {
+ otp |= RC4_STEP << BESHFT(4);
+ otp |= RC4_STEP << BESHFT(5);
+ otp |= RC4_STEP << BESHFT(6);
+ otp |= RC4_STEP << BESHFT(7);
+ }
+ *(RC4_CHUNK *) outdata = otp ^ ichunk;
+ indata += sizeof(RC4_CHUNK);
+ outdata += sizeof(RC4_CHUNK);
+ }
+ if (len) {
+ RC4_CHUNK mask = (RC4_CHUNK) - 1, ochunk;
- ichunk = *(RC4_CHUNK *)indata;
- ochunk = *(RC4_CHUNK *)outdata;
- otp = 0;
- i = BESHFT(0);
- mask <<= (sizeof(RC4_CHUNK)-len)<<3;
- switch (len&(sizeof(RC4_CHUNK)-1))
- {
- case 7: otp = RC4_STEP<<i, i-=8;
- case 6: otp |= RC4_STEP<<i, i-=8;
- case 5: otp |= RC4_STEP<<i, i-=8;
- case 4: otp |= RC4_STEP<<i, i-=8;
- case 3: otp |= RC4_STEP<<i, i-=8;
- case 2: otp |= RC4_STEP<<i, i-=8;
- case 1: otp |= RC4_STEP<<i, i-=8;
- case 0: ; /*
- * it's never the case,
- * but it has to be here
- * for ultrix?
- */
- }
- ochunk &= ~mask;
- ochunk |= (otp^ichunk) & mask;
- *(RC4_CHUNK *)outdata = ochunk;
- }
- key->x=x;
- key->y=y;
- return;
- }
- else
- { /* LITTLE-ENDIAN CASE */
-# define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1))
- for (;len&(0-sizeof(RC4_CHUNK));len-=sizeof(RC4_CHUNK))
- {
- ichunk = *(RC4_CHUNK *)indata;
- otp = RC4_STEP;
- otp |= RC4_STEP<<8;
- otp |= RC4_STEP<<16;
- otp |= RC4_STEP<<24;
- if (sizeof(RC4_CHUNK)==8)
- {
- otp |= RC4_STEP<<LESHFT(4);
- otp |= RC4_STEP<<LESHFT(5);
- otp |= RC4_STEP<<LESHFT(6);
- otp |= RC4_STEP<<LESHFT(7);
- }
- *(RC4_CHUNK *)outdata = otp^ichunk;
- indata += sizeof(RC4_CHUNK);
- outdata += sizeof(RC4_CHUNK);
- }
- if (len)
- {
- RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk;
+ ichunk = *(RC4_CHUNK *) indata;
+ ochunk = *(RC4_CHUNK *) outdata;
+ otp = 0;
+ i = BESHFT(0);
+ mask <<= (sizeof(RC4_CHUNK) - len) << 3;
+ switch (len & (sizeof(RC4_CHUNK) - 1)) {
+ case 7:
+ otp = RC4_STEP << i, i -= 8;
+ case 6:
+ otp |= RC4_STEP << i, i -= 8;
+ case 5:
+ otp |= RC4_STEP << i, i -= 8;
+ case 4:
+ otp |= RC4_STEP << i, i -= 8;
+ case 3:
+ otp |= RC4_STEP << i, i -= 8;
+ case 2:
+ otp |= RC4_STEP << i, i -= 8;
+ case 1:
+ otp |= RC4_STEP << i, i -= 8;
+ case 0:; /*
+ * it's never the case,
+ * but it has to be here
+ * for ultrix?
+ */
+ }
+ ochunk &= ~mask;
+ ochunk |= (otp ^ ichunk) & mask;
+ *(RC4_CHUNK *) outdata = ochunk;
+ }
+ key->x = x;
+ key->y = y;
+ return;
+ } else { /* LITTLE-ENDIAN CASE */
+# define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1))
+ for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
+ ichunk = *(RC4_CHUNK *) indata;
+ otp = RC4_STEP;
+ otp |= RC4_STEP << 8;
+ otp |= RC4_STEP << 16;
+ otp |= RC4_STEP << 24;
+ if (sizeof(RC4_CHUNK) == 8) {
+ otp |= RC4_STEP << LESHFT(4);
+ otp |= RC4_STEP << LESHFT(5);
+ otp |= RC4_STEP << LESHFT(6);
+ otp |= RC4_STEP << LESHFT(7);
+ }
+ *(RC4_CHUNK *) outdata = otp ^ ichunk;
+ indata += sizeof(RC4_CHUNK);
+ outdata += sizeof(RC4_CHUNK);
+ }
+ if (len) {
+ RC4_CHUNK mask = (RC4_CHUNK) - 1, ochunk;
- ichunk = *(RC4_CHUNK *)indata;
- ochunk = *(RC4_CHUNK *)outdata;
- otp = 0;
- i = 0;
- mask >>= (sizeof(RC4_CHUNK)-len)<<3;
- switch (len&(sizeof(RC4_CHUNK)-1))
- {
- case 7: otp = RC4_STEP, i+=8;
- case 6: otp |= RC4_STEP<<i, i+=8;
- case 5: otp |= RC4_STEP<<i, i+=8;
- case 4: otp |= RC4_STEP<<i, i+=8;
- case 3: otp |= RC4_STEP<<i, i+=8;
- case 2: otp |= RC4_STEP<<i, i+=8;
- case 1: otp |= RC4_STEP<<i, i+=8;
- case 0: ; /*
- * it's never the case,
- * but it has to be here
- * for ultrix?
- */
- }
- ochunk &= ~mask;
- ochunk |= (otp^ichunk) & mask;
- *(RC4_CHUNK *)outdata = ochunk;
- }
- key->x=x;
- key->y=y;
- return;
- }
- }
+ ichunk = *(RC4_CHUNK *) indata;
+ ochunk = *(RC4_CHUNK *) outdata;
+ otp = 0;
+ i = 0;
+ mask >>= (sizeof(RC4_CHUNK) - len) << 3;
+ switch (len & (sizeof(RC4_CHUNK) - 1)) {
+ case 7:
+ otp = RC4_STEP, i += 8;
+ case 6:
+ otp |= RC4_STEP << i, i += 8;
+ case 5:
+ otp |= RC4_STEP << i, i += 8;
+ case 4:
+ otp |= RC4_STEP << i, i += 8;
+ case 3:
+ otp |= RC4_STEP << i, i += 8;
+ case 2:
+ otp |= RC4_STEP << i, i += 8;
+ case 1:
+ otp |= RC4_STEP << i, i += 8;
+ case 0:; /*
+ * it's never the case,
+ * but it has to be here
+ * for ultrix?
+ */
+ }
+ ochunk &= ~mask;
+ ochunk |= (otp ^ ichunk) & mask;
+ *(RC4_CHUNK *) outdata = ochunk;
+ }
+ key->x = x;
+ key->y = y;
+ return;
+ }
+ }
#endif
#define LOOP(in,out) \
- x=((x+1)&0xff); \
- tx=d[x]; \
- y=(tx+y)&0xff; \
- d[x]=ty=d[y]; \
- d[y]=tx; \
- (out) = d[(tx+ty)&0xff]^ (in);
+ x=((x+1)&0xff); \
+ tx=d[x]; \
+ y=(tx+y)&0xff; \
+ d[x]=ty=d[y]; \
+ d[y]=tx; \
+ (out) = d[(tx+ty)&0xff]^ (in);
#ifndef RC4_INDEX
-#define RC4_LOOP(a,b,i) LOOP(*((a)++),*((b)++))
+# define RC4_LOOP(a,b,i) LOOP(*((a)++),*((b)++))
#else
-#define RC4_LOOP(a,b,i) LOOP(a[i],b[i])
+# define RC4_LOOP(a,b,i) LOOP(a[i],b[i])
#endif
- i=len>>3;
- if (i)
- {
- for (;;)
- {
- RC4_LOOP(indata,outdata,0);
- RC4_LOOP(indata,outdata,1);
- RC4_LOOP(indata,outdata,2);
- RC4_LOOP(indata,outdata,3);
- RC4_LOOP(indata,outdata,4);
- RC4_LOOP(indata,outdata,5);
- RC4_LOOP(indata,outdata,6);
- RC4_LOOP(indata,outdata,7);
+ i = len >> 3;
+ if (i) {
+ for (;;) {
+ RC4_LOOP(indata, outdata, 0);
+ RC4_LOOP(indata, outdata, 1);
+ RC4_LOOP(indata, outdata, 2);
+ RC4_LOOP(indata, outdata, 3);
+ RC4_LOOP(indata, outdata, 4);
+ RC4_LOOP(indata, outdata, 5);
+ RC4_LOOP(indata, outdata, 6);
+ RC4_LOOP(indata, outdata, 7);
#ifdef RC4_INDEX
- indata+=8;
- outdata+=8;
+ indata += 8;
+ outdata += 8;
#endif
- if (--i == 0) break;
- }
- }
- i=len&0x07;
- if (i)
- {
- for (;;)
- {
- RC4_LOOP(indata,outdata,0); if (--i == 0) break;
- RC4_LOOP(indata,outdata,1); if (--i == 0) break;
- RC4_LOOP(indata,outdata,2); if (--i == 0) break;
- RC4_LOOP(indata,outdata,3); if (--i == 0) break;
- RC4_LOOP(indata,outdata,4); if (--i == 0) break;
- RC4_LOOP(indata,outdata,5); if (--i == 0) break;
- RC4_LOOP(indata,outdata,6); if (--i == 0) break;
- }
- }
- key->x=x;
- key->y=y;
- }
+ if (--i == 0)
+ break;
+ }
+ }
+ i = len & 0x07;
+ if (i) {
+ for (;;) {
+ RC4_LOOP(indata, outdata, 0);
+ if (--i == 0)
+ break;
+ RC4_LOOP(indata, outdata, 1);
+ if (--i == 0)
+ break;
+ RC4_LOOP(indata, outdata, 2);
+ if (--i == 0)
+ break;
+ RC4_LOOP(indata, outdata, 3);
+ if (--i == 0)
+ break;
+ RC4_LOOP(indata, outdata, 4);
+ if (--i == 0)
+ break;
+ RC4_LOOP(indata, outdata, 5);
+ if (--i == 0)
+ break;
+ RC4_LOOP(indata, outdata, 6);
+ if (--i == 0)
+ break;
+ }
+ }
+ key->x = x;
+ key->y = y;
+}