=== libs/libmythtv/dvbrecorder.cpp Index: libs/libmythtv/dvbrecorder.h =================================================================== --- libs/libmythtv/dvbrecorder.h.orig 2006-04-10 21:26:56.000000000 -0400 +++ libs/libmythtv/dvbrecorder.h 2006-06-20 17:36:06.000000000 -0400 @@ -28,6 +28,15 @@ class ProgramMapTable; class TSPacket; +#define MAX_CSA_PIDS 8192 /* number of entries in decsaKey::pidmap */ +struct decsaKey { + bool use_decsa; + unsigned char pidmap[MAX_CSA_PIDS]; //max # of pids + void *keys[16]; + unsigned char valid_keys; + pthread_mutex_t keylock; /* NOTE(review): presumably serializes key updates - confirm in dvbrecorder.cpp */ +}; + class PIDInfo { public: @@ -60,6 +69,10 @@ DVBRecorder(TVRec *rec, DVBChannel* dvbchannel); ~DVBRecorder(); + static struct decsaKey *UpdateDeCSAKeys( + int cardnum, unsigned char keytype, + int index, unsigned char *key, int pid); /* declared unqualified: a 'DVBRecorder::' prefix on an in-class declaration is ill-formed C++ (GCC >= 4.1: "extra qualification") */ + void SetOption(const QString &name, int value); void SetOptionsFromProfile(RecordingProfile *profile, @@ -159,6 +172,18 @@ static const int TSPACKETS_BETWEEN_PSIP_SYNC; static const int POLL_INTERVAL; static const int POLL_WARNING_TIMEOUT; + + void initDeCSA(); + uint PostProcessDataTS(unsigned char *buffer, uint offset, uint len); + int PreProcessDataTS(unsigned char *buffer, uint len); + int _csa_pkt_buf_count; + unsigned char **_csa_cluster; + unsigned char **_csa_clusterptr; + int _csa_cluster_size; + + struct decsaKey *_csa_softKey; + unsigned char _csa_even_ck[8], _csa_odd_ck[8]; + }; inline void PIDInfo::Close(void) Index: libs/libmythtv/libmythtv.pro =================================================================== --- libs/libmythtv/libmythtv.pro.orig 2006-04-10 21:26:56.000000000 -0400 +++ libs/libmythtv/libmythtv.pro 2006-06-20 17:36:06.000000000 -0400 @@ -14,6 +14,10 @@ DEPENDPATH += ../libmythmpeg2 DEPENDPATH += ./dvbdev ./mpeg +# (begin)softcam-0.4 +LIBS += FFdecsa/FFdecsa.o +# (end)softcam-0.4 + LIBS += -L../libmyth -L../libavutil -L../libavcodec -L../libavformat -L../libmythmpeg2 LIBS += -lmyth-$${LIBVERSION} -lmythavutil-$${LIBVERSION} \ -lmythavcodec-$${LIBVERSION} \ Index: 
libs/libmythtv/FFdecsa/FFdecsa_test_testcases.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/FFdecsa_test_testcases.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,279 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + + +// TEST DATA + +////////// used as a wrong key +unsigned char test_invalid_key[0x08] = { + 0x0f, 0x1e, 0x2d, 0x3c, 0x4b, 0x5a, 0x69, 0x78 +}; + + +////////// test 1: odd key +unsigned char test_1_key[0x8] = { + 0x07, 0xe0, 0x1b, 0x02, 0xc9, 0xe0, 0x45, 0xee +}; +unsigned char test_1_encrypted[0x100] = { + 0x47, 0x00, 0x00, 0xd0, + 0xde, 0xcf, 0x0a, 0x0d, 0xb2, 0xd7, 0xc4, 0x40, 0xde, 0x5d, 0x63, 0x18, 0x5a, 0x98, 0x17, 0xaa, + 0xc9, 0xbc, 0x27, 0xc6, 0xcb, 0x49, 0x40, 0x48, 0xfd, 0x20, 0xb7, 0x05, 0x5b, 0x27, 0xcb, 0xeb, + 0x9a, 0xf0, 0xac, 0x45, 0x6d, 0x56, 0xf4, 0x7b, 0x6f, 0xa0, 0x57, 0xf3, 0x9b, 0xf7, 0xa2, 0xc7, + 0xd4, 0x68, 0x24, 0x00, 0x2f, 0x28, 0x13, 0x96, 0x94, 0xa8, 0x7c, 0xf4, 0x6f, 0x07, 0x2a, 0x0e, + 0xe8, 0xa1, 0xeb, 0xc7, 0x80, 0xac, 0x1f, 0x79, 0xbf, 0x5d, 0xb6, 0x10, 0x7c, 0x2e, 0x52, 0xe9, + 0x34, 0x2c, 0xa8, 0x39, 0x01, 0x73, 0x04, 0x24, 0xa8, 0x1e, 0xdb, 0x5b, 0xcb, 0x24, 0xf6, 0x31, + 0xab, 0x02, 0x6b, 0xf9, 0xf6, 0xf7, 0xe9, 0x52, 0xad, 0xcf, 0x62, 0x0f, 0x42, 0xf6, 0x66, 0x5d, + 0xc0, 0x86, 0xf2, 0x7b, 0x40, 0x20, 0xa9, 0xbd, 0x1f, 0xfd, 0x16, 0xad, 0x2e, 0x75, 0xa6, 0xa0, + 0x85, 0xf3, 0x9c, 0x31, 0x20, 0x4e, 0xfb, 0x95, 0x61, 0x78, 0xce, 0x10, 0xc1, 0x48, 0x5f, 0xd3, + 0x61, 0x05, 0x12, 0xf4, 0xe2, 0x04, 0xae, 0xe0, 0x86, 0x01, 0x56, 0x55, 0xb1, 0x0f, 0xa6, 0x33, + 0x95, 0x20, 0x92, 0xf0, 0xbe, 0x39, 0x31, 0xe1, 0x2a, 0xf7, 0x93, 0xb4, 0xf7, 0xe4, 0xf1, 0x85, + 0xae, 0x50, 0xf1, 0x63, 0xd4, 0x5d, 0x9c, 0x6c +}; +unsigned char test_1_expected[0x100] = { + 0x47, 0x00, 0x00, 0xd0, + 0xaf, 0xbe, 0xfb, 0xef, 0xbe, 0xfb, 0xef, 0xbe, 0xfb, 0xef, 0xbe, 0xfb, 0xe6, 0xb5, 0xad, 0x7c, + 0xf9, 0xf3, 0xe5, 0xb1, 0x6c, 0x7c, 0xf9, 0xf3, 0xe6, 0xb5, 0xad, 0x6b, 0x5f, 0x3e, 0x7c, 0xf9, + 0x6c, 0x5b, 0x1f, 0x3e, 0x7c, 0xf9, 0xad, 0x6b, 0x5a, 0xd7, 0xcf, 0x9f, 0x3e, 0x5b, 0x16, 0xc7, + 0xcf, 0x9f, 0x3e, 0x6b, 0x5a, 0xd6, 0xb5, 0xf3, 0xe7, 0xcf, 0x96, 0xc5, 0xb1, 0xf3, 0xe7, 0xcf, + 0x9a, 0xd6, 0xb5, 0xad, 0x7c, 0xf9, 0xf3, 0xe5, 0xb1, 0x6c, 0x7c, 
0xf9, 0xf3, 0xe6, 0xb5, 0xad, + 0x6b, 0x5f, 0x3e, 0x7c, 0xf9, 0x6c, 0x5b, 0x1f, 0x3e, 0x7c, 0xf9, 0xad, 0x6b, 0x5a, 0xd7, 0xcf, + 0x9f, 0x3e, 0x5b, 0x16, 0xc7, 0xcf, 0x9f, 0x3e, 0x6b, 0x5a, 0xd6, 0xb5, 0xf3, 0xe7, 0xcf, 0x96, + 0xc5, 0xb1, 0xf3, 0xe7, 0xcf, 0x9a, 0xd6, 0xb5, 0xad, 0x7c, 0xf9, 0xf3, 0xe5, 0xb1, 0x6c, 0x7c, + 0xf9, 0xf3, 0xe6, 0xb5, 0xad, 0x6b, 0x5f, 0x3e, 0x7c, 0xf9, 0x6c, 0x5b, 0x1f, 0x3e, 0x7c, 0xf9, + 0xad, 0x6b, 0x5a, 0xd7, 0xcf, 0x9f, 0x3e, 0x5b, 0x16, 0xc7, 0xcf, 0x9f, 0x3e, 0x6b, 0x5a, 0xd6, + 0xb5, 0xf3, 0xe7, 0xcf, 0x96, 0xc5, 0xb1, 0xf3, 0xe7, 0xcf, 0x9a, 0xd0, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xfc, 0x44, 0x00, 0x66, 0xb1, 0x11, 0x11 +}; +unsigned char test_1_expected_stream[0x100] = { + 0xdc, 0x15, 0xde, 0xf1, 0x4a, 0xf1, 0xf8, 0x2c, + 0x75, 0xc8, 0x3a, 0x1f, 0xbf, 0x67, 0x19, 0xe1, + 0xf4, 0x6c, 0x78, 0x99, 0x48, 0xaf, 0xef, 0x94, + 0x71, 0x6b, 0x23, 0x9e, 0x29, 0x69, 0x2d, 0xa1, + 0x8a, 0xbb, 0xf4, 0x16, 0x68, 0xa5, 0x7f, 0x14, + 0xa9, 0x37, 0x24, 0x05, 0x5e, 0xdd, 0xec, 0x4b, + 0xb5, 0xcb, 0x7f, 0x1d, 0xa7, 0x09, 0x2a, 0xce, + 0xc4, 0x30, 0x83, 0xfd, 0xd9, 0x88, 0xa9, 0xf3, + 0x85, 0x9c, 0x38, 0x31, 0x88, 0xac, 0x74, 0x02, + 0x44, 0xdc, 0xb7, 0x81, 0x07, 0xc8, 0x1b, 0x03, + 0x9c, 0x76, 0xbe, 0xe9, 0x4d, 0x3e, 0x19, 0xad, + 0xe1, 0xf1, 0xa5, 0x13, 0xe8, 0xc0, 0x12, 0x57, + 0x68, 0xb1, 0x9c, 0x6c, 0x9f, 0x58, 0x78, 0xee, + 0x4f, 0x5b, 0x33, 0x1e, 0xc6, 0x29, 0xfc, 0x40, + 0x58, 0x22, 0xa2, 0xd8, 0x32, 0xdd, 0x29, 0x4f, + 0x2b, 0xe1, 0xef, 0xe4, 0xbb, 0xf2, 0x60, 0x94, + 0x6c, 0xc5, 0x51, 0xec, 0x35, 0x4c, 0x27, 0xc6, + 0x9d, 0x73, 0xe0, 0xf4, 0x2b, 0xfa, 0x62, 0x12, + 0xcd, 0x44, 0xbe, 0x57, 0xfe, 0x80, 0xe7, 0xa9, + 0x3c, 0x49, 0x42, 0xb6, 0xed, 0x05, 0x57, 0x00, + 0xd2, 0x25, 0x90, 0xb3, 0xe4, 0x65, 0x8f, 0xd6, + 0x4e, 0x0c, 0x73, 0x30, 0x3b, 0x68, 0x48, 0xdd, +// stream ^ sb +// 0x02, 0x48, 0xbd, 0xe9, 0x10, 0x69, 0xef, 0x86, +// 0xbc, 0x74, 0x1d, 0xd9, 0x74, 0x2e, 0x59, 0xa9, +// 0x09, 0x4c, 0xcf, 0x9c, 0x13, 0x88, 0x24, 0x7f, +// 
0xeb, 0x9b, 0x8f, 0xdb, 0x44, 0x3f, 0xd9, 0xda, +}; +unsigned char test_1_expected_block[0x100] = { + 0xad, 0xf6, 0x46, 0x06, 0xae, 0x92, 0x00, 0x38, + 0x47, 0x9b, 0xa3, 0x22, 0x92, 0x9b, 0xf4, 0xd5, + 0xf0, 0xbf, 0x2a, 0x2d, 0x7f, 0xf4, 0xdd, 0x8c, + 0x0d, 0x2e, 0x22, 0xb0, 0x1b, 0x01, 0xa5, 0x23, + 0x89, 0x40, 0xbc, 0xdb, 0x8f, 0xab, 0x70, 0xb8, + 0x27, 0x88, 0xcf, 0x9a, 0x4f, 0xae, 0xe9, 0x1a, + 0xee, 0xfc, 0x3d, 0x82, 0x92, 0xd8, 0xb5, 0x33, + 0xcb, 0x5e, 0xfe, 0xff, 0xe8, 0xd7, 0x51, 0x45, + 0xa0, 0x17, 0x3b, 0x8c, 0x88, 0x7b, 0xd5, 0x0e, + 0xc1, 0x9c, 0x63, 0x41, 0xf5, 0x5d, 0xaa, 0x8a, + 0x5f, 0x37, 0x5b, 0xce, 0x7f, 0x76, 0xb4, 0x83, + 0x74, 0x8f, 0x37, 0x47, 0x75, 0x6d, 0x2c, 0xca, + 0x5a, 0x40, 0xa5, 0x75, 0x1a, 0x61, 0x81, 0x8d, + 0xe4, 0x87, 0x17, 0xd0, 0x75, 0xee, 0x9a, 0x6b, + 0x82, 0x6e, 0x47, 0x92, 0xd3, 0x32, 0x59, 0x5a, + 0x03, 0x6e, 0x8a, 0x26, 0x7e, 0x0d, 0xf7, 0x7d, + 0xf4, 0x4e, 0x79, 0x49, 0x59, 0x6f, 0x27, 0x2b, + 0x80, 0x8f, 0x9e, 0x5b, 0xd6, 0xc0, 0xb0, 0x0b, + 0xe6, 0x2e, 0xb2, 0xd5, 0x80, 0x10, 0x7f, 0xc1, + 0xbf, 0xae, 0x1f, 0xd9, 0x6d, 0x57, 0x3c, 0x37, + 0x4d, 0x21, 0xe4, 0xc8, 0x85, 0x44, 0xcf, 0xa0, + 0x07, 0x93, 0x18, 0x83, 0xef, 0x35, 0xd4, 0xb1, + 0xff, 0xfc, 0x44, 0x00, 0x66, 0xb1, 0x11, 0x11 +}; +unsigned char test_1_expected_kb[] = { + 0xEE, 0x45, 0xE0, 0xC9, 0x02, 0x1B, 0xE0, 0x07, + 0x46, 0xA4, 0x1C, 0x26, 0x7B, 0x0C, 0x01, 0xED, + 0x93, 0x99, 0xC3, 0x14, 0xC4, 0x4A, 0x8D, 0x54, + 0x19, 0x82, 0x39, 0xD1, 0x33, 0xB0, 0x33, 0x52, + 0x75, 0x62, 0x80, 0x3A, 0xC8, 0x83, 0x5E, 0x23, + 0xA2, 0x57, 0x0C, 0xC4, 0x2C, 0x2D, 0xD2, 0x98, + 0xA0, 0x6C, 0x77, 0x29, 0x11, 0x42, 0x49, 0xCE, +}; +unsigned char test_1_expected_kk[] = { + 0x5e, 0x9d, 0xff, 0x2e, 0xbb, 0xaa, 0xa8, 0xe9, + 0xf6, 0x0e, 0xff, 0x7c, 0xda, 0xce, 0x55, 0x03, + 0xd9, 0xde, 0x79, 0xf5, 0x2c, 0xaf, 0x06, 0xf8, + 0xb2, 0xc9, 0xf8, 0x78, 0x54, 0xf9, 0xd1, 0xe7, + 0xeb, 0xbe, 0xd7, 0xeb, 0x25, 0xe9, 0x17, 0x99, + 0xbf, 0x24, 0xce, 0x2a, 0x73, 0xfe, 0xf9, 0xbc, + 0xd9, 
0x55, 0x91, 0xcf, 0xe0, 0xc9, 0xdf, 0x88, +}; + + +////////// test 2: even key +unsigned char test_2_key[0x8] = { + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 +}; +unsigned char test_2_encrypted[0x100] = { + 0x47, 0x00, 0x00, 0x90, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, +}; +unsigned char test_2_expected[0x100] = { + 0x47, 0x00, 0x00, 0x90, + 0x2d, 0x0a, 0x47, 0x20, 0x18, 0x11, 0x9c, 0x8a, 0xd1, 0x2a, 0x65, 0x6b, 0x89, 0xe4, 0x35, 0x2b, + 0xc2, 0xb5, 0x90, 0x61, 0xd1, 0x7e, 0x02, 0xe1, 0x3f, 0x46, 0x70, 0xcf, 0x77, 0x91, 0x2f, 0x22, + 0x93, 0xc1, 0x6c, 0xfe, 0x49, 0xad, 0x7c, 0xc2, 0xaf, 0x86, 0x1b, 0xa3, 0x29, 0xbe, 0xaa, 0x64, + 0xf0, 0x22, 0xb9, 0x5e, 0x98, 0xaa, 0x60, 0xef, 0xdf, 0xd6, 0x44, 0x77, 0xe6, 0xbf, 0xbb, 0x94, + 0xb2, 0x0a, 0x63, 0x0e, 0x5c, 0xf2, 0xac, 0xb4, 0x49, 0xcc, 0x9e, 0x4f, 0x94, 0x4c, 0x30, 0x12, + 0xe8, 0x55, 0xc2, 0x44, 0xa4, 0x52, 0xcb, 0x61, 0x81, 0xc9, 0xb6, 0xa6, 
0x6b, 0xef, 0xaf, 0xa6, + 0x71, 0x1d, 0x7b, 0x58, 0x2f, 0xfa, 0xd1, 0x0c, 0x07, 0x9d, 0x1f, 0x35, 0x87, 0xbe, 0x02, 0x9f, + 0x20, 0xc6, 0x60, 0x8f, 0x1c, 0x30, 0x0f, 0x96, 0xd0, 0x71, 0xd6, 0x51, 0x10, 0xdf, 0x5b, 0xf6, + 0x44, 0x2f, 0x80, 0x28, 0xb7, 0xec, 0x23, 0x59, 0x4b, 0x94, 0x0b, 0x9a, 0x74, 0xa1, 0x1f, 0xf7, + 0x9e, 0x76, 0xb4, 0xdf, 0xbb, 0x3c, 0x8c, 0x88, 0x97, 0x22, 0x56, 0x73, 0x16, 0x05, 0xac, 0xf9, + 0x4f, 0x77, 0x9d, 0x38, 0xa0, 0x6b, 0x05, 0xd2, 0xe6, 0x15, 0x01, 0xb1, 0x5c, 0xc9, 0x62, 0xa9, + 0x9b, 0x1a, 0x6a, 0x1a, 0xcf, 0xe6, 0xa8, 0xba, +}; + + +////////// test 3: even key +unsigned char test_3_key[0x8] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +unsigned char test_3_encrypted[0x100] = { + 0x47, 0x00, 0x00, 0x90, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + +}; +unsigned char test_3_expected[0x100] = { + 0x47, 0x00, 0x00, 0x90, + 0xfe, 0x91, 0xa7, 0x2f, 0xbf, 0xb0, 
0x6a, 0x54, 0xc1, 0xe4, 0x33, 0x27, 0x18, 0xd5, 0x9c, 0x43, + 0xea, 0xaa, 0x6b, 0x38, 0x5c, 0xe7, 0xae, 0xc9, 0xac, 0xec, 0xef, 0xc3, 0x51, 0x7d, 0x53, 0x47, + 0xa0, 0xa7, 0x6d, 0x73, 0x8a, 0x9d, 0x16, 0x7d, 0x05, 0x2d, 0xd6, 0x6b, 0xf4, 0x8d, 0x4b, 0x81, + 0x98, 0x2f, 0x46, 0xa5, 0x34, 0x84, 0xf3, 0x70, 0xa4, 0xe9, 0x04, 0x84, 0x7b, 0x87, 0x79, 0x3c, + 0x01, 0x25, 0xb5, 0xfc, 0x3d, 0xd0, 0x25, 0xea, 0x2f, 0x91, 0xf0, 0x3f, 0x7f, 0xd4, 0x8e, 0x1e, + 0x36, 0x83, 0x22, 0x91, 0x57, 0x92, 0x36, 0x0b, 0x44, 0xa5, 0xcc, 0x5e, 0xef, 0x44, 0x3e, 0xf8, + 0xe9, 0x7b, 0x5e, 0x0c, 0xea, 0xb2, 0x50, 0x39, 0xb7, 0xea, 0xc4, 0xfb, 0xe4, 0x37, 0xf8, 0x85, + 0xc2, 0xdc, 0x01, 0x98, 0x01, 0x2a, 0x44, 0xd3, 0x75, 0x10, 0x38, 0xf4, 0x85, 0x3e, 0xc9, 0xf7, + 0xe7, 0xe4, 0xec, 0x40, 0x3d, 0x8f, 0xa5, 0xd2, 0x8a, 0xca, 0x62, 0x03, 0x3f, 0x65, 0x28, 0x8d, + 0xf5, 0x56, 0xa7, 0xea, 0xd1, 0x0d, 0x70, 0x82, 0xbc, 0x90, 0x59, 0xf8, 0x3e, 0x08, 0xc9, 0xe1, + 0x97, 0xef, 0x82, 0x43, 0x35, 0x41, 0x3e, 0x7f, 0x00, 0x96, 0x3f, 0x90, 0xe5, 0x1e, 0x96, 0xba, + 0xce, 0x6d, 0xd2, 0x54, 0xce, 0x84, 0x76, 0x3c +}; + + +////////// odd key, only 80 (0x50) bytes of payload (10 groups of 8 bytes + 0 byte residue) +unsigned char test_p_10_0_key[0x8] = { + 0x2d, 0x11, 0x5f, 0x9d, 0x29, 0xbf, 0x7f, 0x67 +}; +unsigned char test_p_10_0_encrypted[0x100] = { + 0x47, 0x00, 0x7a, 0xbe, + 0x67, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0x71, 0xa5, 0x7b, 0x8f, 0xf9, 0x87, 0xcb, 0xac, + 0xea, 0x08, 0x0c, 0x02, 0x87, 0x7b, 0xad, 0x10, 0x40, 0x28, 0x8e, 0xd4, 0x4e, 0x62, 0xc7, 0x74, + 0xd6, 0xbb, 0x3a, 0xaa, 0xb0, 0x7b, 0x70, 0xbe, 0x06, 0xc9, 0xdc, 0x07, 0xd2, 0x2d, 0xab, 0x2d, + 0xe2, 0xc6, 0x36, 0xa6, 0xda, 0x64, 0x61, 0x15, 0xd1, 0x6a, 0x40, 0xc0, 0xa9, 0xfb, 0x3f, 0xb2, + 0x6d, 0xa5, 0x59, 0xae, 0x57, 0x88, 0x6b, 0x0e, 0x00, 0xae, 0xce, 0x64, 0xee, 0xfd, 0xb1, 0x7f, + 0x78, 0x9c, 0x12, 0x42, 0xbe, 0x30, 0x8a, 0xa3 +}; +unsigned char test_p_10_0_expected[0x100] = { + 0x47, 0x00, 0x7a, 0xbe, + 0x67, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa7, 0xca, 0x32, 0xaf, 0x2e, 0x6a, 0xea, 0x05, + 0x39, 0x33, 0x67, 0x5d, 0xa3, 0x61, 0x0f, 0x34, 0x40, 0x6c, 0x1a, 0xb3, 0xee, 0x54, 0x64, 0xd5, + 0xa3, 0x01, 0x95, 0x87, 0x9d, 0x3d, 0x38, 0xc5, 0x82, 0x8b, 0x8d, 0xab, 0xad, 0x93, 0x0f, 0xe8, + 0xf9, 0xbd, 0x52, 0x98, 0x59, 0xb2, 0x41, 0x95, 0xcd, 0xae, 0x9b, 0x3e, 0xdf, 0xdb, 0x14, 0x9b, + 0xa9, 0x22, 0x0d, 0x2d, 0x61, 0xf5, 0xf2, 0x52, 0x83, 0x20, 0xae, 0xb8, 0x83, 0x52, 0x02, 0xee, + 0xbd, 0xd2, 0x94, 0x6c, 0x27, 0x58, 0x55, 0xd0 +}; + + +////////// odd key, only 14 (0x0e) bytes of payload (1 group of 8 bytes + 6 byte residue) +unsigned char test_p_1_6_key[0x8] = { + 0x2d, 0x11, 0x5f, 0x9d, 0x29, 0xbf, 0x7f, 0x67 +}; +unsigned char test_p_1_6_encrypted[0x100] = { + 0x47, 0x00, 0x7a, 0xb7, + 0xa9, 0x00, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc0, 0x5e, 0xfb, 0xc8, 0x4a, 0x63, + 0xe3, 0x3c, 0x11, 0xd9, 0xe0, 0x75, 0x8e, 0xf2 +}; +unsigned char test_p_1_6_expected[0x100] = { + 0x47, 0x00, 0x7a, 0xb7, + 0xa9, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5a, 0x2c, 0xee, 0xb3, 0xde, 0x92, + 0xe7, 0xa6, 0x6c, 0xaa, 0x99, 0x84, 0xe4, 0x00 +}; Index: libs/libmythtv/FFdecsa/COPYING =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/COPYING 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. 
+These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License.
Index: libs/libmythtv/FFdecsa/parallel_032_int.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/parallel_032_int.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,55 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include "parallel_std_def.h" + +typedef unsigned int group; +#define GROUP_PARALLELISM 32 +#define FF0() 0x0 +#define FF1() 0xffffffff + +/* 64 rows of 32 bits */ + +void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ + *(((int *)tab)+g)=*((int *)data); + *(((int *)tab)+32+g)=*(((int *)data)+1); +} + +void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ + *((int *)data)=*(((int *)tab)+g); + *(((int *)data)+1)=*(((int *)tab)+32+g); +} + +void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ /* XOR the first n bytes of group g's table entry into data */ + int j; + for(j=0;j<n;j++){ + *(data+j)^=*(tab+4*(g+(j>=4?32-1:0))+j); /* bytes 0-3 sit in row g, bytes 4-7 in row 32+g (same layout FFTABLEIN writes) */ + } +} + +typedef unsigned int batch; +#define BYTES_PER_BATCH 4 +#define B_FFN_ALL_29() 0x29292929 +#define B_FFN_ALL_02() 0x02020202 +#define B_FFN_ALL_04() 0x04040404 +#define B_FFN_ALL_10() 0x10101010 +#define B_FFN_ALL_40() 0x40404040 +#define B_FFN_ALL_80() 0x80808080 + +#define M_EMPTY() Index: libs/libmythtv/FFdecsa/tmp_autogenerated_stuff_stream.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/tmp_autogenerated_stuff_stream.c 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,814 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + + +// define statics only once, when STREAM_INIT +#ifdef STREAM_INIT +static group A[32+10][4]; // 32 because we will move back (virtual shift register) +static group B[32+10][4]; // 32 because we will move back (virtual shift register) +static group X[4]; +static group Y[4]; +static group Z[4]; +static group D[4]; +static group E[4]; +static group F[4]; +static group p; +static group q; +static group r; + +static inline void trasp64_32_88ccw(unsigned char *data){ +/* 64 rows of 32 bits transposition (bytes transp. - 8x8 rotate counterclockwise)*/ +#define row ((unsigned int *)data) + int i,j; + for(j=0;j<64;j+=32){ + unsigned int t,b; + for(i=0;i<16;i++){ + t=row[j+i]; + b=row[j+16+i]; + row[j+i] = (t&0x0000ffff) | ((b )<<16); + row[j+16+i]=((t )>>16) | (b&0xffff0000) ; + } + } + for(j=0;j<64;j+=16){ + unsigned int t,b; + for(i=0;i<8;i++){ + t=row[j+i]; + b=row[j+8+i]; + row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8); + row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00); + } + } + for(j=0;j<64;j+=8){ + unsigned int t,b; + for(i=0;i<4;i++){ + t=row[j+i]; + b=row[j+4+i]; + row[j+i] =((t&0x0f0f0f0f)<<4) | (b&0x0f0f0f0f); + row[j+4+i] = (t&0xf0f0f0f0) | ((b&0xf0f0f0f0)>>4); + } + } + for(j=0;j<64;j+=4){ + unsigned int t,b; + for(i=0;i<2;i++){ + t=row[j+i]; + b=row[j+2+i]; + row[j+i] =((t&0x33333333)<<2) | (b&0x33333333); + row[j+2+i] = (t&0xcccccccc) | ((b&0xcccccccc)>>2); + } + } + for(j=0;j<64;j+=2){ + unsigned int t,b; + for(i=0;i<1;i++){ + t=row[j+i]; + b=row[j+1+i]; + row[j+i] =((t&0x55555555)<<1) | (b&0x55555555); + row[j+1+i] = (t&0xaaaaaaaa) | ((b&0xaaaaaaaa)>>1); + } + } +#undef row +} + +static inline void trasp64_32_88cw(unsigned char *data){ +/* 64 rows of 32 bits transposition (bytes transp. 
- 8x8 rotate clockwise)*/ +#define row ((unsigned int *)data) + int i,j; + for(j=0;j<64;j+=32){ + unsigned int t,b; + for(i=0;i<16;i++){ + t=row[j+i]; + b=row[j+16+i]; + row[j+i] = (t&0x0000ffff) | ((b )<<16); + row[j+16+i]=((t )>>16) | (b&0xffff0000) ; + } + } + for(j=0;j<64;j+=16){ + unsigned int t,b; + for(i=0;i<8;i++){ + t=row[j+i]; + b=row[j+8+i]; + row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8); + row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00); + } + } + for(j=0;j<64;j+=8){ + unsigned int t,b; + for(i=0;i<4;i++){ + t=row[j+i]; + b=row[j+4+i]; + row[j+i] =((t&0xf0f0f0f0)>>4) | (b&0xf0f0f0f0); + row[j+4+i]= (t&0x0f0f0f0f) | ((b&0x0f0f0f0f)<<4); + } + } + for(j=0;j<64;j+=4){ + unsigned int t,b; + for(i=0;i<2;i++){ + t=row[j+i]; + b=row[j+2+i]; + row[j+i] =((t&0xcccccccc)>>2) | (b&0xcccccccc); + row[j+2+i]= (t&0x33333333) | ((b&0x33333333)<<2); + } + } + for(j=0;j<64;j+=2){ + unsigned int t,b; + for(i=0;i<1;i++){ + t=row[j+i]; + b=row[j+1+i]; + row[j+i] =((t&0xaaaaaaaa)>>1) | (b&0xaaaaaaaa); + row[j+1+i]= (t&0x55555555) | ((b&0x55555555)<<1); + } + } +#undef row +} + +//64-64---------------------------------------------------------- +static inline void trasp64_64_88ccw(unsigned char *data){ +/* 64 rows of 64 bits transposition (bytes transp. 
- 8x8 rotate counterclockwise)*/ +#define row ((unsigned long long int *)data) + int i,j; + for(j=0;j<64;j+=64){ + unsigned long long int t,b; + for(i=0;i<32;i++){ + t=row[j+i]; + b=row[j+32+i]; + row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32); + row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ; + } + } + for(j=0;j<64;j+=32){ + unsigned long long int t,b; + for(i=0;i<16;i++){ + t=row[j+i]; + b=row[j+16+i]; + row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + } + } + for(j=0;j<64;j+=16){ + unsigned long long int t,b; + for(i=0;i<8;i++){ + t=row[j+i]; + b=row[j+8+i]; + row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + } + } + for(j=0;j<64;j+=8){ + unsigned long long int t,b; + for(i=0;i<4;i++){ + t=row[j+i]; + b=row[j+4+i]; + row[j+i] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL); + row[j+4+i] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4); + } + } + for(j=0;j<64;j+=4){ + unsigned long long int t,b; + for(i=0;i<2;i++){ + t=row[j+i]; + b=row[j+2+i]; + row[j+i] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL); + row[j+2+i] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2); + } + } + for(j=0;j<64;j+=2){ + unsigned long long int t,b; + for(i=0;i<1;i++){ + t=row[j+i]; + b=row[j+1+i]; + row[j+i] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL); + row[j+1+i] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1); + } + } +#undef row +} + +static inline void trasp64_64_88cw(unsigned char *data){ +/* 64 rows of 64 bits transposition (bytes transp. 
- 8x8 rotate clockwise)*/ +#define row ((unsigned long long int *)data) + int i,j; + for(j=0;j<64;j+=64){ + unsigned long long int t,b; + for(i=0;i<32;i++){ + t=row[j+i]; + b=row[j+32+i]; + row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32); + row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ; + } + } + for(j=0;j<64;j+=32){ + unsigned long long int t,b; + for(i=0;i<16;i++){ + t=row[j+i]; + b=row[j+16+i]; + row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + } + } + for(j=0;j<64;j+=16){ + unsigned long long int t,b; + for(i=0;i<8;i++){ + t=row[j+i]; + b=row[j+8+i]; + row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + } + } + for(j=0;j<64;j+=8){ + unsigned long long int t,b; + for(i=0;i<4;i++){ + t=row[j+i]; + b=row[j+4+i]; + row[j+i] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL); + row[j+4+i] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4); + } + } + for(j=0;j<64;j+=4){ + unsigned long long int t,b; + for(i=0;i<2;i++){ + t=row[j+i]; + b=row[j+2+i]; + row[j+i] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL); + row[j+2+i] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2); + } + } + for(j=0;j<64;j+=2){ + unsigned long long int t,b; + for(i=0;i<1;i++){ + t=row[j+i]; + b=row[j+1+i]; + row[j+i] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL); + row[j+1+i] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1); + } + } +#undef row +} + +//64-128---------------------------------------------------------- +static inline void trasp64_128_88ccw(unsigned char *data){ +/* 64 rows of 128 bits transposition (bytes transp. 
- 8x8 rotate counterclockwise)*/ +#define halfrow ((unsigned long long int *)data) + int i,j; + for(j=0;j<64;j+=64){ + unsigned long long int t,b; + for(i=0;i<32;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+32+i)]; + halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32); + halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ; + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+32+i)+1]; + halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32); + halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ; + } + } + for(j=0;j<64;j+=32){ + unsigned long long int t,b; + for(i=0;i<16;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+16+i)]; + halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+16+i)+1]; + halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + } + } + for(j=0;j<64;j+=16){ + unsigned long long int t,b; + for(i=0;i<8;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+8+i)]; + halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+8+i)+1]; + halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + } + } + for(j=0;j<64;j+=8){ + unsigned long long int t,b; + for(i=0;i<4;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+4+i)]; + halfrow[2*(j+i)] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL); + halfrow[2*(j+4+i)] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+4+i)+1]; + halfrow[2*(j+i)+1] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL); + halfrow[2*(j+4+i)+1] = 
(t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4); + } + } + for(j=0;j<64;j+=4){ + unsigned long long int t,b; + for(i=0;i<2;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+2+i)]; + halfrow[2*(j+i)] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL); + halfrow[2*(j+2+i)] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+2+i)+1]; + halfrow[2*(j+i)+1] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL); + halfrow[2*(j+2+i)+1] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2); + } + } + for(j=0;j<64;j+=2){ + unsigned long long int t,b; + for(i=0;i<1;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+1+i)]; + halfrow[2*(j+i)] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL); + halfrow[2*(j+1+i)] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+1+i)+1]; + halfrow[2*(j+i)+1] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL); + halfrow[2*(j+1+i)+1] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1); + } + } +#undef halfrow +} + +static inline void trasp64_128_88cw(unsigned char *data){ +/* 64 rows of 128 bits transposition (bytes transp. 
- 8x8 rotate clockwise)*/ +#define halfrow ((unsigned long long int *)data) + int i,j; + for(j=0;j<64;j+=64){ + unsigned long long int t,b; + for(i=0;i<32;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+32+i)]; + halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32); + halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ; + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+32+i)+1]; + halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32); + halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ; + } + } + for(j=0;j<64;j+=32){ + unsigned long long int t,b; + for(i=0;i<16;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+16+i)]; + halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+16+i)+1]; + halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + } + } + for(j=0;j<64;j+=16){ + unsigned long long int t,b; + for(i=0;i<8;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+8+i)]; + halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+8+i)+1]; + halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + } + } + for(j=0;j<64;j+=8){ + unsigned long long int t,b; + for(i=0;i<4;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+4+i)]; + halfrow[2*(j+i)] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL); + halfrow[2*(j+4+i)] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+4+i)+1]; + halfrow[2*(j+i)+1] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL); + halfrow[2*(j+4+i)+1] = 
(t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4); + } + } + for(j=0;j<64;j+=4){ + unsigned long long int t,b; + for(i=0;i<2;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+2+i)]; + halfrow[2*(j+i)] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL); + halfrow[2*(j+2+i)] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+2+i)+1]; + halfrow[2*(j+i)+1] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL); + halfrow[2*(j+2+i)+1] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2); + } + } + for(j=0;j<64;j+=2){ + unsigned long long int t,b; + for(i=0;i<1;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+1+i)]; + halfrow[2*(j+i)] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL); + halfrow[2*(j+1+i)] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+1+i)+1]; + halfrow[2*(j+i)+1] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL); + halfrow[2*(j+1+i)+1] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1); + } + } +#undef halfrow +} +#endif + + +#ifdef STREAM_INIT +void stream_cypher_group_init( + group iA[8][4], // [In] iA00,iA01,...iA73 32 groups | Derived from key. + group iB[8][4], // [In] iB00,iB01,...iB73 32 groups | Derived from key. + unsigned char *sb) // [In] (SB0,SB1,...SB7)...x32 32*8 bytes | Extra input. +#endif +#ifdef STREAM_NORMAL +void stream_cypher_group_normal( + unsigned char *cb) // [Out] (CB0,CB1,...CB7)...x32 32*8 bytes | Output. 
+#endif +{ +#ifdef STREAM_INIT + group in1[4]; + group in2[4]; +#endif + group extra_B[4]; + group fa,fb,fc,fd,fe; + group s1a,s1b,s2a,s2b,s3a,s3b,s4a,s4b,s5a,s5b,s6a,s6b,s7a,s7b; + group next_E[4]; + group tmp0,tmp1,tmp2,tmp3,tmp4; +#ifdef STREAM_INIT + group *sb_g=(group *)sb; +#endif +#ifdef STREAM_NORMAL + group *cb_g=(group *)cb; +#endif + int aboff; + int i,j,k,b; + +#ifdef STREAM_INIT +#endif +#ifdef STREAM_NORMAL +#endif +#ifdef STREAM_INIT + +#if GROUP_PARALLELISM==32 +trasp64_32_88ccw(sb); +#endif +#if GROUP_PARALLELISM==64 +trasp64_64_88ccw(sb); +#endif +#if GROUP_PARALLELISM==128 +trasp64_128_88ccw(sb); +#endif + +#endif + + aboff=32; + +#ifdef STREAM_INIT + // load first 32 bits of ck into A[aboff+0]..A[aboff+7] + // load last 32 bits of ck into B[aboff+0]..B[aboff+7] + // all other regs = 0 + for(i=0;i<8;i++){ + for(b=0;b<4;b++){ + A[aboff+i][b]=iA[i][b]; + B[aboff+i][b]=iB[i][b]; + } + } + for(b=0;b<4;b++){ + A[aboff+8][b]=FF0(); + A[aboff+9][b]=FF0(); + B[aboff+8][b]=FF0(); + B[aboff+9][b]=FF0(); + } + for(b=0;b<4;b++){ + X[b]=FF0(); + Y[b]=FF0(); + Z[b]=FF0(); + D[b]=FF0(); + E[b]=FF0(); + F[b]=FF0(); + } + p=FF0(); + q=FF0(); + r=FF0(); +#endif + + +//////////////////////////////////////////////////////////////////////////////// + + // EXTERNAL LOOP - 8 bytes per operation + for(i=0;i<8;i++){ + + +#ifdef STREAM_INIT + for(b=0;b<4;b++){ + in1[b]=sb_g[8*i+4+b]; + in2[b]=sb_g[8*i+b]; + } +#endif + + // INTERNAL LOOP - 2 bits per iteration + for(j=0; j<4; j++){ + + + // from A0..A9, 35 bits are selected as inputs to 7 s-boxes + // 5 bits input per s-box, 2 bits output per s-box + + // we can select bits with zero masking and shifting operations + // and synthetize s-boxes with optimized boolean functions. + // this is the actual reason we do all the crazy transposition + // stuff to switch between normal and bit slice representations. + // this code really flies. 
+ + fe=A[aboff+3][0];fa=A[aboff+0][2];fb=A[aboff+5][1];fc=A[aboff+6][3];fd=A[aboff+8][0]; +/* 1000 1110 1110 0001 : lev 7: */ //tmp0=( fa^( fb^( ( ( ( fa|fb )^fc )|( fc^fd ) )^ALL_ONES ) ) ); +/* 1110 0010 0011 0011 : lev 6: */ //tmp1=( ( fa|fb )^( ( fc&( fa|( fb^fd ) ) )^ALL_ONES ) ); +/* 0011 0110 1000 1101 : lev 5: */ //tmp2=( fa^( ( fb&fd )^( ( fa&fd )|fc ) ) ); +/* 0101 0101 1001 0011 : lev 5: */ //tmp3=( ( fa&fc )^( fa^( ( fa&fb )|fd ) ) ); +/* 1000 1110 1110 0001 : lev 7: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFOR(FFXOR(FFOR(fa,fb),fc),FFXOR(fc,fd)),FF1()))); +/* 1110 0010 0011 0011 : lev 6: */ tmp1=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fa,FFXOR(fb,fd))),FF1())); +/* 0011 0110 1000 1101 : lev 5: */ tmp2=FFXOR(fa,FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),fc))); +/* 0101 0101 1001 0011 : lev 5: */ tmp3=FFXOR(FFAND(fa,fc),FFXOR(fa,FFOR(FFAND(fa,fb),fd))); + s1a=FFXOR(tmp0,FFAND(fe,tmp1)); + s1b=FFXOR(tmp2,FFAND(fe,tmp3)); +//dump_mem("s1as1b-fe",&fe,BYPG,BYPG); +//dump_mem("s1as1b-fa",&fa,BYPG,BYPG); +//dump_mem("s1as1b-fb",&fb,BYPG,BYPG); +//dump_mem("s1as1b-fc",&fc,BYPG,BYPG); +//dump_mem("s1as1b-fd",&fd,BYPG,BYPG); + + fe=A[aboff+1][1];fa=A[aboff+2][2];fb=A[aboff+5][3];fc=A[aboff+6][0];fd=A[aboff+8][1]; +/* 1001 1110 0110 0001 : lev 6: */ //tmp0=( fa^( ( fb&( fc|fd ) )^( fc^( fd^ALL_ONES ) ) ) ); +/* 0000 0011 0111 1011 : lev 5: */ //tmp1=( ( fa&( fb^fd ) )|( ( fa|fb )&fc ) ); +/* 1100 0110 1101 0010 : lev 6: */ //tmp2=( ( fb&fd )^( ( fa&fd )|( fb^( fc^ALL_ONES ) ) ) ); +/* 0001 1110 1111 0101 : lev 5: */ //tmp3=( ( fa&fd )|( fa^( fb^( fc&fd ) ) ) ); +/* 1001 1110 0110 0001 : lev 6: */ tmp0=FFXOR(fa,FFXOR(FFAND(fb,FFOR(fc,fd)),FFXOR(fc,FFXOR(fd,FF1())))); +/* 0000 0011 0111 1011 : lev 5: */ tmp1=FFOR(FFAND(fa,FFXOR(fb,fd)),FFAND(FFOR(fa,fb),fc)); +/* 1100 0110 1101 0010 : lev 6: */ tmp2=FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),FFXOR(fb,FFXOR(fc,FF1())))); +/* 0001 1110 1111 0101 : lev 5: */ tmp3=FFOR(FFAND(fa,fd),FFXOR(fa,FFXOR(fb,FFAND(fc,fd)))); + 
s2a=FFXOR(tmp0,FFAND(fe,tmp1)); + s2b=FFXOR(tmp2,FFAND(fe,tmp3)); + + fe=A[aboff+0][3];fa=A[aboff+1][0];fb=A[aboff+4][1];fc=A[aboff+4][3];fd=A[aboff+5][2]; +/* 0100 1011 1001 0110 : lev 5: */ //tmp0=( fa^( fb^( ( fc&( fa|fd ) )^fd ) ) ); +/* 1101 0101 1000 1100 : lev 7: */ //tmp1=( ( fa&fc )^( ( fa^fd )|( ( fb|fc )^( fd^ALL_ONES ) ) ) ); +/* 0010 0111 1101 1000 : lev 4: */ //tmp2=( fa^( ( ( fb^fc )&fd )^fc ) ); +/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES; +/* 0100 1011 1001 0110 : lev 5: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFAND(fc,FFOR(fa,fd)),fd))); +/* 1101 0101 1000 1100 : lev 7: */ tmp1=FFXOR(FFAND(fa,fc),FFOR(FFXOR(fa,fd),FFXOR(FFOR(fb,fc),FFXOR(fd,FF1())))); +/* 0010 0111 1101 1000 : lev 4: */ tmp2=FFXOR(fa,FFXOR(FFAND(FFXOR(fb,fc),fd),fc)); +/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1(); + s3a=FFXOR(tmp0,FFAND(FFNOT(fe),tmp1)); + s3b=FFXOR(tmp2,FFAND(fe,tmp3)); + + fe=A[aboff+2][3];fa=A[aboff+0][1];fb=A[aboff+1][3];fc=A[aboff+3][2];fd=A[aboff+7][0]; +/* 1011 0101 0100 1001 : lev 7: */ //tmp0=( fa^( ( fc&( fa^fd ) )|( fb^( fc|( fd^ALL_ONES ) ) ) ) ); +/* 0010 1101 0110 0110 : lev 6: */ //tmp1=( ( fa&fb )^( fb^( ( ( fa|fc )&fd )^fc ) ) ); +/* 0110 0111 1101 0000 : lev 7: */ //tmp2=( fa^( ( fb&fc )|( ( ( fa&( fb^fd ) )|fc )^fd ) ) ); +/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES; +/* 1011 0101 0100 1001 : lev 7: */ tmp0=FFXOR(fa,FFOR(FFAND(fc,FFXOR(fa,fd)),FFXOR(fb,FFOR(fc,FFXOR(fd,FF1()))))); +/* 0010 1101 0110 0110 : lev 6: */ tmp1=FFXOR(FFAND(fa,fb),FFXOR(fb,FFXOR(FFAND(FFOR(fa,fc),fd),fc))); +/* 0110 0111 1101 0000 : lev 7: */ tmp2=FFXOR(fa,FFOR(FFAND(fb,fc),FFXOR(FFOR(FFAND(fa,FFXOR(fb,fd)),fc),fd))); +/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1(); + s4a=FFXOR(tmp0,FFAND(fe,FFXOR(tmp1,tmp0))); + s4b=FFXOR(FFXOR(s4a,tmp2),FFAND(fe,tmp3)); + + fe=A[aboff+4][2];fa=A[aboff+3][3];fb=A[aboff+5][0];fc=A[aboff+7][1];fd=A[aboff+8][2]; +/* 1000 1111 0011 0010 : lev 7: */ //tmp0=( ( ( fa&( fb|fc ) )^fb )|( ( ( fa^fc )|fd )^ALL_ONES ) ); +/* 0110 1011 0000 
1011 : lev 6: */ //tmp1=( fb^( ( fc^fd )&( fc^( fb|( fa^fd ) ) ) ) ); +/* 0001 1010 0111 1001 : lev 6: */ //tmp2=( ( fa&fc )^( fb^( ( fb|( fa^fc ) )&fd ) ) ); +/* 0101 1101 1101 0101 : lev 4: */ //tmp3=( ( ( fa^fb )&( fc^ALL_ONES ) )|fd ); +/* 1000 1111 0011 0010 : lev 7: */ tmp0=FFOR(FFXOR(FFAND(fa,FFOR(fb,fc)),fb),FFXOR(FFOR(FFXOR(fa,fc),fd),FF1())); +/* 0110 1011 0000 1011 : lev 6: */ tmp1=FFXOR(fb,FFAND(FFXOR(fc,fd),FFXOR(fc,FFOR(fb,FFXOR(fa,fd))))); +/* 0001 1010 0111 1001 : lev 6: */ tmp2=FFXOR(FFAND(fa,fc),FFXOR(fb,FFAND(FFOR(fb,FFXOR(fa,fc)),fd))); +/* 0101 1101 1101 0101 : lev 4: */ tmp3=FFOR(FFAND(FFXOR(fa,fb),FFXOR(fc,FF1())),fd); + s5a=FFXOR(tmp0,FFAND(fe,tmp1)); + s5b=FFXOR(tmp2,FFAND(fe,tmp3)); + + fe=A[aboff+2][1];fa=A[aboff+3][1];fb=A[aboff+4][0];fc=A[aboff+6][2];fd=A[aboff+8][3]; +/* 0011 0110 0010 1101 : lev 6: */ //tmp0=( ( ( fa&fc )&fd )^( ( fb&( fa|fd ) )^fc ) ); +/* 1110 1110 1011 1011 : lev 3: */ //tmp1=( ( ( fa^fc )&fd )^ALL_ONES ); +/* 0101 1000 0110 0111 : lev 6: */ //tmp2=( ( fa&( fb|fc ) )^( fb^( ( fb&fc )|fd ) ) ); +/* 0001 0011 0000 0001 : lev 5: */ //tmp3=( fc&( ( fa&( fb^fd ) )^( fb|fd ) ) ); +/* 0011 0110 0010 1101 : lev 6: */ tmp0=FFXOR(FFAND(FFAND(fa,fc),fd),FFXOR(FFAND(fb,FFOR(fa,fd)),fc)); +/* 1110 1110 1011 1011 : lev 3: */ tmp1=FFXOR(FFAND(FFXOR(fa,fc),fd),FF1()); +/* 0101 1000 0110 0111 : lev 6: */ tmp2=FFXOR(FFAND(fa,FFOR(fb,fc)),FFXOR(fb,FFOR(FFAND(fb,fc),fd))); +/* 0001 0011 0000 0001 : lev 5: */ tmp3=FFAND(fc,FFXOR(FFAND(fa,FFXOR(fb,fd)),FFOR(fb,fd))); + s6a=FFXOR(tmp0,FFAND(fe,tmp1)); + s6b=FFXOR(tmp2,FFAND(fe,tmp3)); + + fe=A[aboff+1][2];fa=A[aboff+2][0];fb=A[aboff+6][1];fc=A[aboff+7][2];fd=A[aboff+7][3]; +/* 0111 1000 1001 0110 : lev 5: */ //tmp0=( fb^( ( fc&fd )|( fa^( fc^fd ) ) ) ); +/* 0100 1001 0101 1011 : lev 6: */ //tmp1=( ( fb|fd )&( ( fa&fc )|( fb^( fc^fd ) ) ) ); +/* 0100 1001 1011 1001 : lev 5: */ //tmp2=( ( fa|fb )^( ( fc&( fb|fd ) )^fd ) ); +/* 1111 1111 1101 1101 : lev 3: */ //tmp3=( fd|( ( fa&fc 
)^ALL_ONES ) ); +/* 0111 1000 1001 0110 : lev 5: */ tmp0=FFXOR(fb,FFOR(FFAND(fc,fd),FFXOR(fa,FFXOR(fc,fd)))); +/* 0100 1001 0101 1011 : lev 6: */ tmp1=FFAND(FFOR(fb,fd),FFOR(FFAND(fa,fc),FFXOR(fb,FFXOR(fc,fd)))); +/* 0100 1001 1011 1001 : lev 5: */ tmp2=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fb,fd)),fd)); +/* 1111 1111 1101 1101 : lev 3: */ tmp3=FFOR(fd,FFXOR(FFAND(fa,fc),FF1())); + s7a=FFXOR(tmp0,FFAND(fe,tmp1)); + s7b=FFXOR(tmp2,FFAND(fe,tmp3)); + + +/* + we have just done this: + + int sbox1[0x20] = {2,0,1,1,2,3,3,0, 3,2,2,0,1,1,0,3, 0,3,3,0,2,2,1,1, 2,2,0,3,1,1,3,0}; + int sbox2[0x20] = {3,1,0,2,2,3,3,0, 1,3,2,1,0,0,1,2, 3,1,0,3,3,2,0,2, 0,0,1,2,2,1,3,1}; + int sbox3[0x20] = {2,0,1,2,2,3,3,1, 1,1,0,3,3,0,2,0, 1,3,0,1,3,0,2,2, 2,0,1,2,0,3,3,1}; + int sbox4[0x20] = {3,1,2,3,0,2,1,2, 1,2,0,1,3,0,0,3, 1,0,3,1,2,3,0,3, 0,3,2,0,1,2,2,1}; + int sbox5[0x20] = {2,0,0,1,3,2,3,2, 0,1,3,3,1,0,2,1, 2,3,2,0,0,3,1,1, 1,0,3,2,3,1,0,2}; + int sbox6[0x20] = {0,1,2,3,1,2,2,0, 0,1,3,0,2,3,1,3, 2,3,0,2,3,0,1,1, 2,1,1,2,0,3,3,0}; + int sbox7[0x20] = {0,3,2,2,3,0,0,1, 3,0,1,3,1,2,2,1, 1,0,3,3,0,1,1,2, 2,3,1,0,2,3,0,2}; + + s12 = sbox1[ (((A3>>0)&1)<<4) | (((A0>>2)&1)<<3) | (((A5>>1)&1)<<2) | (((A6>>3)&1)<<1) | (((A8>>0)&1)<<0) ] + |sbox2[ (((A1>>1)&1)<<4) | (((A2>>2)&1)<<3) | (((A5>>3)&1)<<2) | (((A6>>0)&1)<<1) | (((A8>>1)&1)<<0) ]; + s34 = sbox3[ (((A0>>3)&1)<<4) | (((A1>>0)&1)<<3) | (((A4>>1)&1)<<2) | (((A4>>3)&1)<<1) | (((A5>>2)&1)<<0) ] + |sbox4[ (((A2>>3)&1)<<4) | (((A0>>1)&1)<<3) | (((A1>>3)&1)<<2) | (((A3>>2)&1)<<1) | (((A7>>0)&1)<<0) ]; + s56 = sbox5[ (((A4>>2)&1)<<4) | (((A3>>3)&1)<<3) | (((A5>>0)&1)<<2) | (((A7>>1)&1)<<1) | (((A8>>2)&1)<<0) ] + |sbox6[ (((A2>>1)&1)<<4) | (((A3>>1)&1)<<3) | (((A4>>0)&1)<<2) | (((A6>>2)&1)<<1) | (((A8>>3)&1)<<0) ]; + s7 = sbox7[ (((A1>>2)&1)<<4) | (((A2>>0)&1)<<3) | (((A6>>1)&1)<<2) | (((A7>>2)&1)<<1) | (((A7>>3)&1)<<0) ]; +*/ + + // use 4x4 xor to produce extra nibble for T3 + + 
extra_B[3]=FFXOR(FFXOR(FFXOR(B[aboff+2][0],B[aboff+5][1]),B[aboff+6][2]),B[aboff+8][3]); + extra_B[2]=FFXOR(FFXOR(FFXOR(B[aboff+5][0],B[aboff+7][1]),B[aboff+2][3]),B[aboff+3][2]); + extra_B[1]=FFXOR(FFXOR(FFXOR(B[aboff+4][3],B[aboff+7][2]),B[aboff+3][0]),B[aboff+4][1]); + extra_B[0]=FFXOR(FFXOR(FFXOR(B[aboff+8][2],B[aboff+5][3]),B[aboff+2][1]),B[aboff+7][0]); + + // T1 = xor all inputs + // in1, in2, D are only used in T1 during initialisation, not generation + for(b=0;b<4;b++){ + A[aboff-1][b]=FFXOR(A[aboff+9][b],X[b]); + } + +#ifdef STREAM_INIT + for(b=0;b<4;b++){ + A[aboff-1][b]=FFXOR(FFXOR(A[aboff-1][b],D[b]),((j % 2) ? in2[b] : in1[b])); + } +#endif + + + // T2 = xor all inputs + // in1, in2 are only used in T1 during initialisation, not generation + // if p=0, use this, if p=1, rotate the result left + for(b=0;b<4;b++){ + B[aboff-1][b]=FFXOR(FFXOR(B[aboff+6][b],B[aboff+9][b]),Y[b]); + } + +#ifdef STREAM_INIT + for(b=0;b<4;b++){ + B[aboff-1][b]=FFXOR(B[aboff-1][b],((j % 2) ? in1[b] : in2[b])); + } +#endif + + + // if p=1, rotate left (yes, this is what we're doing) + tmp3=B[aboff-1][3]; + B[aboff-1][3]=FFXOR(B[aboff-1][3],FFAND(FFXOR(B[aboff-1][3],B[aboff-1][2]),p)); + B[aboff-1][2]=FFXOR(B[aboff-1][2],FFAND(FFXOR(B[aboff-1][2],B[aboff-1][1]),p)); + B[aboff-1][1]=FFXOR(B[aboff-1][1],FFAND(FFXOR(B[aboff-1][1],B[aboff-1][0]),p)); + B[aboff-1][0]=FFXOR(B[aboff-1][0],FFAND(FFXOR(B[aboff-1][0],tmp3),p)); + + + // T3 = xor all inputs + for(b=0;b<4;b++){ + D[b]=FFXOR(FFXOR(E[b],Z[b]),extra_B[b]); + } + + + // T4 = sum, carry of Z + E + r + for(b=0;b<4;b++){ + next_E[b]=F[b]; + } + + tmp0=FFXOR(Z[0],E[0]); + tmp1=FFAND(Z[0],E[0]); + F[0]=FFXOR(E[0],FFAND(q,FFXOR(Z[0],r))); + tmp3=FFAND(tmp0,r); + tmp4=FFOR(tmp1,tmp3); + + tmp0=FFXOR(Z[1],E[1]); + tmp1=FFAND(Z[1],E[1]); + F[1]=FFXOR(E[1],FFAND(q,FFXOR(Z[1],tmp4))); + tmp3=FFAND(tmp0,tmp4); + tmp4=FFOR(tmp1,tmp3); + + tmp0=FFXOR(Z[2],E[2]); + tmp1=FFAND(Z[2],E[2]); + F[2]=FFXOR(E[2],FFAND(q,FFXOR(Z[2],tmp4))); + 
tmp3=FFAND(tmp0,tmp4); + tmp4=FFOR(tmp1,tmp3); + + tmp0=FFXOR(Z[3],E[3]); + tmp1=FFAND(Z[3],E[3]); + F[3]=FFXOR(E[3],FFAND(q,FFXOR(Z[3],tmp4))); + tmp3=FFAND(tmp0,tmp4); + r=FFXOR(r,FFAND(q,FFXOR(FFOR(tmp1,tmp3),r))); // ultimate carry + +/* + we have just done this: (believe it or not) + + if (q) { + F = Z + E + r; + r = (F >> 4) & 1; + F = F & 0x0f; + } + else { + F = E; + } +*/ + for(b=0;b<4;b++){ + E[b]=next_E[b]; + } + + // this simple instruction is virtually shifting all the shift registers + aboff--; + +/* + we've just done this: + + A9=A8;A8=A7;A7=A6;A6=A5;A5=A4;A4=A3;A3=A2;A2=A1;A1=A0;A0=next_A0; + B9=B8;B8=B7;B7=B6;B6=B5;B5=B4;B4=B3;B3=B2;B2=B1;B1=B0;B0=next_B0; +*/ + + X[0]=s1a; + X[1]=s2a; + X[2]=s3b; + X[3]=s4b; + Y[0]=s3a; + Y[1]=s4a; + Y[2]=s5b; + Y[3]=s6b; + Z[0]=s5a; + Z[1]=s6a; + Z[2]=s1b; + Z[3]=s2b; + p=s7a; + q=s7b; + +#ifdef STREAM_NORMAL + // require 4 loops per output byte + // 2 output bits are a function of the 4 bits of D + // xor 2 by 2 + cb_g[8*i+7-2*j]=FFXOR(D[2],D[3]); + cb_g[8*i+6-2*j]=FFXOR(D[0],D[1]); +#endif + + + } // INTERNAL LOOP + + + } // EXTERNAL LOOP + + // move 32 steps forward, ready for next call + for(k=0;k<10;k++){ + for(b=0;b<4;b++){ + A[32+k][b]=A[k][b]; + B[32+k][b]=B[k][b]; + } + } + + +//////////////////////////////////////////////////////////////////////////////// + +#ifdef STREAM_NORMAL + +#if GROUP_PARALLELISM==32 +trasp64_32_88cw(cb); +#endif +#if GROUP_PARALLELISM==64 +trasp64_64_88cw(cb); +#endif +#if GROUP_PARALLELISM==128 +trasp64_128_88cw(cb); +#endif + +#endif + +#ifdef STREAM_INIT +#endif +#ifdef STREAM_NORMAL +#endif + +} + Index: libs/libmythtv/FFdecsa/parallel_064_8charA.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/parallel_064_8charA.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,171 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free 
software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +struct group_t{ + unsigned char s1[8]; +}; +typedef struct group_t group; + +#define GROUP_PARALLELISM 64 + +group static inline FF0(){ + group res; + int i; + for(i=0;i<8;i++) res.s1[i]=0x0; + return res; +} + +group static inline FF1(){ + group res; + int i; + for(i=0;i<8;i++) res.s1[i]=0xff; + return res; +} + +group static inline FFAND(group a,group b){ + group res; + int i; + for(i=0;i<8;i++) res.s1[i]=a.s1[i]&b.s1[i]; + return res; +} + +group static inline FFOR(group a,group b){ + group res; + int i; + for(i=0;i<8;i++) res.s1[i]=a.s1[i]|b.s1[i]; + return res; +} + +group static inline FFXOR(group a,group b){ + group res; + int i; + for(i=0;i<8;i++) res.s1[i]=a.s1[i]^b.s1[i]; + return res; +} + +group static inline FFNOT(group a){ + group res; + int i; + for(i=0;i<8;i++) res.s1[i]=~a.s1[i]; + return res; +} + + +/* 64 rows of 64 bits */ + +void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ + *(((int *)tab)+2*g)=*((int *)data); + *(((int *)tab)+2*g+1)=*(((int *)data)+1); +} + +void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ + *((int *)data)=*(((int *)tab)+2*g); + *(((int *)data)+1)=*(((int *)tab)+2*g+1); +} + +void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ + int j; + 
for(j=0;j>n; + return res; +} + +void static inline M_EMPTY(void){ +} Index: libs/libmythtv/FFdecsa/parallel_128_sse.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/parallel_128_sse.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,184 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + + +#include + +struct group_t{ + __m128 s1; +}; +typedef struct group_t group; + +#define GROUP_PARALLELISM 128 + +group static inline FF0(){ + group res; + static unsigned long long l[2]={0x0000000000000000ULL,0x0000000000000000ULL}; + res.s1=*(__m128*)l; + return res; +} + +group static inline FF1(){ + group res; + static unsigned long long l[2]={0xffffffffffffffffULL,0xffffffffffffffffULL}; + res.s1=*(__m128*)l; + return res; +} + +group static inline FFAND(group a,group b){ + group res; + res.s1=_mm_and_ps(a.s1,b.s1); + return res; +} + +group static inline FFOR(group a,group b){ + group res; + res.s1=_mm_or_ps(a.s1,b.s1); + return res; +} + +group static inline FFXOR(group a,group b){ + group res; + res.s1=_mm_xor_ps(a.s1,b.s1); + return res; +} + +group static inline FFNOT(group a){ + group res; + res.s1=_mm_xor_ps(a.s1,FF1().s1); + return res; +} + + +/* 64 rows of 128 bits */ + +void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ + *(((int *)tab)+2*g)=*((int *)data); + *(((int *)tab)+2*g+1)=*(((int *)data)+1); +} + +void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ + *((int *)data)=*(((int *)tab)+2*g); + *(((int *)data)+1)=*(((int *)tab)+2*g+1); +} + +void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ + int j; + for(j=0;j=3.3.3 is highly recommended. Older versions could give performance +problems. + +icc is currently unusable. In the initial phases of development of +FFdecsa icc was able to compile the code and gave interesting speed +results when using the 8charA grouping mode (array of 8 characters are +automatically manipulated through MMX instructions). At some point the +code began to work incorrectly because of a compiler bug (but I found a +workaround). Then, the performance dropped with no reason; I found a +workaround by adding an unused variable (alignment problem, grep for icc +in the code to see where it happens). 
Then, with the introduction of +group modes based on intrinsics, gcc was finally able to go beyond the +speed record originally set by icc. Additional code tweaks added more +speed to gcc, while icc started to segfault on compilation (both versions +7 and 8). In conclusion, icc is buggy and this code is too hard for it. +gcc on the other hand is great. I tried to inspect generated assembler +to find weak spots, and the generated code is very good indeed. + +Note: the code can be compiled with gcc or g++. g++ is 3% faster for +some reason. + +You should not get any errors or warnings. I only get two "inlining +failed" warnings on two functions I asked to be inlined but gcc doesn't +want to inline. + +The build process creates additional temp files by running grep +commands. This is how debugging output is handled. All the lines +containing DBG are removed and the temp file is compiled (so the line +numbers change between temp and original files). Don't edit the temp +files, they will be overwritten. If you don't remove the DBG lines (for +example, by changing "grep -v DBG" into "grep -v aaDBG" in Makefile) a +lot of output will be generated. This is useful to understand what's +wrong when the FFdecsa_test is failing. I included a reference "known +good" output in the debug_output directory. Extra debug output is +commented out in the code. + +The debug output functionality could be... bugged. This is because I +tested everything using hard coded int grouping mode and then +generalized the debug output to abstract grouping modes. A bug where 4 +bytes are printed instead of 8 could be present somewhere. I think it +isn't, but you've been warned. + +This code was only tried on Linux. +It should work on Windows or other platforms, but you may encounter +problems related to the compiler quality. If you want to try, begin with +the int grouping mode. It is only 30% slower than the best (MMX) and it +should be easily portable because no intrinsics are used. 
I'm +particularly interested in hearing what kind of performance can be +obtained on x86_64 processors in int, long long int, mmx, 2mmx, sse +modes. + + +As a reference, here are the results I get on an Athlon XP 2400+ (this +processor runs at 2000MHz); other processors belonging to the Athlon XP +architecture, including Durons, should have the same speed per MHz. +Cache size and bus speed don't matter. + +CPU: AMD Athlon XP 2400+ + +Compiler: g++ (gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)) + +Flags: -O3 -march=athlon-xp -fexpensive-optimizations -funroll-loops + --param max-unrolled-insns=500 + +grouping mode speed (Mbit/s) notes +--------------------------------------------------------------------- +PARALLEL_32_4CHAR 14 +PARALLEL_32_4CHARA 12 +PARALLEL_32_INT 125 very good and very portable +PARALLEL_64_8CHAR 17 +PARALLEL_64_8CHARA 15 needs a vectorizing compiler +PARALLEL_64_2INT 75 x86 has too few registers +PARALLEL_64_LONG 97 try this on x86_64 +PARALLEL_64_MMX 165 the best +PARALLEL_128_16CHAR 6 +PARALLEL_128_16CHARA 7 +PARALLEL_128_4INT 69 +PARALLEL_128_2LONG 52 +PARALLEL_128_2MMX 36 slower than expected +PARALLEL_128_SSE 156 just slower than 64_MMX + +Best speeds are obtained with native data types: int, mmx, sse (this +could be a compiler artifact). + +64 bit processors should try 64_LONG. + +Vectorizing compilers should like *CHARA. + +64_MMX is faster than 128_SSE on the Athlon; perhaps SSE instructions are +internally split into 64 bit chunks. Could be different on x86_64 or +Intel processors. + +128_SSE has a 64 bit (MMX) batch type because SSE has no shifting +instructions; they are only available on SSE2. As the Athlon XP doesn't +support SSE2, I couldn't experiment with that. 
Index: libs/libmythtv/FFdecsa/docs/FAQ.txt =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/docs/FAQ.txt 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,77 @@ +------- +FFdecsa +------- + +FFdecsa is a fast implementation of the CSA decryption algorithm for MPEG +TS packets. + +Q: What does FF stand for? +A: FFdecsa means "Fucking Fast decsa". + +Q: Why would you use such a rude name? +A: Because this code is fucking fast, more than 800% the speed of the best + implementation I'm able to find around at the moment. + +Q: How is that possible? Are all other programmers stupid? +A: No, they just tried to save a cycle or two tweaking a fundamentally wrong + implementation. The algorithm has to be implemented in a totally different + way to achieve good speed. + +Q: Do you use multimedia instructions? +A: I use every trick I could come up with, including multimedia instructions. + They are not fundamental in achieving speed, a version without them runs + at 6x the speed of the best implementation around (which uses MMX). + +Q: So how did you do that? +A: By using a different approach for the implementation. This code is not + exploiting some new CSA vulnerability, it is just doing the same + calculations better. Think about replacing bubble sort with quick sort. + +Q: You're joking, it's impossible to gain so much speed. +A: Speed tests are available, technical documentation is available, source + code is available. Try it yourself. 
+ If you want details, these are some of the documented tricks I used + (more details in the docs directory): + TRICK NUMBER 0: emulate the hardware + TRICK NUMBER 1: virtual shift registers + TRICK NUMBER 2: parallel bitslice + TRICK NUMBER 3: multimedia instructions + TRICK NUMBER 4: parallel byteslice + TRICK NUMBER 5: efficient bit permutation + TRICK NUMBER 6: efficient normal<->slice conversion + TRICK NUMBER 7: try hard to process packets together + TRICK NUMBER 8: try to avoid doing the same thing many times + TRICK NUMBER 9: compiler + TRICK NUMBER a: a lot of brain work + +Q: How can this code be useful? +A: You can use this code in place of the old slow implementations and save a + lot of CPU power. + +Q: Just that? +A: Well, new applications are possible. + Decrypting a whole transponder is easily doable now. Well, a $50 CPU can + decrypt four transponders at the same time if you have four DVB boards (but + I couldn't test that). + +Q: You're cheating, this code is fake, I don't believe one word. +A: Go away. This is technical stuff for people with brains. + +Q: This code is great, may I distribute your code in original or modified + form? +A: Only if you respect the license. + +Q: May I use your code in my player/library/plugin...? +A: Again, you have to respect the license. + +Q: Are you an extraterrestrial programmer? +A: No, just a Turkish guy with a PC to play with :-) + +Q: Why did you spend your time doing this? +A: Because I thought that my approach was doable and I was sure it would + have been much faster, so I had to implement it to confirm I was right. + I got 8x the speed and that's enough to be proud of it. And I could not + just keep the code for myself only. + +Q: What is the answer to the meaning of the universe? +A: 42,43,71,5f,65,85,f6,76,0d,13,28,96,... 
Index: libs/libmythtv/FFdecsa/parallel_032_4charA.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/parallel_032_4charA.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,171 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + + +struct group_t{ + unsigned char s1[4]; +}; +typedef struct group_t group; + +#define GROUP_PARALLELISM 32 + +group static inline FF0(){ + group res; + int i; + for(i=0;i<4;i++) res.s1[i]=0x0; + return res; +} + +group static inline FF1(){ + group res; + int i; + for(i=0;i<4;i++) res.s1[i]=0xff; + return res; +} + +group static inline FFAND(group a,group b){ + group res; + int i; + for(i=0;i<4;i++) res.s1[i]=a.s1[i]&b.s1[i]; + return res; +} + +group static inline FFOR(group a,group b){ + group res; + int i; + for(i=0;i<4;i++) res.s1[i]=a.s1[i]|b.s1[i]; + return res; +} + +group static inline FFXOR(group a,group b){ + group res; + int i; + for(i=0;i<4;i++) res.s1[i]=a.s1[i]^b.s1[i]; + return res; +} + +group static inline FFNOT(group a){ + group res; + int i; + for(i=0;i<4;i++) res.s1[i]=~a.s1[i]; + return res; +} + + +/* 64 rows of 32 bits */ + +void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ + *(((int *)tab)+g)=*((int *)data); + *(((int *)tab)+32+g)=*(((int *)data)+1); +} + +void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ + *((int *)data)=*(((int *)tab)+g); + *(((int *)data)+1)=*(((int *)tab)+32+g); +} + +void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ + int j; + for(j=0;j=4?32-1:0))+j); + } +} + +struct batch_t{ + unsigned char s1[4]; +}; +typedef struct batch_t batch; + +#define BYTES_PER_BATCH 4 + +batch static inline B_FFAND(batch a,batch b){ + batch res; + int i; + for(i=0;i<4;i++) res.s1[i]=a.s1[i]&b.s1[i]; + return res; +} + +batch static inline B_FFOR(batch a,batch b){ + batch res; + int i; + for(i=0;i<4;i++) res.s1[i]=a.s1[i]|b.s1[i]; + return res; +} + +batch static inline B_FFXOR(batch a,batch b){ + batch res; + int i; + for(i=0;i<4;i++) res.s1[i]=a.s1[i]^b.s1[i]; + return res; +} + + +batch static inline B_FFN_ALL_29(){ + batch res; + int i; + for(i=0;i<4;i++) res.s1[i]=0x29; + return res; +} +batch static inline B_FFN_ALL_02(){ + batch 
res; + int i; + for(i=0;i<4;i++) res.s1[i]=0x02; + return res; +} +batch static inline B_FFN_ALL_04(){ + batch res; + int i; + for(i=0;i<4;i++) res.s1[i]=0x04; + return res; +} +batch static inline B_FFN_ALL_10(){ + batch res; + int i; + for(i=0;i<4;i++) res.s1[i]=0x10; + return res; +} +batch static inline B_FFN_ALL_40(){ + batch res; + int i; + for(i=0;i<4;i++) res.s1[i]=0x40; + return res; +} +batch static inline B_FFN_ALL_80(){ + batch res; + int i; + for(i=0;i<4;i++) res.s1[i]=0x80; + return res; +} + +batch static inline B_FFSH8L(batch a,int n){ + batch res; + int i; + for(i=0;i<4;i++) res.s1[i]=a.s1[i]<>n; + return res; +} + +void static inline M_EMPTY(void){ +} Index: libs/libmythtv/FFdecsa/parallel_128_16char.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/parallel_128_16char.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,411 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + + +struct group_t{ + unsigned char s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,s16; +}; +typedef struct group_t group; + +#define GROUP_PARALLELISM 128 + +group static inline FF0(){ + group res; + res.s1=0x0; + res.s2=0x0; + res.s3=0x0; + res.s4=0x0; + res.s5=0x0; + res.s6=0x0; + res.s7=0x0; + res.s8=0x0; + res.s9=0x0; + res.s10=0x0; + res.s11=0x0; + res.s12=0x0; + res.s13=0x0; + res.s14=0x0; + res.s15=0x0; + res.s16=0x0; + return res; +} + +group static inline FF1(){ + group res; + res.s1=0xff; + res.s2=0xff; + res.s3=0xff; + res.s4=0xff; + res.s5=0xff; + res.s6=0xff; + res.s7=0xff; + res.s8=0xff; + res.s9=0xff; + res.s10=0xff; + res.s11=0xff; + res.s12=0xff; + res.s13=0xff; + res.s14=0xff; + res.s15=0xff; + res.s16=0xff; + return res; +} + +group static inline FFAND(group a,group b){ + group res; + res.s1=a.s1&b.s1; + res.s2=a.s2&b.s2; + res.s3=a.s3&b.s3; + res.s4=a.s4&b.s4; + res.s5=a.s5&b.s5; + res.s6=a.s6&b.s6; + res.s7=a.s7&b.s7; + res.s8=a.s8&b.s8; + res.s9=a.s9&b.s9; + res.s10=a.s10&b.s10; + res.s11=a.s11&b.s11; + res.s12=a.s12&b.s12; + res.s13=a.s13&b.s13; + res.s14=a.s14&b.s14; + res.s15=a.s15&b.s15; + res.s16=a.s16&b.s16; + return res; +} + +group static inline FFOR(group a,group b){ + group res; + res.s1=a.s1|b.s1; + res.s2=a.s2|b.s2; + res.s3=a.s3|b.s3; + res.s4=a.s4|b.s4; + res.s5=a.s5|b.s5; + res.s6=a.s6|b.s6; + res.s7=a.s7|b.s7; + res.s8=a.s8|b.s8; + res.s9=a.s9|b.s9; + res.s10=a.s10|b.s10; + res.s11=a.s11|b.s11; + res.s12=a.s12|b.s12; + res.s13=a.s13|b.s13; + res.s14=a.s14|b.s14; + res.s15=a.s15|b.s15; + res.s16=a.s16|b.s16; + return res; +} + +group static inline FFXOR(group a,group b){ + group res; + res.s1=a.s1^b.s1; + res.s2=a.s2^b.s2; + res.s3=a.s3^b.s3; + res.s4=a.s4^b.s4; + res.s5=a.s5^b.s5; + res.s6=a.s6^b.s6; + res.s7=a.s7^b.s7; + res.s8=a.s8^b.s8; + res.s9=a.s9^b.s9; + res.s10=a.s10^b.s10; + res.s11=a.s11^b.s11; + res.s12=a.s12^b.s12; + res.s13=a.s13^b.s13; + res.s14=a.s14^b.s14; + res.s15=a.s15^b.s15; + res.s16=a.s16^b.s16; 
+ return res; +} + +group static inline FFNOT(group a){ + group res; + res.s1=~a.s1; + res.s2=~a.s2; + res.s3=~a.s3; + res.s4=~a.s4; + res.s5=~a.s5; + res.s6=~a.s6; + res.s7=~a.s7; + res.s8=~a.s8; + res.s9=~a.s9; + res.s10=~a.s10; + res.s11=~a.s11; + res.s12=~a.s12; + res.s13=~a.s13; + res.s14=~a.s14; + res.s15=~a.s15; + res.s16=~a.s16; + return res; +} + + +/* 64 rows of 128 bits */ + +void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ + *(((int *)tab)+2*g)=*((int *)data); + *(((int *)tab)+2*g+1)=*(((int *)data)+1); +} + +void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ + *((int *)data)=*(((int *)tab)+2*g); + *(((int *)data)+1)=*(((int *)tab)+2*g+1); +} + +void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ + int j; + for(j=0;j>n; + res.s2=a.s2>>n; + res.s3=a.s3>>n; + res.s4=a.s4>>n; + res.s5=a.s5>>n; + res.s6=a.s6>>n; + res.s7=a.s7>>n; + res.s8=a.s8>>n; + res.s9=a.s9>>n; + res.s10=a.s10>>n; + res.s11=a.s11>>n; + res.s12=a.s12>>n; + res.s13=a.s13>>n; + res.s14=a.s14>>n; + res.s15=a.s15>>n; + res.s16=a.s16>>n; + return res; +} + + +void static inline M_EMPTY(void){ +} Index: libs/libmythtv/FFdecsa/FFdecsa_test.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/FFdecsa_test.c 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,174 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#include +#include +#include + +#include "FFdecsa.h" + +#ifndef NULL +#define NULL 0 +#endif + +#include "FFdecsa_test_testcases.h" + +int compare(unsigned char *p1, unsigned char *p2, int n, int silently){ + int i; + int ok=1; + for(i=0;iServiceID; hdr[hdrlen++]=1; //Place holder for transportID (future) @@ -489,6 +497,43 @@ } } +void *DVBCam::ExternalCamSoftkeyHelper(void*self) +{ + ((DVBCam*)self)->ExternalCamSoftkeyLoop(); + return NULL; +} +void DVBCam::ExternalCamSoftkeyLoop() +{ + int keyfd; + unsigned char key[8]; + int index, pid; + skThreadRunning = true; + QString keyFifo = QDir::homeDirPath() + + QString("/.mythtv/externcam_%1/keyfifo").arg(cardnum); + if ((keyfd=open(keyFifo.ascii(),O_RDONLY))<0) { + ERROR("Could not open keyfifo!"); + skThreadRunning = false; + return; + } + while (!exitSkThread) + { + unsigned char keytype; + read(keyfd, &keytype,1); + index = 0; + pid = 0; + read(keyfd, &index, sizeof(int)); + if(keytype == 'E' || keytype == 'O') + read(keyfd, key, 8); + else if (keytype == 'P') + read(keyfd, &pid, sizeof(int)); + + DVBRecorder::UpdateDeCSAKeys(cardnum, keytype, index, key, pid); + } + skThreadRunning = false; + GENERAL(QString("CA: External Softkey thread stopped")); + return; +} + void DVBCam::stopExternalCam() { if(external_cam_fd != -1) { Index: libs/libmythtv/FFdecsa/parallel_064_2int.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/parallel_064_2int.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,175 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under 
the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +struct group_t{ + unsigned int s1; + unsigned int s2; +}; +typedef struct group_t group; + +#define GROUP_PARALLELISM 64 + +group static inline FF0(){ + group res; + res.s1=0x0; + res.s2=0x0; + return res; +} + +group static inline FF1(){ + group res; + res.s1=0xffffffff; + res.s2=0xffffffff; + return res; +} + +group static inline FFAND(group a,group b){ + group res; + res.s1=a.s1&b.s1; + res.s2=a.s2&b.s2; + return res; +} + +group static inline FFOR(group a,group b){ + group res; + res.s1=a.s1|b.s1; + res.s2=a.s2|b.s2; + return res; +} + +group static inline FFXOR(group a,group b){ + group res; + res.s1=a.s1^b.s1; + res.s2=a.s2^b.s2; + return res; +} + +group static inline FFNOT(group a){ + group res; + res.s1=~a.s1; + res.s2=~a.s2; + return res; +} + + +/* 64 rows of 64 bits */ + +void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ + *(((int *)tab)+2*g)=*((int *)data); + *(((int *)tab)+2*g+1)=*(((int *)data)+1); +} + +void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ + *((int *)data)=*(((int *)tab)+2*g); + *(((int *)data)+1)=*(((int *)tab)+2*g+1); +} + +void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ + int j; + for(j=0;j>n; + res.s2=a.s2>>n; + return res; +} + + +void static inline M_EMPTY(void){ +} Index: libs/libmythtv/FFdecsa/logic/logic.c 
=================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/logic/logic.c 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,330 @@ +/* logic -- synthetize logic functions with 4 inputs + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + + + +/* Can we use negated inputs? 
*/ +#define noNEGATEDTOO + + +#include + + +/* + * abcd + */ + +#define BINARY(b15,b14,b13,b12,b11,b10,b9,b8,b7,b6,b5,b4,b3,b2,b1,b0) \ + ((b15)<<15)|((b14)<<14)|((b13)<<13)|((b12)<<12)| \ + ((b11)<<11)|((b10)<<10)|((b9) << 9)|((b8) << 8)| \ + ((b7) << 7)|((b6) << 6)|((b5) << 5)|((b4) << 4)| \ + ((b3) << 3)|((b2) << 2)|((b1) << 1)|((b0) << 0) + +struct fun{ + int level; + int op_type; + int op1; + int op2; +}; + +struct fun db[65536]; +int n_fun; + +#define LEVEL_ALOT 1000000 + +#define OP_FALSE 0 +#define OP_TRUE 1 +#define OP_SRC 2 +#define OP_AND 3 +#define OP_OR 4 +#define OP_XOR 5 + +#define SRC_A 10 +#define SRC_B 20 +#define SRC_C 30 +#define SRC_D 40 +#define SRC_AN 11 +#define SRC_BN 21 +#define SRC_CN 31 +#define SRC_DN 41 + +void dump_element_prefix(int); +void dump_element_infix(int); + +int main(void){ + int i,j; + int l,p1,p2; + int candidate; + int max_p2_lev; + + for(i=0;i<65536;i++){ + db[i].level=LEVEL_ALOT; + } + n_fun=0; + + db[0].level=0; + db[0].op_type=OP_FALSE; + n_fun++; + + db[65535].level=0; + db[65535].op_type=OP_TRUE; + n_fun++; + + db[BINARY(0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1)].level=0; + db[BINARY(0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1)].op_type=OP_SRC; + db[BINARY(0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1)].op1=SRC_A; + n_fun++; + + db[BINARY(0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1)].level=0; + db[BINARY(0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1)].op_type=OP_SRC; + db[BINARY(0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1)].op1=SRC_B; + n_fun++; + + db[BINARY(0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1)].level=0; + db[BINARY(0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1)].op_type=OP_SRC; + db[BINARY(0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1)].op1=SRC_C; + n_fun++; + + db[BINARY(0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1)].level=0; + db[BINARY(0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1)].op_type=OP_SRC; + db[BINARY(0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1)].op1=SRC_D; + n_fun++; +#ifdef NEGATEDTOO + db[BINARY(1,1,1,1, 1,1,1,1, 0,0,0,0, 0,0,0,0)].level=0; + db[BINARY(1,1,1,1, 1,1,1,1, 0,0,0,0, 0,0,0,0)].op_type=OP_SRC; 
+ db[BINARY(1,1,1,1, 1,1,1,1, 0,0,0,0, 0,0,0,0)].op1=SRC_AN; + n_fun++; + + db[BINARY(1,1,1,1, 0,0,0,0, 1,1,1,1, 0,0,0,0)].level=0; + db[BINARY(1,1,1,1, 0,0,0,0, 1,1,1,1, 0,0,0,0)].op_type=OP_SRC; + db[BINARY(1,1,1,1, 0,0,0,0, 1,1,1,1, 0,0,0,0)].op1=SRC_BN; + n_fun++; + + db[BINARY(1,1,0,0, 1,1,0,0, 1,1,0,0, 1,1,0,0)].level=0; + db[BINARY(1,1,0,0, 1,1,0,0, 1,1,0,0, 1,1,0,0)].op_type=OP_SRC; + db[BINARY(1,1,0,0, 1,1,0,0, 1,1,0,0, 1,1,0,0)].op1=SRC_CN; + n_fun++; + + db[BINARY(1,0,1,0, 1,0,1,0, 1,0,1,0, 1,0,1,0)].level=0; + db[BINARY(1,0,1,0, 1,0,1,0, 1,0,1,0, 1,0,1,0)].op_type=OP_SRC; + db[BINARY(1,0,1,0, 1,0,1,0, 1,0,1,0, 1,0,1,0)].op1=SRC_DN; + n_fun++; +#endif + + for(l=0;l<100;l++){ + printf("calculating level %i\n",l); + for(p1=1;p1<65536;p1++){ + if(db[p1].level==LEVEL_ALOT) continue; + max_p2_lev=l-db[p1].level-1; + for(p2=p1+1;p2<65536;p2++){ + if(db[p2].level>max_p2_lev) continue; + + candidate=p1&p2; + if(db[candidate].level==LEVEL_ALOT){ + //found new + db[candidate].level=db[p1].level+db[p2].level+1; + db[candidate].op_type=OP_AND; + db[candidate].op1=p1; + db[candidate].op2=p2; + n_fun++; + } + + candidate=p1|p2; + if(db[candidate].level==LEVEL_ALOT){ + //found new + db[candidate].level=db[p1].level+db[p2].level+1; + db[candidate].op_type=OP_OR; + db[candidate].op1=p1; + db[candidate].op2=p2; + n_fun++; + } + + candidate=p1^p2; + if(db[candidate].level==LEVEL_ALOT){ + //found new + db[candidate].level=db[p1].level+db[p2].level+1; + db[candidate].op_type=OP_XOR; + db[candidate].op1=p1; + db[candidate].op2=p2; + n_fun++; + } + + } + } + printf("num fun=%i\n\n",n_fun); + fflush(stdout); + if(n_fun>=65536) break; + } + + + for(i=0;i<65536;i++){ + if(db[i].level==LEVEL_ALOT) continue; + + printf("PREFIX "); + for(j=15;j>=0;j--){ + printf("%i",i&(1<=0;j--){ + printf("%i",i&(1<>n; + res.s2=a.s2>>n; + res.s3=a.s3>>n; + res.s4=a.s4>>n; + return res; +} + + +void static inline M_EMPTY(void){ +} Index: libs/libmythtv/FFdecsa/parallel_128_2mmx.h 
=================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/parallel_128_2mmx.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,199 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + + +#include + +struct group_t{ + __m64 s1,s2; +}; +typedef struct group_t group; + +#define GROUP_PARALLELISM 128 + +group static inline FF0(){ + group res; + res.s1=(__m64)0x0ULL; + res.s2=(__m64)0x0ULL; + return res; +} + +group static inline FF1(){ + group res; + res.s1=(__m64)0xffffffffffffffffULL; + res.s2=(__m64)0xffffffffffffffffULL; + return res; +} + +group static inline FFAND(group a,group b){ + group res; + res.s1=_m_pand(a.s1,b.s1); + res.s2=_m_pand(a.s2,b.s2); + return res; +} + +group static inline FFOR(group a,group b){ + group res; + res.s1=_m_por(a.s1,b.s1); + res.s2=_m_por(a.s2,b.s2); + return res; +} + +group static inline FFXOR(group a,group b){ + group res; + res.s1=_m_pxor(a.s1,b.s1); + res.s2=_m_pxor(a.s2,b.s2); + return res; +} + +group static inline FFNOT(group a){ + group res; + res.s1=_m_pxor(a.s1,FF1().s1); + res.s2=_m_pxor(a.s2,FF1().s2); + return res; +} + + +/* 64 rows of 128 bits */ + +void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ + *(((int *)tab)+2*g)=*((int *)data); + *(((int *)tab)+2*g+1)=*(((int *)data)+1); +} + +void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ + *((int *)data)=*(((int *)tab)+2*g); + *(((int *)data)+1)=*(((int *)tab)+2*g+1); +} + +void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ + int j; + for(j=0;j +#include +#include + +#include "FFdecsa.h" + +#ifndef NULL +#define NULL 0 +#endif + +// activate debug by changing the grep command there. +// don't edit autogenerated files (name beginning with "_"). 
+ +//// parallelization stuff, large speed differences are possible +// possible choices +#define PARALLEL_32_4CHAR 320 +#define PARALLEL_32_4CHARA 321 +#define PARALLEL_32_INT 322 +#define PARALLEL_64_8CHAR 640 +#define PARALLEL_64_8CHARA 641 +#define PARALLEL_64_2INT 642 +#define PARALLEL_64_LONG 643 +#define PARALLEL_64_MMX 644 +#define PARALLEL_128_16CHAR 1280 +#define PARALLEL_128_16CHARA 1281 +#define PARALLEL_128_4INT 1282 +#define PARALLEL_128_2LONG 1283 +#define PARALLEL_128_2MMX 1284 +#define PARALLEL_128_SSE 1285 + +//////// our choice //////////////// our choice //////////////// our choice //////////////// our choice //////// +#define PARALLEL_MODE PARALLEL_64_MMX +//////// our choice //////////////// our choice //////////////// our choice //////////////// our choice //////// + +#include "parallel_generic.h" +//// conditionals +#if PARALLEL_MODE==PARALLEL_32_4CHAR +#include "parallel_032_4char.h" +#elif PARALLEL_MODE==PARALLEL_32_4CHARA +#include "parallel_032_4charA.h" +#elif PARALLEL_MODE==PARALLEL_32_INT +#include "parallel_032_int.h" +#elif PARALLEL_MODE==PARALLEL_64_8CHAR +#include "parallel_064_8char.h" +#elif PARALLEL_MODE==PARALLEL_64_8CHARA +#include "parallel_064_8charA.h" +#elif PARALLEL_MODE==PARALLEL_64_2INT +#include "parallel_064_2int.h" +#elif PARALLEL_MODE==PARALLEL_64_LONG +#include "parallel_064_long.h" +#elif PARALLEL_MODE==PARALLEL_64_MMX +#include "parallel_064_mmx.h" +#elif PARALLEL_MODE==PARALLEL_128_16CHAR +#include "parallel_128_16char.h" +#elif PARALLEL_MODE==PARALLEL_128_16CHARA +#include "parallel_128_16charA.h" +#elif PARALLEL_MODE==PARALLEL_128_4INT +#include "parallel_128_4int.h" +#elif PARALLEL_MODE==PARALLEL_128_2LONG +#include "parallel_128_2long.h" +#elif PARALLEL_MODE==PARALLEL_128_2MMX +#include "parallel_128_2mmx.h" +#elif PARALLEL_MODE==PARALLEL_128_SSE +#include "parallel_128_sse.h" +#else +#error "unknown/undefined parallel mode" +#endif + +// stuff depending on conditionals + +#define BYTES_PER_GROUP 
(GROUP_PARALLELISM/8) +#define BYPG BYTES_PER_GROUP +#define BITS_PER_GROUP GROUP_PARALLELISM +#define BIPG BITS_PER_GROUP + + +//// debug tool + +static void dump_mem(unsigned char *string, unsigned char *p, int len, int linelen){ + int i; + for(i=0;i>4)&0xf; + iA[1]=(ck[0] )&0xf; + iA[2]=(ck[1]>>4)&0xf; + iA[3]=(ck[1] )&0xf; + iA[4]=(ck[2]>>4)&0xf; + iA[5]=(ck[2] )&0xf; + iA[6]=(ck[3]>>4)&0xf; + iA[7]=(ck[3] )&0xf; + iB[0]=(ck[4]>>4)&0xf; + iB[1]=(ck[4] )&0xf; + iB[2]=(ck[5]>>4)&0xf; + iB[3]=(ck[5] )&0xf; + iB[4]=(ck[6]>>4)&0xf; + iB[5]=(ck[6] )&0xf; + iB[6]=(ck[7]>>4)&0xf; + iB[7]=(ck[7] )&0xf; +} + +//----- stream main function + +#define STREAM_INIT +#include "tmp_autogenerated_stuff_stream.c" +#undef STREAM_INIT + +#define STREAM_NORMAL +#include "tmp_autogenerated_stuff_stream.c" +#undef STREAM_NORMAL + + +//-----block decypher + +//-----key schedule for block decypher + +static void key_schedule_block( + unsigned char *ck, // [In] ck[0]-ck[7] 8 bytes | Key. + unsigned char *kk) // [Out] kk[0]-kk[55] 56 bytes | Key schedule. +{ + static const unsigned char key_perm[0x40] = { + 0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40, + 0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29, + 0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11, + 0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37, + }; + + int i,j,k; + int bit[64]; + int newbit[64]; + int kb[7][8]; + + // 56 steps + // 56 key bytes kk(55)..kk(0) by key schedule from ck + + // kb(6,0) .. kb(6,7) = ck(0) .. ck(7) + kb[6][0] = ck[0]; + kb[6][1] = ck[1]; + kb[6][2] = ck[2]; + kb[6][3] = ck[3]; + kb[6][4] = ck[4]; + kb[6][5] = ck[5]; + kb[6][6] = ck[6]; + kb[6][7] = ck[7]; + + // calculate kb[5] .. 
kb[0] + for(i=5; i>=0; i--){ + // 64 bit perm on kb + for(j=0; j<8; j++){ + for(k=0; k<8; k++){ + bit[j*8+k] = (kb[i+1][j] >> (7-k)) & 1; + newbit[key_perm[j*8+k]-1] = bit[j*8+k]; + } + } + for(j=0; j<8; j++){ + kb[i][j] = 0; + for(k=0; k<8; k++){ + kb[i][j] |= newbit[j*8+k] << (7-k); + } + } + } + + // xor to give kk + for(i=0; i<7; i++){ + for(j=0; j<8; j++){ + kk[i*8+j] = kb[i][j] ^ i; + } + } + +} + +//-----block utils + +static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){ + int *ri=(int *)in; + int *ibi=(int *)out; + int j,i,k,g; + // copy and first step + for(g=0;g>16) | (b&0xffff0000) ; + } + } + } +//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); +// now 01010101 + for(j=0;j<8;j+=2){ + for(i=0;i<1;i++){ + for(k=0;k>8) | (b&0xff00ff00); + } + } + } +//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); +// now 00000000 +} + +static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){ + int *ri=(int *)in; + int *bdi=(int *)out; + int j,i,k,g; +#define INTS_PER_ROW (GROUP_PARALLELISM/8*2) +//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); +// now 00000000 + for(j=0;j<8;j+=2){ + for(i=0;i<1;i++){ + for(k=0;k>8) | (b&0xff00ff00); + } + } + } +//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); +// now 01010101 + for(j=0;j<8;j+=4){ + for(i=0;i<2;i++){ + for(k=0;k>16) | (b&0xffff0000) ; + } + } + } +//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); +// now 01230123 + for(g=0;g=0;i--){ + { + batch tkkmulti=kkmulti[i]; + batch *si=(batch *)sbox_in; + batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6); + for(g=0;g=*(clst+1)){ + // out of this range, try next + clst++;clst++; + pkt=*clst; + continue; + } + + do{ // handle this packet + xc0=pkt[3]&0xc0; + if(xc0==0x00){ + advanced+=can_advance; + stat_no_scramble++; + break; + } + if(xc0==0x40){ + 
advanced+=can_advance; + stat_reserved++; + break; + } + if(xc0==0x80||xc0==0xc0){ // encrypted + ev_od=(xc0&0x40)>>6; // 0 even, 1 odd + if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd) + if(group_ev_od==ev_od){ // could be added to group + pkt[3]&=0x3f; // consider it decrypted now + if(pkt[3]&0x20){ // incomplete packet + offset=4+pkt[4]+1; + len=188-offset; + n=len>>3; + residue=len-(n<<3); + if(n==0){ // decrypted==encrypted! + advanced+=can_advance; + stat_decrypted_mini++; + break; // this doesn't need more processing + } + }else{ + len=184; + offset=4; + n=23; + residue=0; + } + g_pkt[grouped]=pkt; + g_len[grouped]=len; + g_offset[grouped]=offset; + g_n[grouped]=n; + g_residue[grouped]=residue; + grouped++; + advanced+=can_advance; + stat_decrypted[ev_od]++; + } + else{ + can_advance=0; + break; // skip and go on + } + } + } while(0); + + if(can_advance){ + // move range start forward + *clst+=188; + } + // next packet, if there is one + pkt+=188; + } while(1); + + // delete empty ranges and compact list + clst2=cluster; + for(clst=cluster;*clst!=NULL;clst+=2){ + // if not empty + if(*clst<*(clst+1)){ + // it will remain + *clst2=*clst; + *(clst2+1)=*(clst+1); + clst2+=2; + } + } + *clst2=NULL; + + if(grouped==0){ + // no processing needed + return advanced; + } + + // sort them, longest payload first + // we expect many n=23 packets and a few n<23 + // grouped is always <= GROUP_PARALLELISM + +#define g_swap(a,b) \ + pkt=g_pkt[a]; \ + g_pkt[a]=g_pkt[b]; \ + g_pkt[b]=pkt; \ +\ + len=g_len[a]; \ + g_len[a]=g_len[b]; \ + g_len[b]=len; \ +\ + offset=g_offset[a]; \ + g_offset[a]=g_offset[b]; \ + g_offset[b]=offset; \ +\ + n=g_n[a]; \ + g_n[a]=g_n[b]; \ + g_n[b]=n; \ +\ + residue=g_residue[a]; \ + g_residue[a]=g_residue[b]; \ + g_residue[b]=residue; + + // step 1: move n=23 packets before small packets + t23=0; + tsmall=grouped-1; + for(;;){ + for(;t23=0;tsmall--){ + if(g_n[tsmall]==23) break; + } + + if(tsmall-t23<1) break; + + + 
g_swap(t23,tsmall); + + t23++; + tsmall--; + } + + // step 2: sort small packets in decreasing order of n (bubble sort is enough) + for(i=t23;ig_n[i]){ + g_swap(i,j); + } + } + } + + // we need to know how many packets need 23 iterations, how many 22... + for(i=0;i<=23;i++){ + alive[i]=0; + } + // count + alive[23-1]=t23; + for(i=t23;i=0;i--){ + alive[i]+=alive[i+1]; + } + + // choose key + if(group_ev_od==0){ + k=&keys.even; + } + else{ + k=&keys.odd; + } + + //INIT +#define INITIALIZE_UNUSED_INPUT +#ifdef INITIALIZE_UNUSED_INPUT +// unnecessary zeroing. +// without this, we operate on uninitialized memory +// when groupediA_g,k->iB_g,stream_in); + // fill first ib + for(g=0;g0;iter++){ + // alive and just dead packets: calc block + block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]); + // all packets (dead too): calc stream + stream_cypher_group_normal(stream_out); +//dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG); + + // alive packets: calc ib + for(g=0;gkkmulti,ib,block_out,alive[iter-1]); + // just dead packets: write decrypted data + for(g=alive[iter];g +#include +#include +#include + +#include "FFdecsa.h" + +#ifndef NULL +#define NULL 0 +#endif + +//#define DEBUG +#ifdef DEBUG +#define DBG(a) a +#else +#define DBG(a) +#endif + +//// parallelization stuff, large speed differences are possible +// possible choices +#define PARALLEL_32_4CHAR 320 +#define PARALLEL_32_4CHARA 321 +#define PARALLEL_32_INT 322 +#define PARALLEL_64_8CHAR 640 +#define PARALLEL_64_8CHARA 641 +#define PARALLEL_64_2INT 642 +#define PARALLEL_64_LONG 643 +#define PARALLEL_64_MMX 644 +#define PARALLEL_128_16CHAR 1280 +#define PARALLEL_128_16CHARA 1281 +#define PARALLEL_128_4INT 1282 +#define PARALLEL_128_2LONG 1283 +#define PARALLEL_128_2MMX 1284 +#define PARALLEL_128_SSE 1285 + +//////// our choice //////////////// our choice //////////////// our choice //////////////// our choice //////// +#ifndef PARALLEL_MODE +#define PARALLEL_MODE PARALLEL_32_INT +#endif +//////// 
our choice //////////////// our choice //////////////// our choice //////////////// our choice //////// + +#include "parallel_generic.h" +//// conditionals +#if PARALLEL_MODE==PARALLEL_32_4CHAR +#include "parallel_032_4char.h" +#elif PARALLEL_MODE==PARALLEL_32_4CHARA +#include "parallel_032_4charA.h" +#elif PARALLEL_MODE==PARALLEL_32_INT +#include "parallel_032_int.h" +#elif PARALLEL_MODE==PARALLEL_64_8CHAR +#include "parallel_064_8char.h" +#elif PARALLEL_MODE==PARALLEL_64_8CHARA +#include "parallel_064_8charA.h" +#elif PARALLEL_MODE==PARALLEL_64_2INT +#include "parallel_064_2int.h" +#elif PARALLEL_MODE==PARALLEL_64_LONG +#include "parallel_064_long.h" +#elif PARALLEL_MODE==PARALLEL_64_MMX +#include "parallel_064_mmx.h" +#elif PARALLEL_MODE==PARALLEL_128_16CHAR +#include "parallel_128_16char.h" +#elif PARALLEL_MODE==PARALLEL_128_16CHARA +#include "parallel_128_16charA.h" +#elif PARALLEL_MODE==PARALLEL_128_4INT +#include "parallel_128_4int.h" +#elif PARALLEL_MODE==PARALLEL_128_2LONG +#include "parallel_128_2long.h" +#elif PARALLEL_MODE==PARALLEL_128_2MMX +#include "parallel_128_2mmx.h" +#elif PARALLEL_MODE==PARALLEL_128_SSE +#include "parallel_128_sse.h" +#else +#error "unknown/undefined parallel mode" +#endif + +// stuff depending on conditionals + +#define BYTES_PER_GROUP (GROUP_PARALLELISM/8) +#define BYPG BYTES_PER_GROUP +#define BITS_PER_GROUP GROUP_PARALLELISM +#define BIPG BITS_PER_GROUP + + +//// debug tool + +static void dump_mem(const char *string, const unsigned char *p, int len, int linelen){ + int i; + for(i=0;i>4)&0xf; + iA[1]=(ck[0] )&0xf; + iA[2]=(ck[1]>>4)&0xf; + iA[3]=(ck[1] )&0xf; + iA[4]=(ck[2]>>4)&0xf; + iA[5]=(ck[2] )&0xf; + iA[6]=(ck[3]>>4)&0xf; + iA[7]=(ck[3] )&0xf; + iB[0]=(ck[4]>>4)&0xf; + iB[1]=(ck[4] )&0xf; + iB[2]=(ck[5]>>4)&0xf; + iB[3]=(ck[5] )&0xf; + iB[4]=(ck[6]>>4)&0xf; + iB[5]=(ck[6] )&0xf; + iB[6]=(ck[7]>>4)&0xf; + iB[7]=(ck[7] )&0xf; +} + +//----- stream main function + +#define STREAM_INIT +#include "stream.c" +#undef 
STREAM_INIT + +#define STREAM_NORMAL +#include "stream.c" +#undef STREAM_NORMAL + + +//-----block decypher + +//-----key schedule for block decypher + +static void key_schedule_block( + unsigned char *ck, // [In] ck[0]-ck[7] 8 bytes | Key. + unsigned char *kk) // [Out] kk[0]-kk[55] 56 bytes | Key schedule. +{ + static const unsigned char key_perm[0x40] = { + 0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40, + 0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29, + 0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11, + 0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37, + }; + + int i,j,k; + int bit[64]; + int newbit[64]; + int kb[7][8]; + + // 56 steps + // 56 key bytes kk(55)..kk(0) by key schedule from ck + + // kb(6,0) .. kb(6,7) = ck(0) .. ck(7) + kb[6][0] = ck[0]; + kb[6][1] = ck[1]; + kb[6][2] = ck[2]; + kb[6][3] = ck[3]; + kb[6][4] = ck[4]; + kb[6][5] = ck[5]; + kb[6][6] = ck[6]; + kb[6][7] = ck[7]; + + // calculate kb[5] .. 
kb[0] + for(i=5; i>=0; i--){ + // 64 bit perm on kb + for(j=0; j<8; j++){ + for(k=0; k<8; k++){ + bit[j*8+k] = (kb[i+1][j] >> (7-k)) & 1; + newbit[key_perm[j*8+k]-1] = bit[j*8+k]; + } + } + for(j=0; j<8; j++){ + kb[i][j] = 0; + for(k=0; k<8; k++){ + kb[i][j] |= newbit[j*8+k] << (7-k); + } + } + } + + // xor to give kk + for(i=0; i<7; i++){ + for(j=0; j<8; j++){ + kk[i*8+j] = kb[i][j] ^ i; + } + } + +} + +//-----block utils + +static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){ + int *ri=(int *)in; + int *ibi=(int *)out; + int j,i,k,g; + // copy and first step + for(g=0;g>16) | (b&0xffff0000) ; + } + } + } +//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); +// now 01010101 + for(j=0;j<8;j+=2){ + for(i=0;i<1;i++){ + for(k=0;k>8) | (b&0xff00ff00); + } + } + } +//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); +// now 00000000 +} + +static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){ + int *ri=(int *)in; + int *bdi=(int *)out; + int j,i,k,g; +#define INTS_PER_ROW (GROUP_PARALLELISM/8*2) +//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); +// now 00000000 + for(j=0;j<8;j+=2){ + for(i=0;i<1;i++){ + for(k=0;k>8) | (b&0xff00ff00); + } + } + } +//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); +// now 01010101 + for(j=0;j<8;j+=4){ + for(i=0;i<2;i++){ + for(k=0;k>16) | (b&0xffff0000) ; + } + } + } +//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); +// now 01230123 + for(g=0;g=0;i--){ + { + batch tkkmulti=kkmulti[i]; + batch *si=(batch *)sbox_in; + batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6); + for(g=0;gck,pk,8); +// precalculations for stream + key_schedule_stream(key->ck,key->iA,key->iB); + for(by=0;by<8;by++){ + for(bi=0;bi<8;bi++){ + 
key->ck_g[by][bi]=(key->ck[by]&(1<iA_g[by][bi]=(key->iA[by]&(1<iB_g[by][bi]=(key->iB[by]&(1<ck,key->kk); + for(i=0;i<56;i++){ + for(j=0;jkkmulti[i])+j)=key->kk[i]; + } + } +} + +void set_control_words(void *keys, const unsigned char *ev, const unsigned char *od){ + schedule_key(&((struct csa_keys_t *)keys)->even,ev); + schedule_key(&((struct csa_keys_t *)keys)->odd,od); +} + +void set_even_control_word(void *keys, const unsigned char *pk){ + schedule_key(&((struct csa_keys_t *)keys)->even,pk); +} + +void set_odd_control_word(void *keys, const unsigned char *pk){ + schedule_key(&((struct csa_keys_t *)keys)->odd,pk); +} + +//-----get control words + +void get_control_words(void *keys, unsigned char *even, unsigned char *odd){ + memcpy(even,&((struct csa_keys_t *)keys)->even.ck,8); + memcpy(odd,&((struct csa_keys_t *)keys)->odd.ck,8); +} + +//----- decrypt + +int decrypt_packets(void *keys, unsigned char **cluster){ + // statistics, currently unused + int stat_no_scramble=0; + int stat_reserved=0; + int stat_decrypted[2]={0,0}; + int stat_decrypted_mini=0; + unsigned char **clst; + unsigned char **clst2; + int grouped; + int group_ev_od; + int advanced; + int can_advance; + unsigned char *g_pkt[GROUP_PARALLELISM]; + int g_len[GROUP_PARALLELISM]; + int g_offset[GROUP_PARALLELISM]; + int g_n[GROUP_PARALLELISM]; + int g_residue[GROUP_PARALLELISM]; + unsigned char *pkt; + int xc0,ev_od,len,offset,n,residue; + struct csa_key_t* k; + int i,j,iter,g; + int t23,tsmall; + int alive[24]; +//icc craziness int pad1=0; //////////align! FIXME + unsigned char *encp[GROUP_PARALLELISM]; + unsigned char stream_in[GROUP_PARALLELISM*8]; + unsigned char stream_out[GROUP_PARALLELISM*8]; + unsigned char ib[GROUP_PARALLELISM*8]; + unsigned char block_out[GROUP_PARALLELISM*8]; + struct stream_regs regs; + +//icc craziness i=(int)&pad1;//////////align!!! 
FIXME + + // build a list of packets to be processed + clst=cluster; + grouped=0; + advanced=0; + can_advance=1; + group_ev_od=-1; // silence incorrect compiler warning + pkt=*clst; + do{ // find a new packet + if(grouped==GROUP_PARALLELISM){ + // full + break; + } + if(pkt==NULL){ + // no more ranges + break; + } + if(pkt>=*(clst+1)){ + // out of this range, try next + clst++;clst++; + pkt=*clst; + continue; + } + + do{ // handle this packet + xc0=pkt[3]&0xc0; + DBG(fprintf(stderr," exam pkt=%p, xc0=%02x, can_adv=%i\n",pkt,xc0,can_advance)); + if(xc0==0x00){ + DBG(fprintf(stderr,"skip clear pkt %p (can_advance is %i)\n",pkt,can_advance)); + advanced+=can_advance; + stat_no_scramble++; + break; + } + if(xc0==0x40){ + DBG(fprintf(stderr,"skip reserved pkt %p (can_advance is %i)\n",pkt,can_advance)); + advanced+=can_advance; + stat_reserved++; + break; + } + if(xc0==0x80||xc0==0xc0){ // encrypted + ev_od=(xc0&0x40)>>6; // 0 even, 1 odd + if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd) + if(group_ev_od==ev_od){ // could be added to group + pkt[3]&=0x3f; // consider it decrypted now + if(pkt[3]&0x20){ // incomplete packet + offset=4+pkt[4]+1; + len=188-offset; + n=len>>3; + residue=len-(n<<3); + if(n==0){ // decrypted==encrypted! + DBG(fprintf(stderr,"DECRYPTED MINI! 
(can_advance is %i)\n",can_advance)); + advanced+=can_advance; + stat_decrypted_mini++; + break; // this doesn't need more processing + } + }else{ + len=184; + offset=4; + n=23; + residue=0; + } + g_pkt[grouped]=pkt; + g_len[grouped]=len; + g_offset[grouped]=offset; + g_n[grouped]=n; + g_residue[grouped]=residue; + DBG(fprintf(stderr,"%2i: eo=%i pkt=%p len=%03i n=%2i residue=%i\n",grouped,ev_od,pkt,len,n,residue)); + grouped++; + advanced+=can_advance; + stat_decrypted[ev_od]++; + } + else{ + can_advance=0; + DBG(fprintf(stderr,"skip pkt %p and can_advance set to 0\n",pkt)); + break; // skip and go on + } + } + } while(0); + + if(can_advance){ + // move range start forward + *clst+=188; + } + // next packet, if there is one + pkt+=188; + } while(1); + DBG(fprintf(stderr,"-- result: grouped %i pkts, advanced %i pkts\n",grouped,advanced)); + + // delete empty ranges and compact list + clst2=cluster; + for(clst=cluster;*clst!=NULL;clst+=2){ + // if not empty + if(*clst<*(clst+1)){ + // it will remain + *clst2=*clst; + *(clst2+1)=*(clst+1); + clst2+=2; + } + } + *clst2=NULL; + + if(grouped==0){ + // no processing needed + return advanced; + } + + // sort them, longest payload first + // we expect many n=23 packets and a few n<23 + DBG(fprintf(stderr,"PRESORTING\n")); + for(i=0;i=0;tsmall--){ + if(g_n[tsmall]==23) break; + } +DBG(fprintf(stderr,"tsmall after for =%i\n",tsmall)); + + if(tsmall-t23<1) break; + +DBG(fprintf(stderr,"swap t23=%i,tsmall=%i\n",t23,tsmall)); + + g_swap(t23,tsmall); + + t23++; + tsmall--; +DBG(fprintf(stderr,"new t23=%i,tsmall=%i\n\n",t23,tsmall)); + } + DBG(fprintf(stderr,"packets with n=23, t23=%i grouped=%i\n",t23,grouped)); + DBG(fprintf(stderr,"MIDSORTING\n")); + for(i=0;ig_n[i]){ + g_swap(i,j); + } + } + } + DBG(fprintf(stderr,"POSTSORTING\n")); + for(i=0;i=0;i--){ + alive[i]+=alive[i+1]; + } + DBG(fprintf(stderr,"ALIVE\n")); + for(i=0;i<=23;i++){ + DBG(fprintf(stderr,"alive%2i=%i\n",i,alive[i])); + } + + // choose key + 
if(group_ev_od==0){ + k=&((struct csa_keys_t *)keys)->even; + } + else{ + k=&((struct csa_keys_t *)keys)->odd; + } + + //INIT +//#define INITIALIZE_UNUSED_INPUT +#ifdef INITIALIZE_UNUSED_INPUT +// unnecessary zeroing. +// without this, we operate on uninitialized memory +// when grouped>>>>ITER 0\n")); + iter=0; + stream_cypher_group_init(®s,k->iA_g,k->iB_g,stream_in); + // fill first ib + for(g=0;g0;iter++){ +DBG(fprintf(stderr,">>>>>ITER %i\n",iter)); + // alive and just dead packets: calc block + block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]); +DBG(dump_mem("BLO_ib ",block_out,8*alive[iter-1],8)); + // all packets (dead too): calc stream + stream_cypher_group_normal(®s,stream_out); +//dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG); + + // alive packets: calc ib + for(g=0;g>>>>ITER 23\n")); + iter=23; + // calc block + block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]); +DBG(dump_mem("23BLO_ib ",block_out,8*alive[iter-1],8)); + // just dead packets: write decrypted data + for(g=alive[iter];g100%, so undecryptable in real time) +- a very slow processor can decrypt one channel with no problems +- offline decoding of one hour of a 5Mbit/s channel takes less than + two minutes (30x than realtime) +- offline decoding will work at more than 20MB/s (megabytes/s), + nearly as fast as a file copy + +The docs directory contains useful stuff: + + FAQ.txt + to know something more about this software + + how_to_compile.txt + if you want to compile this code (and get optimal speed) + + how_to_use.txt + if you want to use this code + + technical_background.txt + if you want to understand how this code works or you want to + modify/improve it + + how_to_understand.txt + if you want to understand the code to make modifications + + how_to_release.txt + if you want to release modified versions of the code + + +fatih89r Index: libs/libmythtv/FFdecsa/FFdecsa.h =================================================================== --- /dev/null 
1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/FFdecsa.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,62 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#ifndef FFDECSA_H +#define FFDECSA_H + +//----- public interface + +// -- how many packets can be decrypted at the same time +// This is an info about internal decryption parallelism. +// You should try to call decrypt_packets with more packets than the number +// returned here for performance reasons (use get_suggested_cluster_size to know +// how many). +int get_internal_parallelism(void); + +// -- how many packets you should have in a cluster when calling decrypt_packets +// This is a suggestion to achieve optimal performance; typically a little +// higher than what get_internal_parallelism returns. +// Passing less packets could slow down the decryption. +// Passing more packets is never bad (if you don't spend a lot of time building +// the list). 
+int get_suggested_cluster_size(void); + +// -- alloc & free the key structure +void *get_key_struct(void); +void free_key_struct(void *keys); + +// -- set control words, 8 bytes each +void set_control_words(void *keys, const unsigned char *even, const unsigned char *odd); + +// -- set even control word, 8 bytes +void set_even_control_word(void *keys, const unsigned char *even); + +// -- set odd control word, 8 bytes +void set_odd_control_word(void *keys, const unsigned char *odd); + +// -- get control words, 8 bytes each +//void get_control_words(void *keys, unsigned char *even, unsigned char *odd); + +// -- decrypt many TS packets +// This interface is a bit complicated because it is designed for maximum speed. +// Please read doc/how_to_use.txt. +int decrypt_packets(void *keys, unsigned char **cluster); + +#endif Index: libs/libmythtv/FFdecsa/docs/how_to_understand.txt =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/docs/how_to_understand.txt 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,15 @@ +------- +FFdecsa +------- + +First, you need to know how decsa works, study the source of a classical +implementation. Then you have to understand how things are done in +slicing mode. Read all the documentation and have a working classical +implementation to compare partial results. There are comments spread +around the code. Some things are difficult to understand without paper +notes; for example the matrix transpositions and meaning of array +indices. + +Sorry, it is hard to understand and modify ... + +... but it was harder to design and implement!!! 
Index: libs/libmythtv/FFdecsa/parallel_064_mmx.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/parallel_064_mmx.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,106 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include + +#if 0 // some older gcc version need this +#define _m_pand _mm_and_si64 +#define _m_por _mm_or_si64 +#define _m_pxor _mm_xor_si64 +#define _m_psllqi _mm_slli_si64 +#define _m_psrlqi _mm_srli_si64 +#define _m_empty _mm_empty +#endif + +typedef __m64 group; +#define GROUP_PARALLELISM 64 +#define FF0() ((__m64)0x0ULL) +#define FF1() ((__m64)0xffffffffffffffffULL) +#define FFAND(a,b) _m_pand((a),(b)) +#define FFOR(a,b) _m_por((a),(b)) +#define FFXOR(a,b) _m_pxor((a),(b)) +#define FFNOT(a) _m_pxor((a),FF1()) + +/* 64 rows of 64 bits */ + +void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ +#if 1 + *(((int *)tab)+2*g)=*((int *)data); + *(((int *)tab)+2*g+1)=*(((int *)data)+1); +#else + *(((long long int *)tab)+g)=*((long long int *)data); +#endif +} + +void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ +#if 1 + *((int *)data)=*(((int *)tab)+2*g); + *(((int *)data)+1)=*(((int *)tab)+2*g+1); +#else + *((long long int *)data)=*(((long long int *)tab)+g); +#endif +} + +void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ + int j; + for(j=0;j>n; + res.s2=a.s2>>n; + res.s3=a.s3>>n; + res.s4=a.s4>>n; + res.s5=a.s5>>n; + res.s6=a.s6>>n; + res.s7=a.s7>>n; + res.s8=a.s8>>n; + return res; +} + + +void static inline M_EMPTY(void){ +} Index: libs/libmythtv/FFdecsa/docs/how_to_release.txt =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/docs/how_to_release.txt 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,21 @@ +------- +FFdecsa +------- + +Please use the name of the release you're basing on as a base name and +add your suffix. 
+ +For example if john modifies + FFdecsa-1.0.0 +he should release + FFdecsa-1.0.0-john_0.3 +or + FFdecsa-1.0.0-john_0.4 + +If paul modifies john's version the correct name would be like + FFdecsa-1.0.0-john_0.4-paul_0.1 + +This is to avoid many different versions with random version numbers, as +development is not centralized. + +Thank you. Index: libs/libmythtv/FFdecsa/parallel_064_long.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/parallel_064_long.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,56 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include "parallel_std_def.h" + +typedef unsigned long long group; +#define GROUP_PARALLELISM 64 +#define FF0() 0x0ULL +#define FF1() 0xffffffffffffffffULL + +/* 64 rows of 64 bits */ + +void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ + *(((int *)tab)+2*g)=*((int *)data); + *(((int *)tab)+2*g+1)=*(((int *)data)+1); +} + +void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ + *((int *)data)=*(((int *)tab)+2*g); + *(((int *)data)+1)=*(((int *)tab)+2*g+1); +} + +void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ + int j; + for(j=0;j>n; + res.s2=a.s2>>n; + return res; +} + + +void static inline M_EMPTY(void){ +} Index: libs/libmythtv/FFdecsa/parallel_std_def.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/parallel_std_def.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,29 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define FFXOR(a,b) ((a)^(b)) +#define FFAND(a,b) ((a)&(b)) +#define FFOR(a,b) ((a)|(b)) +#define FFNOT(a) (~(a)) + +#define B_FFAND(a,b) ((a)&(b)) +#define B_FFOR(a,b) ((a)|(b)) +#define B_FFXOR(a,b) ((a)^(b)) +#define B_FFSH8L(a,n) ((a)<<(n)) +#define B_FFSH8R(a,n) ((a)>>(n)) Index: libs/libmythtv/FFdecsa/docs/technical_background.txt =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/docs/technical_background.txt 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,341 @@ +------- +FFdecsa +------- + +This doc is for people who looked into the source code and found it +difficult to believe that this is a decsa algorithm, as it appears +completely different from other decsa implementations. + +It appears different because it is different. Being different is what +enables it to be a lot faster than all the others (currently it has more +than 800% the speed of the best version I was able to find) + +The csa algo was designed to be run in hardware, but people are now +running it in software. + +Hardware has data lines carrying bits and functional blocks doing +calculations (logic operations, adders, shifters, table lookup, ...), +software instead uses memory to contain data values and executes a +sequence of instructions to transform the values. As a consequence, +writing a software implementation of a hardware algorithm can be +inefficient. + +For example, if you have 32 data lines, you can permutate the bits with +zero cost in hardware (you just permute the physical traces), but if you +have the bits in a 32 bit variable you have to use 32 "and" operations +with 32 different masks, 32 shifts and 31 "or" operations (if you +suggest using "if"s testing the bits one by one you know nothing about +how jump prediction works in modern processors). + +So the approach is *emulating the hardware*. + +Then there are some additional cool tricks. 
+ +TRICK NUMBER 0: emulate the hardware +------------------------------------ +We will work on bits one by one, that is a 4 bit word is now four +variables. In this way we revert complex software operations into +hardware emulation: + + software hardware + ------------------------------------------- + copy values copy values + logic op logic op + (bit permut.) ands+shifts+ors copy values + additions logic op emulating adders + (comparisons) if logic op selecting one of the two results + lookup tables logic op synthesizing a ROM (*) + +(*) sometimes lookup tables can be converted to logic expressions + +The sbox in the stream cypher has been converted to efficient logic +operations using a custom written software (look into logic directory) +and is responsible for a lot of speed increase. Maybe there exists a +slightly better way to express the sbox as logical expressions, but it +would be a minuscule improvement. The sbox in the block cypher can't be +converted to efficient logic operations (8 bits of inputs are just too +much) and is implemented with a traditional lookup in an array. + +But there is a problem: we want to process bits, but our external +input and output want bytes. We need conversion routines. Conversion +routines are similar to the awful permutations we described before, so +this has to be done efficiently someway. + + +TRICK NUMBER 1: virtual shift registers +--------------------------------------- +Shift registers are normally implemented by moving all data around. +Better leave the data in the same memory locations and redefine where +the start of the register is (updating a pointer). That is called +virtual shift register. + + +TRICK NUMBER 2: parallel bitslice +--------------------------------- +Implementing the algorithm as described in tricks 0 and 1 gives us about +15% of the speed of a traditional implementation. This happens because +we work on only one bit, even if our CPU is 32 bit wide.
But *we can +process 32 different packets at the same time*. This is called +"bitslice" method. It can be done only if the program flow is not +dependent on the data (if, while,...). Luckily this is true. +Things like + if(a){ + b=c&d; + } + else{ + b=e&f; + } +can be coded as (think of how hardware would implement this) + b1=c&d; + b2=e&f; + b=b2^(a&(b1^b2)); +and things like + if(a){ + b=c&d + } +can be transformed in the same way, as they may be written as + if(a){ + b=c&d + } + else{ + b=b; + } +It could look wasteful, but it is not, and it removes the data dependency. + +Our code takes the same time as before, but produces 32 results, so +speed is now 480% the speed of a traditional implementation. + + +TRICK NUMBER 3: multimedia instructions +--------------------------------------- +If our CPU is 32 bit but it can also process larger blocks of data +efficiently (multimedia instructions), we can use them. We only need +logic ops and these are typically available. + +We can use MMX and work on 64 packets, or SSE and work on 128 packets. +The speed doesn't automatically double going from 32 to 64 because the +integer registers of the processor are normally faster. However, some +speed is gained in this way. + +Multimedia instructions are often used by writing assembler by hand, but +compilers are very good at doing register allocation, loop unrolling and +instruction scheduling, so it is better to write the code in C and use +native multimedia data types (intrinsics). + +Depending on number of available registers, execution latency, number of +execution units in the CPU, it may be good to process more than one data +block at the same time, for example 2 64bit MMX values. In this case we +work on 128 bits by simulating a 128 bit op with two consecutive 64 bit +op. This may or may not help (apparently not because x86 architecture +has a small number of registers). + +We can also try working on 96 bit, pairing a MMX and an int op, or 192 +bit by using MMX and SSE.
While this is doable in theory and could +exploit different execution units in the CPU, speed doesn't improve +(because of cache line handling problems inside the CPU, maybe). + +Besides int, MMX, SSE, we can use long long int (64 bit) and, why not, +unsigned char. + +Using groups of unsigned chars (8 or 16) could give the compiler an +opportunity to insert multimedia instructions automatically. For +example, icc can use one MMX instruction to do + unsigned char a[8],b[8],c[8]; + for(i=0;i<8;i++){ + a[i]=b[i]&c[i]; + } +Some compilers (like icc) are efficient in this case, but using +intrinsics manually is generally faster. + +All these experiments can be easily done if the code is written in a way +which abstracts the data type used. This is not easy but doable, all the +operations on data become (inlined) function calls or preprocessor +macros. Good compilers are able to simplify all the abstraction at +compile time and generate perfect code (gcc is great). + +The data abstraction used in the code is called "group". + + +TRICK NUMBER 4: parallel byteslice +---------------------------------- +The bitslice method works wonderfully on the stream cypher, but can't be +applied to the block cypher because of the evil big look up table. + +As we have to convert input data from normal to bitslice before starting +processing and from bitslice to normal before output, we convert the +stream cypher output to normal before the block calculations and do the +block stage in a traditional way. + +There are some xors in the block cypher; so we arrange bytes from +different packets side by side and use multimedia instructions to work +on many bytes at the same time. This is not exactly bitslice, maybe it +is called byteslice. The conversion routines are similar (just a bit +simpler). + +The data type we use to do this in the code is called "batch". + +The virtual shift register described in trick number 1 is useful too.
+ +The look up table is the only thing which is done serially one byte at a +time. Luckily if we do it on 32 or 64 bytes the loop is heavily +unrolled, and the compiler and the CPU manage to get a good speed +because there is little dependency between instructions. + + +TRICK NUMBER 5: efficient bit permutation +----------------------------------------- +The block cypher has a bit permutation part. As we are not in a bit +sliced form at that point, permuting bits in a byte takes 8 masks, 8 +and, 7 or; but three bits move in the same direction, so we make it with +6 masks, 6 and, 5 or. Batch processing through multimedia instructions +is applicable too. + + +TRICK NUMBER 6: efficient normal<->slice conversion +--------------------------------------------------- +The bitslice<->normal conversion routines are a sort of transposition +operation, that is you have bits in rows and want them in columns. This +can be done efficiently. For example, transposition of 8 bytes (matrix +of 8x8=64 bits) can be done this way (we want to exchange bit[i][j] with +bit[j][i] and we assume bit 0 is the MSB in the byte): + + // untested code, may be bugged + unsigned char a[8]; + unsigned char b[8]; + for(i=0;i<8;i++) b[i]=0; + for(i=0;i<8;i++){ + for(j=0;j<8;j++){ + b[i]|=((a[j]>>(7-i)&1))<<(7-j); + } + } + +but it is slow (128 shifts, 64 and, 64 or), or + + // untested code, may be bugged + unsigned char a[8]; + unsigned char b[8]; + for(i=0;i<8;i++) b[i]=0; + for(i=0;i<8;i++){ + for(j=0;j<8;j++){ + if(a[j]&(1<<(7-i))) b[i]|=1<<(7-j); + } + } + +but is very very slow (128 shifts, 64 and, 64 or, 128 unpredictable +if!), or using a>>=1 and b<<=1, which gains you nothing, or + + // untested code, may be bugged + unsigned char a[8]; + unsigned char b[8]; + unsigned char top,bottom; + for(j=0;j<1;j++){ + for(i=0;i<4;i++){ + top= a[8*j+i]; + bottom=a[8*j+4+i]; + a[8*j+i]= (top&0xf0) |((bottom&0xf0)>>4); + a[8*j+4+i]=((top&0x0f)<<4)| (bottom&0x0f); + } + } + for(j=0;j<2;j++){ + 
for(i=0;i<2;i++){ + top= a[4*j+i]; + bottom=a[4*j+2+i]; + a[4*j+i] = (top&0xcc) |((bottom&0xcc)>>2); + a[4*j+2+i]=((top&0x33)<<2)| (bottom&0x33); + } + } + for(j=0;j<4;j++){ + for(i=0;i<1;i++){ + top= a[2*j+i]; + bottom=a[2*j+1+i]; + a[2*j+i] = (top&0xaa) |((bottom&0xaa)>>1); + a[2*j+1+i]=((top&0x55)<<1)| (bottom&0x55); + } + } + for(i=0;i<8;i++) b[i]=a[i]; //easy to integrate into one of the stages above + +which is very fast (24 shifts, 48 and, 24 or) and has redundant loops +and address calculations which will be optimized away by the compiler. +It can be written as 3 nested loops but it becomes less readable and +makes it difficult to have results in b without an extra copy. The +compiler always unrolls heavily. + +The gain is much bigger when operating with 32 bit or 64 bit values (we +are going from N^2 to Nlog(N)). This method is used for rectangular +matrixes too (they have to be seen as square matrixes side by side). +Warning: this code is not *endian independent* if you use ints to work +on 4 bytes. Running it on a big endian processor will give you a +different and strange kind of bit rotation if you don't modify masks and +shifts. + +This is done in the code using int or long long int. It should be +possible to use MMX instead of long long int and it could be faster, but +this code doesn't cost a great fraction of the total time. There are +problems with the shifts, as multimedia instructions do not have all +possible kind of shift we need (SSE has none!). + + +TRICK NUMBER 7: try hard to process packets together +---------------------------------------------------- +As we are able to process many packets together, we have to avoid +running with many slots empty. Processing one packet or 64 packets takes +the same time if the internal parallelism is 64! 
So we try hard to +aggregate packets that can be processed together; for simplicity reasons +we don't mix packets with even and odd parity (different keys), even if +it should be doable with a little effort. Sometimes the transition from +even to odd parity and vice versa is not sharp, but there are sequences +like EEEEEOEEOEEOOOO. We try to group all the E together even if there +are O between them. This out-of-order processing complicates the +interface to the applications a bit but saves us three or four runs with +many empty slots. + +We have also logic to process together packets with a different size of +the payload, which is not always 184 bytes. This involves sorting the +packets by size before processing and careful operation of the 23 +iteration loop to exclude some packets from the calculations. It is not +CPU heavy. + +Packets with payload <8 bytes are identical before and after decryption +(!), so we skip them without using a slot. (according to DVB specs these +kinds of packets shouldn't happen, but they are used in the real world). + + +TRICK NUMBER 8: try to avoid doing the same thing many times +------------------------------------------------------------ +Some calculations related to keys are only done when the keys are set, +then all the values depending on keys are stored in a convenient form +and used every time we convert a group of packets. + + +TRICK NUMBER 9: compiler +------------------------ + +Compilers have a lot of optimization options. I used -march to target my +CPU and played with unusual options. In particular + "--param max-unrolled-insns=500" +does a good job on the tricky table lookup in the block cypher. Bigger +values unroll too much somewhere and lose speed. All the testing has +been done on an AthlonXP CPU with a specific version of gcc + gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) +Other combinations of CPU and compiler can give different speeds.
If the +compiler is not able to simplify the group and batch structures and +stores everything in memory instead of registers, performance will be +low. + +Absolutely use a good compiler! + +Note: the same code can be compiled in C or C++ mode. g++ gives a 3% +speed increase compared to gcc (I suppose some stricter constraint on +array and pointers in C++ mode gives the optimizer more freedom). + + +TRICK NUMBER a: a lot of brain work +----------------------------------- +The code started as very slow but correct implementation and was then +tweaked for months with a lot of experimentation and by adding all the +good ideas one after another to achieve little steps toward the best +speed possible, while continously testing that nothing had been broken. + +Many hours were spent on this code. + +Enjoy the result. Index: libs/libmythtv/dvbchannel.cpp =================================================================== --- libs/libmythtv/dvbchannel.cpp.orig 2006-06-11 12:41:25.000000000 -0400 +++ libs/libmythtv/dvbchannel.cpp 2006-06-20 17:36:06.000000000 -0400 @@ -658,6 +658,9 @@ retune_tuning = channel.tuning; + GENERAL(QString("invalidating DeCSA for card %1").arg(cardnum)); + DVBRecorder::UpdateDeCSAKeys(cardnum, 'I', 0, NULL, 0); + if (fd_frontend < 0) { ERROR("DVBChannel::Tune: Card not open!"); Index: libs/libmythtv/FFdecsa/parallel_128_16charA.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/parallel_128_16charA.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,172 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +struct group_t{ + unsigned char s1[16]; +}; +typedef struct group_t group; + +#define GROUP_PARALLELISM 128 + +group static inline FF0(){ + group res; + int i; + for(i=0;i<16;i++) res.s1[i]=0x0; + return res; +} + +group static inline FF1(){ + group res; + int i; + for(i=0;i<16;i++) res.s1[i]=0xff; + return res; +} + +group static inline FFAND(group a,group b){ + group res; + int i; + for(i=0;i<16;i++) res.s1[i]=a.s1[i]&b.s1[i]; + return res; +} + +group static inline FFOR(group a,group b){ + group res; + int i; + for(i=0;i<16;i++) res.s1[i]=a.s1[i]|b.s1[i]; + return res; +} + +group static inline FFXOR(group a,group b){ + group res; + int i; + for(i=0;i<16;i++) res.s1[i]=a.s1[i]^b.s1[i]; + return res; +} + +group static inline FFNOT(group a){ + group res; + int i; + for(i=0;i<16;i++) res.s1[i]=~a.s1[i]; + return res; +} + + +/* 64 rows of 128 bits */ + +void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ + *(((int *)tab)+2*g)=*((int *)data); + *(((int *)tab)+2*g+1)=*(((int *)data)+1); +} + +void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ + *((int *)data)=*(((int *)tab)+2*g); + *(((int *)data)+1)=*(((int *)tab)+2*g+1); +} + +void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ + int j; + for(j=0;j>n; + return res; +} + +void static inline M_EMPTY(void){ +} Index: libs/libmythtv/FFdecsa/parallel_generic.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 
+0000 +++ libs/libmythtv/FFdecsa/parallel_generic.h 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,102 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + + +#if 0 +//// generics +#define COPY4BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \ + *pd = *ps; }while(0) +#define COPY8BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ + *pd = *ps; }while(0) +#define COPY16BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ + *pd = *ps; \ + *(pd+1) = *(ps+1); }while(0) +#define COPY32BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ + *pd = *ps; \ + *(pd+1) = *(ps+1) \ + *(pd+2) = *(ps+2) \ + *(pd+3) = *(ps+3); }while(0) +#define XOR4BY(d,s1,s2) do{ int *pd=(int *)(d), *ps1=(int *)(s1), *ps2=(int *)(s2); \ + *pd = *ps1 ^ *ps2; }while(0) +#define XOR8BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \ + *pd = *ps1 ^ *ps2; }while(0) +#define XOR16BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \ + *pd = *ps1 ^ *ps2; \ + *(pd+8) = *(ps1+8) ^ *(ps2+8); }while(0) +#define XOR32BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), 
*ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \ + *pd = *ps1 ^ *ps2; \ + *(pd+1) = *(ps1+1) ^ *(ps2+1); \ + *(pd+2) = *(ps1+2) ^ *(ps2+2); \ + *(pd+3) = *(ps1+3) ^ *(ps2+3); }while(0) +#define XOR32BV(d,s1,s2) do{ int *const pd=(int *const)(d), *ps1=(const int *const)(s1), *ps2=(const int *const)(s2); \ + int z; \ + for(z=0;z<8;z++){ \ + pd[z]=ps1[z]^ps2[z]; \ + } \ + }while(0) +#define XOREQ4BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \ + *pd ^= *ps; }while(0) +#define XOREQ8BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ + *pd ^= *ps; }while(0) +#define XOREQ16BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ + *pd ^= *ps; \ + *(pd+1) ^=*(ps+1); }while(0) +#define XOREQ32BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ + *pd ^= *ps; \ + *(pd+1) ^=*(ps+1); \ + *(pd+2) ^=*(ps+2); \ + *(pd+3) ^=*(ps+3); }while(0) +#define XOREQ32BY4(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \ + *pd ^= *ps; \ + *(pd+1) ^=*(ps+1); \ + *(pd+2) ^=*(ps+2); \ + *(pd+3) ^=*(ps+3); \ + *(pd+4) ^=*(ps+4); \ + *(pd+5) ^=*(ps+5); \ + *(pd+6) ^=*(ps+6); \ + *(pd+7) ^=*(ps+7); }while(0) +#define XOREQ32BV(d,s) do{ unsigned char *pd=(unsigned char *)(d), *ps=(unsigned char *)(s); \ + int z; \ + for(z=0;z<32;z++){ \ + pd[z]^=ps[z]; \ + } \ + }while(0) + +#else +#define XOR_4_BY(d,s1,s2) do{ int *pd=(int *)(d), *ps1=(int *)(s1), *ps2=(int *)(s2); \ + *pd = *ps1 ^ *ps2; }while(0) +#define XOR_8_BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \ + *pd = *ps1 ^ *ps2; }while(0) +#define XOREQ_4_BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \ + *pd ^= *ps; }while(0) +#define XOREQ_8_BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ + *pd ^= *ps; }while(0) +#define COPY_4_BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \ + *pd = *ps; }while(0) +#define COPY_8_BY(d,s) do{ long long int *pd=(long long int 
*)(d), *ps=(long long int *)(s); \ + *pd = *ps; }while(0) + +#define BEST_SPAN 8 +#define XOR_BEST_BY(d,s1,s2) do{ XOR_8_BY(d,s1,s2); }while(0); +#define XOREQ_BEST_BY(d,s) do{ XOREQ_8_BY(d,s); }while(0); +#define COPY_BEST_BY(d,s) do{ COPY_8_BY(d,s); }while(0); + +#define END_MM do{ }while(0); +#endif Index: libs/libmythtv/FFdecsa/docs/how_to_use.txt =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/docs/how_to_use.txt 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,239 @@ +------- +FFdecsa +------- + +This code is able to decrypt MPEG TS packets with the CSA algorithm. To +achieve high speed, the decryption core works on many packets at the +same time, so the interface is more complicated than usual decsa +implementations. + +The FFdecsa.h file defines the external interface of this code. + +Basically: + +1) you use get_suggested_cluster_size to know the optimal number of +packets you have to pass for decryption + +2) you use set_control_words to set the decryption keys + +3) you use decrypt_packets to do the actual decryption + +You don't need to always use set_control_words before decrypt_packets, +if keys aren't changed. + + +The decrypt_packets function call decrypts many packets at the same +time. The interface is complicated because the only design goal was +speed, so it implements zero-copying of packets, out-of-order decryption +and optimal packet aggregation for better parallelism. This part is the +most difficult to understand. + +--- HOW TO USE int decrypt_packets(unsigned char **cluster); --- + +PARAMETERS + cluster points to an array of pointers, representing zero or more + ranges. Every range has a start and end pointer; a start pointer==NULL + terminates the array. + So, an array of pointers has this content: + start_of_buffer_1, end_of_buffer_1, ... 
start_of_buffer_N, + end_of_buffer_N, NULL + example: + 0x12340000, 0x123400bc, 0x56780a00, 0x56780b78, NULL + has two ranges (0x12340000 - 0x123400bc and 0x56780a00 - 0x56780b78), + for a total of three packets (starting at 0x12340000, 0x56780a00, + 0x56780abc) +RETURNS + How many packets can now be consumed by the caller, this is always >= + 1, unless the cluster contained zero packets (in that case it's + obviously zero). +MODIFIES + The cluster is modified to try to exclude packets which shouldn't be + submitted again for decryption (because just decrypted or originally + not encrypted). "Try to exclude" because the returned array will never + be bigger than what was passed, so if you passed only a range and some + packets in the middle were decrypted making "holes" into the range, + the range would have to be split into several ranges, and that will + not be done. If you want a strict description of what has to be passed + again to decrypt_packets, you have to use ranges with only one packet + inside. Note that the first packet will certainly be eliminated from + the returned cluster (see also RETURNS). + +You can now read the detailed description of operation or just skip to +the API examples. + + +--------------------------------- +DETAILED DESCRIPTION OF OPERATION +--------------------------------- + consider a sequence of packets like this: + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ... + E E E E E E E E E E E O E O E O O 0 0 0 0 0 0 0 0 c O O O O O O O O O O O ...
+ where + E = encrypted_even, + O = encrypted_odd, + e = clear_was_encrypted_even, + o = clear_was_encrypted_odd, + c = clear + and suppose the suggested cluster size is 10 (this could be for a function with internal parallelism 8) + + 1) we define the cluster to include packets 0-9 and + call decrypt_packets + a possible result is that the function call + - returns 8 (8 packets available) + - the buffer contains now this + ----------------------------- + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ... + e e e e e e e e E E E O E O E O O 0 0 0 0 0 0 0 0 c O O O O O O O O O O O ... + ----- + - the modified cluster covers 8-9 [continue reading, but then see note 1 below] + so, we can use the first 8 packets of the original cluster (0-7) + + 2) now, we define cluster over 8-17 and call decrypt_packets + a possible result is: + - returns 3 (3 packets available) + - the buffer contains now this (!!!) + ----------------------------- + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ... + e e e e e e e e e e e O e O e O O 0 0 0 0 0 0 0 0 c O O O O O O O O O O O ... + -- -- -------- + - the modified cluster covers 11-11,13-13,15-17 [continue reading, but then see note 1 below] + so, we can use the first 3 packets of the original cluster (8-10) + + 3) now, we define cluster over 11-20 and call decrypt packets (defining a cluster 11-11,13-13,15-22 would be better) + a possible result is: + - returns 10 (10 packets available) + - the buffer contains now this + ----------------------------- + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ... + e e e e e e e e e e e o e o e o o o o o o 0 0 0 0 c O O O O O O O O O O O ... 
+ + - the modified cluster is empty + so, we can use the first 10 packets of the original cluster (11-20) + What it happened is that the second call decrypted packets 12 and 14 but they were + not made available because packet 11 was still encrypted, + the third call decrypted 11,13,15-20 and included 12 and 14 as available too. + + 4) now, we define cluster over 21-30 and call decrypt packets + a possible result is: + - returns 9 (9 packets available) + - the buffer contains now this + ----------------------------- + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ... + e e e e e e e e e e e o e o e o o o o o o o o o o c o o o o O O O O O O O ... + -- + - the modified cluster covers 30-30 + so, we can use the first 9 packets of the original cluster (21-29) + What happened is that packet 25 could be skipped because it is in clear. + + Note that the suggested cluster size (10) is higher than the maximum number + of packets that can be really decrypted (8), but we are able to skip 12 and 14 + in step 3) and run the decryption on a full 8 packets group. + In the same way, we were able to skip 25 in step 4). + There are three kinds of "free" packets we can skip: + - packets decrypted in a previous call (as 12 and 14) + - packets already in clear (as 25) + - packets with a payload of less than 8 bytes (clear==encrypted!) + + Note also that we could have defined a better cluster in step 3 + (11-11,13-13,15-22), using what step 2 had returned. The risk of not + having 8 packets to decrypt would have been smaller (consider the case + where 19 and 20 were "c"). 
+ + Final considerations: + - you can use a bigger or smaller cluster than the suggested number of packets + - every call to decrypt_packets has a *fixed* CPU cost, so you should try to + not run it with a few packets, when possible + - decrypt_packets can't decrypt even and odd at the same time; it guarantees + that the first packet will be decrypted and tries to decrypt as many packets + as possible + - clear packets in the middle of encrypted packets don't happen in real world, + but E,E,E,O,E,O,O,O sequences do happen (audio/video muxing problems?) and + small packets (<8 bytes) happen frequently; the ability to skip is useful. + + note 1: + As the returned cluster will not have more ranges than the passed one, what it is + described above is not actually true. + In the step 1) the returned cluster will cover 8-9, but in step 2) it will + cover 11-17 (some extra packets had to remain in); this lack of information + prevents us from using an optimal 11-11,13-13,15-22 in step 3). Note that + in any case step 3) will decrypt 11,13,15,16,17,18,19,20 thanks to the + extra margin we use (we put ten packets (including 19 and 20) even if the + parallelism was just 8, and it was a good idea; but if 19 and 20 were of + type c, we would have run the decryption with only 6/8 efficiency). + This problem can be prevented by using ranges with only one packet: in + step 2) we would have passed + 8-8,9-9,10-10,11-11,12-12,13-13,14-14,15-15,16-16,17-17 + and got back + 11-11,13-13,15-17. + + +------------ +API EXAMPLES +------------ + +Some examples of how the API can be used (this is not real code, so it +may have typos or other bugs). 
+ + +Example 1: (big linear buffer, simple use of cluster) + + unsigned char *p; + unsigned char *cluster[3]; + for(p=start;p=4?32-1:0))+j); + } +} + +struct batch_t{ + unsigned char s1,s2,s3,s4; +}; +typedef struct batch_t batch; + +#define BYTES_PER_BATCH 4 + +batch static inline B_FFAND(batch a,batch b){ + batch res; + res.s1=a.s1&b.s1; + res.s2=a.s2&b.s2; + res.s3=a.s3&b.s3; + res.s4=a.s4&b.s4; + return res; +} + +batch static inline B_FFOR(batch a,batch b){ + batch res; + res.s1=a.s1|b.s1; + res.s2=a.s2|b.s2; + res.s3=a.s3|b.s3; + res.s4=a.s4|b.s4; + return res; +} + +batch static inline B_FFXOR(batch a,batch b){ + batch res; + res.s1=a.s1^b.s1; + res.s2=a.s2^b.s2; + res.s3=a.s3^b.s3; + res.s4=a.s4^b.s4; + return res; +} + + +batch static inline B_FFN_ALL_29(){ + batch res; + res.s1=0x29; + res.s2=0x29; + res.s3=0x29; + res.s4=0x29; + return res; +} +batch static inline B_FFN_ALL_02(){ + batch res; + res.s1=0x02; + res.s2=0x02; + res.s3=0x02; + res.s4=0x02; + return res; +} +batch static inline B_FFN_ALL_04(){ + batch res; + res.s1=0x04; + res.s2=0x04; + res.s3=0x04; + res.s4=0x04; + return res; +} +batch static inline B_FFN_ALL_10(){ + batch res; + res.s1=0x10; + res.s2=0x10; + res.s3=0x10; + res.s4=0x10; + return res; +} +batch static inline B_FFN_ALL_40(){ + batch res; + res.s1=0x40; + res.s2=0x40; + res.s3=0x40; + res.s4=0x40; + return res; +} +batch static inline B_FFN_ALL_80(){ + batch res; + res.s1=0x80; + res.s2=0x80; + res.s3=0x80; + res.s4=0x80; + return res; +} + +batch static inline B_FFSH8L(batch a,int n){ + batch res; + res.s1=a.s1<>n; + res.s2=a.s2>>n; + res.s3=a.s3>>n; + res.s4=a.s4>>n; + return res; +} + + +void static inline M_EMPTY(void){ +} Index: libs/libmythtv/FFdecsa/stream.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ libs/libmythtv/FFdecsa/stream.c 2006-06-20 17:36:06.000000000 -0400 @@ -0,0 +1,906 @@ +/* FFdecsa -- fast decsa algorithm + * + * Copyright (C) 
2003-2004 fatih89r + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + + +// define statics only once, when STREAM_INIT +#ifdef STREAM_INIT +struct stream_regs { + group A[32+10][4]; // 32 because we will move back (virtual shift register) + group B[32+10][4]; // 32 because we will move back (virtual shift register) + group X[4]; + group Y[4]; + group Z[4]; + group D[4]; + group E[4]; + group F[4]; + group p; + group q; + group r; + }; + +static inline void trasp64_32_88ccw(unsigned char *data){ +/* 64 rows of 32 bits transposition (bytes transp. 
- 8x8 rotate counterclockwise)*/ +#define row ((unsigned int *)data) + int i,j; + for(j=0;j<64;j+=32){ + unsigned int t,b; + for(i=0;i<16;i++){ + t=row[j+i]; + b=row[j+16+i]; + row[j+i] = (t&0x0000ffff) | ((b )<<16); + row[j+16+i]=((t )>>16) | (b&0xffff0000) ; + } + } + for(j=0;j<64;j+=16){ + unsigned int t,b; + for(i=0;i<8;i++){ + t=row[j+i]; + b=row[j+8+i]; + row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8); + row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00); + } + } + for(j=0;j<64;j+=8){ + unsigned int t,b; + for(i=0;i<4;i++){ + t=row[j+i]; + b=row[j+4+i]; + row[j+i] =((t&0x0f0f0f0f)<<4) | (b&0x0f0f0f0f); + row[j+4+i] = (t&0xf0f0f0f0) | ((b&0xf0f0f0f0)>>4); + } + } + for(j=0;j<64;j+=4){ + unsigned int t,b; + for(i=0;i<2;i++){ + t=row[j+i]; + b=row[j+2+i]; + row[j+i] =((t&0x33333333)<<2) | (b&0x33333333); + row[j+2+i] = (t&0xcccccccc) | ((b&0xcccccccc)>>2); + } + } + for(j=0;j<64;j+=2){ + unsigned int t,b; + for(i=0;i<1;i++){ + t=row[j+i]; + b=row[j+1+i]; + row[j+i] =((t&0x55555555)<<1) | (b&0x55555555); + row[j+1+i] = (t&0xaaaaaaaa) | ((b&0xaaaaaaaa)>>1); + } + } +#undef row +} + +static inline void trasp64_32_88cw(unsigned char *data){ +/* 64 rows of 32 bits transposition (bytes transp. 
- 8x8 rotate clockwise)*/ +#define row ((unsigned int *)data) + int i,j; + for(j=0;j<64;j+=32){ + unsigned int t,b; + for(i=0;i<16;i++){ + t=row[j+i]; + b=row[j+16+i]; + row[j+i] = (t&0x0000ffff) | ((b )<<16); + row[j+16+i]=((t )>>16) | (b&0xffff0000) ; + } + } + for(j=0;j<64;j+=16){ + unsigned int t,b; + for(i=0;i<8;i++){ + t=row[j+i]; + b=row[j+8+i]; + row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8); + row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00); + } + } + for(j=0;j<64;j+=8){ + unsigned int t,b; + for(i=0;i<4;i++){ + t=row[j+i]; + b=row[j+4+i]; + row[j+i] =((t&0xf0f0f0f0)>>4) | (b&0xf0f0f0f0); + row[j+4+i]= (t&0x0f0f0f0f) | ((b&0x0f0f0f0f)<<4); + } + } + for(j=0;j<64;j+=4){ + unsigned int t,b; + for(i=0;i<2;i++){ + t=row[j+i]; + b=row[j+2+i]; + row[j+i] =((t&0xcccccccc)>>2) | (b&0xcccccccc); + row[j+2+i]= (t&0x33333333) | ((b&0x33333333)<<2); + } + } + for(j=0;j<64;j+=2){ + unsigned int t,b; + for(i=0;i<1;i++){ + t=row[j+i]; + b=row[j+1+i]; + row[j+i] =((t&0xaaaaaaaa)>>1) | (b&0xaaaaaaaa); + row[j+1+i]= (t&0x55555555) | ((b&0x55555555)<<1); + } + } +#undef row +} + +//64-64---------------------------------------------------------- +static inline void trasp64_64_88ccw(unsigned char *data){ +/* 64 rows of 64 bits transposition (bytes transp. 
- 8x8 rotate counterclockwise)*/ +#define row ((unsigned long long int *)data) + int i,j; + for(j=0;j<64;j+=64){ + unsigned long long int t,b; + for(i=0;i<32;i++){ + t=row[j+i]; + b=row[j+32+i]; + row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32); + row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ; + } + } + for(j=0;j<64;j+=32){ + unsigned long long int t,b; + for(i=0;i<16;i++){ + t=row[j+i]; + b=row[j+16+i]; + row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + } + } + for(j=0;j<64;j+=16){ + unsigned long long int t,b; + for(i=0;i<8;i++){ + t=row[j+i]; + b=row[j+8+i]; + row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + } + } + for(j=0;j<64;j+=8){ + unsigned long long int t,b; + for(i=0;i<4;i++){ + t=row[j+i]; + b=row[j+4+i]; + row[j+i] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL); + row[j+4+i] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4); + } + } + for(j=0;j<64;j+=4){ + unsigned long long int t,b; + for(i=0;i<2;i++){ + t=row[j+i]; + b=row[j+2+i]; + row[j+i] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL); + row[j+2+i] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2); + } + } + for(j=0;j<64;j+=2){ + unsigned long long int t,b; + for(i=0;i<1;i++){ + t=row[j+i]; + b=row[j+1+i]; + row[j+i] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL); + row[j+1+i] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1); + } + } +#undef row +} + +static inline void trasp64_64_88cw(unsigned char *data){ +/* 64 rows of 64 bits transposition (bytes transp. 
- 8x8 rotate clockwise)*/ +#define row ((unsigned long long int *)data) + int i,j; + for(j=0;j<64;j+=64){ + unsigned long long int t,b; + for(i=0;i<32;i++){ + t=row[j+i]; + b=row[j+32+i]; + row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32); + row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ; + } + } + for(j=0;j<64;j+=32){ + unsigned long long int t,b; + for(i=0;i<16;i++){ + t=row[j+i]; + b=row[j+16+i]; + row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + } + } + for(j=0;j<64;j+=16){ + unsigned long long int t,b; + for(i=0;i<8;i++){ + t=row[j+i]; + b=row[j+8+i]; + row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + } + } + for(j=0;j<64;j+=8){ + unsigned long long int t,b; + for(i=0;i<4;i++){ + t=row[j+i]; + b=row[j+4+i]; + row[j+i] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL); + row[j+4+i] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4); + } + } + for(j=0;j<64;j+=4){ + unsigned long long int t,b; + for(i=0;i<2;i++){ + t=row[j+i]; + b=row[j+2+i]; + row[j+i] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL); + row[j+2+i] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2); + } + } + for(j=0;j<64;j+=2){ + unsigned long long int t,b; + for(i=0;i<1;i++){ + t=row[j+i]; + b=row[j+1+i]; + row[j+i] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL); + row[j+1+i] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1); + } + } +#undef row +} + +//64-128---------------------------------------------------------- +static inline void trasp64_128_88ccw(unsigned char *data){ +/* 64 rows of 128 bits transposition (bytes transp. 
- 8x8 rotate counterclockwise)*/ +#define halfrow ((unsigned long long int *)data) + int i,j; + for(j=0;j<64;j+=64){ + unsigned long long int t,b; + for(i=0;i<32;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+32+i)]; + halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32); + halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ; + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+32+i)+1]; + halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32); + halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ; + } + } + for(j=0;j<64;j+=32){ + unsigned long long int t,b; + for(i=0;i<16;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+16+i)]; + halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+16+i)+1]; + halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + } + } + for(j=0;j<64;j+=16){ + unsigned long long int t,b; + for(i=0;i<8;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+8+i)]; + halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+8+i)+1]; + halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + } + } + for(j=0;j<64;j+=8){ + unsigned long long int t,b; + for(i=0;i<4;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+4+i)]; + halfrow[2*(j+i)] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL); + halfrow[2*(j+4+i)] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+4+i)+1]; + halfrow[2*(j+i)+1] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL); + halfrow[2*(j+4+i)+1] = 
(t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4); + } + } + for(j=0;j<64;j+=4){ + unsigned long long int t,b; + for(i=0;i<2;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+2+i)]; + halfrow[2*(j+i)] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL); + halfrow[2*(j+2+i)] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+2+i)+1]; + halfrow[2*(j+i)+1] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL); + halfrow[2*(j+2+i)+1] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2); + } + } + for(j=0;j<64;j+=2){ + unsigned long long int t,b; + for(i=0;i<1;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+1+i)]; + halfrow[2*(j+i)] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL); + halfrow[2*(j+1+i)] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+1+i)+1]; + halfrow[2*(j+i)+1] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL); + halfrow[2*(j+1+i)+1] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1); + } + } +#undef halfrow +} + +static inline void trasp64_128_88cw(unsigned char *data){ +/* 64 rows of 128 bits transposition (bytes transp. 
- 8x8 rotate clockwise)*/ +#define halfrow ((unsigned long long int *)data) + int i,j; + for(j=0;j<64;j+=64){ + unsigned long long int t,b; + for(i=0;i<32;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+32+i)]; + halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32); + halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ; + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+32+i)+1]; + halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32); + halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ; + } + } + for(j=0;j<64;j+=32){ + unsigned long long int t,b; + for(i=0;i<16;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+16+i)]; + halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+16+i)+1]; + halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); + halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; + } + } + for(j=0;j<64;j+=16){ + unsigned long long int t,b; + for(i=0;i<8;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+8+i)]; + halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+8+i)+1]; + halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); + halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); + } + } + for(j=0;j<64;j+=8){ + unsigned long long int t,b; + for(i=0;i<4;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+4+i)]; + halfrow[2*(j+i)] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL); + halfrow[2*(j+4+i)] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+4+i)+1]; + halfrow[2*(j+i)+1] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL); + halfrow[2*(j+4+i)+1] = 
(t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4); + } + } + for(j=0;j<64;j+=4){ + unsigned long long int t,b; + for(i=0;i<2;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+2+i)]; + halfrow[2*(j+i)] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL); + halfrow[2*(j+2+i)] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+2+i)+1]; + halfrow[2*(j+i)+1] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL); + halfrow[2*(j+2+i)+1] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2); + } + } + for(j=0;j<64;j+=2){ + unsigned long long int t,b; + for(i=0;i<1;i++){ + t=halfrow[2*(j+i)]; + b=halfrow[2*(j+1+i)]; + halfrow[2*(j+i)] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL); + halfrow[2*(j+1+i)] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1); + t=halfrow[2*(j+i)+1]; + b=halfrow[2*(j+1+i)+1]; + halfrow[2*(j+i)+1] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL); + halfrow[2*(j+1+i)+1] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1); + } + } +#undef halfrow +} +#endif + + +#ifdef STREAM_INIT +void stream_cypher_group_init( + struct stream_regs *regs, + group iA[8][4], // [In] iA00,iA01,...iA73 32 groups | Derived from key. + group iB[8][4], // [In] iB00,iB01,...iB73 32 groups | Derived from key. + unsigned char *sb) // [In] (SB0,SB1,...SB7)...x32 32*8 bytes | Extra input. +#endif +#ifdef STREAM_NORMAL +void stream_cypher_group_normal( + struct stream_regs *regs, + unsigned char *cb) // [Out] (CB0,CB1,...CB7)...x32 32*8 bytes | Output. 
+#endif +{ +#ifdef STREAM_INIT + group in1[4]; + group in2[4]; +#endif + group extra_B[4]; + group fa,fb,fc,fd,fe; + group s1a,s1b,s2a,s2b,s3a,s3b,s4a,s4b,s5a,s5b,s6a,s6b,s7a,s7b; + group next_E[4]; + group tmp0,tmp1,tmp2,tmp3,tmp4; +#ifdef STREAM_INIT + group *sb_g=(group *)sb; +#endif +#ifdef STREAM_NORMAL + group *cb_g=(group *)cb; +#endif + int aboff; + int i,j,k,b; + int dbg; + +#ifdef STREAM_INIT + DBG(fprintf(stderr,":::::::::: BEGIN STREAM INIT\n")); +#endif +#ifdef STREAM_NORMAL + DBG(fprintf(stderr,":::::::::: BEGIN STREAM NORMAL\n")); +#endif +#ifdef STREAM_INIT +for(j=0;j<64;j++){ + DBG(fprintf(stderr,"precall prerot stream_in[%2i]=",j)); + DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG)); +} + +DBG(dump_mem("stream_prerot ",sb,GROUP_PARALLELISM*8,BYPG)); +#if GROUP_PARALLELISM==32 +trasp64_32_88ccw(sb); +#endif +#if GROUP_PARALLELISM==64 +trasp64_64_88ccw(sb); +#endif +#if GROUP_PARALLELISM==128 +trasp64_128_88ccw(sb); +#endif +DBG(dump_mem("stream_postrot",sb,GROUP_PARALLELISM*8,BYPG)); + +for(j=0;j<64;j++){ + DBG(fprintf(stderr,"precall stream_in[%2i]=",j)); + DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG)); +} +#endif + + aboff=32; + +#ifdef STREAM_INIT + // load first 32 bits of ck into A[aboff+0]..A[aboff+7] + // load last 32 bits of ck into B[aboff+0]..B[aboff+7] + // all other regs = 0 + for(i=0;i<8;i++){ + for(b=0;b<4;b++){ +DBG(fprintf(stderr,"dbg from iA A[%i][%i]=",i,b)); +DBG(dump_mem("",(unsigned char *)&iA[i][b],BYPG,BYPG)); +DBG(fprintf(stderr," dbg from iB B[%i][%i]=",i,b)); +DBG(dump_mem("",(unsigned char *)&iB[i][b],BYPG,BYPG)); + regs->A[aboff+i][b]=iA[i][b]; + regs->B[aboff+i][b]=iB[i][b]; + } + } + for(b=0;b<4;b++){ + regs->A[aboff+8][b]=FF0(); + regs->A[aboff+9][b]=FF0(); + regs->B[aboff+8][b]=FF0(); + regs->B[aboff+9][b]=FF0(); + } + for(b=0;b<4;b++){ + regs->X[b]=FF0(); + regs->Y[b]=FF0(); + regs->Z[b]=FF0(); + regs->D[b]=FF0(); + regs->E[b]=FF0(); + regs->F[b]=FF0(); + } + regs->p=FF0(); + regs->q=FF0(); + regs->r=FF0(); +#endif + 
+for(dbg=0;dbg<4;dbg++){ + DBG(fprintf(stderr,"dbg A0[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&regs->A[aboff+0][dbg],BYPG,BYPG)); + DBG(fprintf(stderr,"dbg B0[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&regs->B[aboff+0][dbg],BYPG,BYPG)); +} + +//////////////////////////////////////////////////////////////////////////////// + + // EXTERNAL LOOP - 8 bytes per operation + for(i=0;i<8;i++){ + + DBG(fprintf(stderr,"--BEGIN EXTERNAL LOOP %i\n",i)); + +#ifdef STREAM_INIT + for(b=0;b<4;b++){ + in1[b]=sb_g[8*i+4+b]; + in2[b]=sb_g[8*i+b]; + } +#endif + + // INTERNAL LOOP - 2 bits per iteration + for(j=0; j<4; j++){ + + DBG(fprintf(stderr,"---BEGIN INTERNAL LOOP %i (EXT %i, INT %i)\n",j,i,j)); + + // from A0..A9, 35 bits are selected as inputs to 7 s-boxes + // 5 bits input per s-box, 2 bits output per s-box + + // we can select bits with zero masking and shifting operations + // and synthetize s-boxes with optimized boolean functions. + // this is the actual reason we do all the crazy transposition + // stuff to switch between normal and bit slice representations. + // this code really flies. 
+ + fe=regs->A[aboff+3][0];fa=regs->A[aboff+0][2];fb=regs->A[aboff+5][1];fc=regs->A[aboff+6][3];fd=regs->A[aboff+8][0]; +/* 1000 1110 1110 0001 : lev 7: */ //tmp0=( fa^( fb^( ( ( ( fa|fb )^fc )|( fc^fd ) )^ALL_ONES ) ) ); +/* 1110 0010 0011 0011 : lev 6: */ //tmp1=( ( fa|fb )^( ( fc&( fa|( fb^fd ) ) )^ALL_ONES ) ); +/* 0011 0110 1000 1101 : lev 5: */ //tmp2=( fa^( ( fb&fd )^( ( fa&fd )|fc ) ) ); +/* 0101 0101 1001 0011 : lev 5: */ //tmp3=( ( fa&fc )^( fa^( ( fa&fb )|fd ) ) ); +/* 1000 1110 1110 0001 : lev 7: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFOR(FFXOR(FFOR(fa,fb),fc),FFXOR(fc,fd)),FF1()))); +/* 1110 0010 0011 0011 : lev 6: */ tmp1=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fa,FFXOR(fb,fd))),FF1())); +/* 0011 0110 1000 1101 : lev 5: */ tmp2=FFXOR(fa,FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),fc))); +/* 0101 0101 1001 0011 : lev 5: */ tmp3=FFXOR(FFAND(fa,fc),FFXOR(fa,FFOR(FFAND(fa,fb),fd))); + s1a=FFXOR(tmp0,FFAND(fe,tmp1)); + s1b=FFXOR(tmp2,FFAND(fe,tmp3)); +//dump_mem("s1as1b-fe",&fe,BYPG,BYPG); +//dump_mem("s1as1b-fa",&fa,BYPG,BYPG); +//dump_mem("s1as1b-fb",&fb,BYPG,BYPG); +//dump_mem("s1as1b-fc",&fc,BYPG,BYPG); +//dump_mem("s1as1b-fd",&fd,BYPG,BYPG); + + fe=regs->A[aboff+1][1];fa=regs->A[aboff+2][2];fb=regs->A[aboff+5][3];fc=regs->A[aboff+6][0];fd=regs->A[aboff+8][1]; +/* 1001 1110 0110 0001 : lev 6: */ //tmp0=( fa^( ( fb&( fc|fd ) )^( fc^( fd^ALL_ONES ) ) ) ); +/* 0000 0011 0111 1011 : lev 5: */ //tmp1=( ( fa&( fb^fd ) )|( ( fa|fb )&fc ) ); +/* 1100 0110 1101 0010 : lev 6: */ //tmp2=( ( fb&fd )^( ( fa&fd )|( fb^( fc^ALL_ONES ) ) ) ); +/* 0001 1110 1111 0101 : lev 5: */ //tmp3=( ( fa&fd )|( fa^( fb^( fc&fd ) ) ) ); +/* 1001 1110 0110 0001 : lev 6: */ tmp0=FFXOR(fa,FFXOR(FFAND(fb,FFOR(fc,fd)),FFXOR(fc,FFXOR(fd,FF1())))); +/* 0000 0011 0111 1011 : lev 5: */ tmp1=FFOR(FFAND(fa,FFXOR(fb,fd)),FFAND(FFOR(fa,fb),fc)); +/* 1100 0110 1101 0010 : lev 6: */ tmp2=FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),FFXOR(fb,FFXOR(fc,FF1())))); +/* 0001 1110 1111 0101 : lev 5: */ 
tmp3=FFOR(FFAND(fa,fd),FFXOR(fa,FFXOR(fb,FFAND(fc,fd)))); + s2a=FFXOR(tmp0,FFAND(fe,tmp1)); + s2b=FFXOR(tmp2,FFAND(fe,tmp3)); + + fe=regs->A[aboff+0][3];fa=regs->A[aboff+1][0];fb=regs->A[aboff+4][1];fc=regs->A[aboff+4][3];fd=regs->A[aboff+5][2]; +/* 0100 1011 1001 0110 : lev 5: */ //tmp0=( fa^( fb^( ( fc&( fa|fd ) )^fd ) ) ); +/* 1101 0101 1000 1100 : lev 7: */ //tmp1=( ( fa&fc )^( ( fa^fd )|( ( fb|fc )^( fd^ALL_ONES ) ) ) ); +/* 0010 0111 1101 1000 : lev 4: */ //tmp2=( fa^( ( ( fb^fc )&fd )^fc ) ); +/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES; +/* 0100 1011 1001 0110 : lev 5: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFAND(fc,FFOR(fa,fd)),fd))); +/* 1101 0101 1000 1100 : lev 7: */ tmp1=FFXOR(FFAND(fa,fc),FFOR(FFXOR(fa,fd),FFXOR(FFOR(fb,fc),FFXOR(fd,FF1())))); +/* 0010 0111 1101 1000 : lev 4: */ tmp2=FFXOR(fa,FFXOR(FFAND(FFXOR(fb,fc),fd),fc)); +/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1(); + s3a=FFXOR(tmp0,FFAND(FFNOT(fe),tmp1)); + s3b=FFXOR(tmp2,FFAND(fe,tmp3)); + + fe=regs->A[aboff+2][3];fa=regs->A[aboff+0][1];fb=regs->A[aboff+1][3];fc=regs->A[aboff+3][2];fd=regs->A[aboff+7][0]; +/* 1011 0101 0100 1001 : lev 7: */ //tmp0=( fa^( ( fc&( fa^fd ) )|( fb^( fc|( fd^ALL_ONES ) ) ) ) ); +/* 0010 1101 0110 0110 : lev 6: */ //tmp1=( ( fa&fb )^( fb^( ( ( fa|fc )&fd )^fc ) ) ); +/* 0110 0111 1101 0000 : lev 7: */ //tmp2=( fa^( ( fb&fc )|( ( ( fa&( fb^fd ) )|fc )^fd ) ) ); +/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES; +/* 1011 0101 0100 1001 : lev 7: */ tmp0=FFXOR(fa,FFOR(FFAND(fc,FFXOR(fa,fd)),FFXOR(fb,FFOR(fc,FFXOR(fd,FF1()))))); +/* 0010 1101 0110 0110 : lev 6: */ tmp1=FFXOR(FFAND(fa,fb),FFXOR(fb,FFXOR(FFAND(FFOR(fa,fc),fd),fc))); +/* 0110 0111 1101 0000 : lev 7: */ tmp2=FFXOR(fa,FFOR(FFAND(fb,fc),FFXOR(FFOR(FFAND(fa,FFXOR(fb,fd)),fc),fd))); +/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1(); + s4a=FFXOR(tmp0,FFAND(fe,FFXOR(tmp1,tmp0))); + s4b=FFXOR(FFXOR(s4a,tmp2),FFAND(fe,tmp3)); + + 
fe=regs->A[aboff+4][2];fa=regs->A[aboff+3][3];fb=regs->A[aboff+5][0];fc=regs->A[aboff+7][1];fd=regs->A[aboff+8][2]; +/* 1000 1111 0011 0010 : lev 7: */ //tmp0=( ( ( fa&( fb|fc ) )^fb )|( ( ( fa^fc )|fd )^ALL_ONES ) ); +/* 0110 1011 0000 1011 : lev 6: */ //tmp1=( fb^( ( fc^fd )&( fc^( fb|( fa^fd ) ) ) ) ); +/* 0001 1010 0111 1001 : lev 6: */ //tmp2=( ( fa&fc )^( fb^( ( fb|( fa^fc ) )&fd ) ) ); +/* 0101 1101 1101 0101 : lev 4: */ //tmp3=( ( ( fa^fb )&( fc^ALL_ONES ) )|fd ); +/* 1000 1111 0011 0010 : lev 7: */ tmp0=FFOR(FFXOR(FFAND(fa,FFOR(fb,fc)),fb),FFXOR(FFOR(FFXOR(fa,fc),fd),FF1())); +/* 0110 1011 0000 1011 : lev 6: */ tmp1=FFXOR(fb,FFAND(FFXOR(fc,fd),FFXOR(fc,FFOR(fb,FFXOR(fa,fd))))); +/* 0001 1010 0111 1001 : lev 6: */ tmp2=FFXOR(FFAND(fa,fc),FFXOR(fb,FFAND(FFOR(fb,FFXOR(fa,fc)),fd))); +/* 0101 1101 1101 0101 : lev 4: */ tmp3=FFOR(FFAND(FFXOR(fa,fb),FFXOR(fc,FF1())),fd); + s5a=FFXOR(tmp0,FFAND(fe,tmp1)); + s5b=FFXOR(tmp2,FFAND(fe,tmp3)); + + fe=regs->A[aboff+2][1];fa=regs->A[aboff+3][1];fb=regs->A[aboff+4][0];fc=regs->A[aboff+6][2];fd=regs->A[aboff+8][3]; +/* 0011 0110 0010 1101 : lev 6: */ //tmp0=( ( ( fa&fc )&fd )^( ( fb&( fa|fd ) )^fc ) ); +/* 1110 1110 1011 1011 : lev 3: */ //tmp1=( ( ( fa^fc )&fd )^ALL_ONES ); +/* 0101 1000 0110 0111 : lev 6: */ //tmp2=( ( fa&( fb|fc ) )^( fb^( ( fb&fc )|fd ) ) ); +/* 0001 0011 0000 0001 : lev 5: */ //tmp3=( fc&( ( fa&( fb^fd ) )^( fb|fd ) ) ); +/* 0011 0110 0010 1101 : lev 6: */ tmp0=FFXOR(FFAND(FFAND(fa,fc),fd),FFXOR(FFAND(fb,FFOR(fa,fd)),fc)); +/* 1110 1110 1011 1011 : lev 3: */ tmp1=FFXOR(FFAND(FFXOR(fa,fc),fd),FF1()); +/* 0101 1000 0110 0111 : lev 6: */ tmp2=FFXOR(FFAND(fa,FFOR(fb,fc)),FFXOR(fb,FFOR(FFAND(fb,fc),fd))); +/* 0001 0011 0000 0001 : lev 5: */ tmp3=FFAND(fc,FFXOR(FFAND(fa,FFXOR(fb,fd)),FFOR(fb,fd))); + s6a=FFXOR(tmp0,FFAND(fe,tmp1)); + s6b=FFXOR(tmp2,FFAND(fe,tmp3)); + + fe=regs->A[aboff+1][2];fa=regs->A[aboff+2][0];fb=regs->A[aboff+6][1];fc=regs->A[aboff+7][2];fd=regs->A[aboff+7][3]; +/* 0111 1000 1001 0110 
: lev 5: */ //tmp0=( fb^( ( fc&fd )|( fa^( fc^fd ) ) ) ); +/* 0100 1001 0101 1011 : lev 6: */ //tmp1=( ( fb|fd )&( ( fa&fc )|( fb^( fc^fd ) ) ) ); +/* 0100 1001 1011 1001 : lev 5: */ //tmp2=( ( fa|fb )^( ( fc&( fb|fd ) )^fd ) ); +/* 1111 1111 1101 1101 : lev 3: */ //tmp3=( fd|( ( fa&fc )^ALL_ONES ) ); +/* 0111 1000 1001 0110 : lev 5: */ tmp0=FFXOR(fb,FFOR(FFAND(fc,fd),FFXOR(fa,FFXOR(fc,fd)))); +/* 0100 1001 0101 1011 : lev 6: */ tmp1=FFAND(FFOR(fb,fd),FFOR(FFAND(fa,fc),FFXOR(fb,FFXOR(fc,fd)))); +/* 0100 1001 1011 1001 : lev 5: */ tmp2=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fb,fd)),fd)); +/* 1111 1111 1101 1101 : lev 3: */ tmp3=FFOR(fd,FFXOR(FFAND(fa,fc),FF1())); + s7a=FFXOR(tmp0,FFAND(fe,tmp1)); + s7b=FFXOR(tmp2,FFAND(fe,tmp3)); + + +/* + we have just done this: + + int sbox1[0x20] = {2,0,1,1,2,3,3,0, 3,2,2,0,1,1,0,3, 0,3,3,0,2,2,1,1, 2,2,0,3,1,1,3,0}; + int sbox2[0x20] = {3,1,0,2,2,3,3,0, 1,3,2,1,0,0,1,2, 3,1,0,3,3,2,0,2, 0,0,1,2,2,1,3,1}; + int sbox3[0x20] = {2,0,1,2,2,3,3,1, 1,1,0,3,3,0,2,0, 1,3,0,1,3,0,2,2, 2,0,1,2,0,3,3,1}; + int sbox4[0x20] = {3,1,2,3,0,2,1,2, 1,2,0,1,3,0,0,3, 1,0,3,1,2,3,0,3, 0,3,2,0,1,2,2,1}; + int sbox5[0x20] = {2,0,0,1,3,2,3,2, 0,1,3,3,1,0,2,1, 2,3,2,0,0,3,1,1, 1,0,3,2,3,1,0,2}; + int sbox6[0x20] = {0,1,2,3,1,2,2,0, 0,1,3,0,2,3,1,3, 2,3,0,2,3,0,1,1, 2,1,1,2,0,3,3,0}; + int sbox7[0x20] = {0,3,2,2,3,0,0,1, 3,0,1,3,1,2,2,1, 1,0,3,3,0,1,1,2, 2,3,1,0,2,3,0,2}; + + s12 = sbox1[ (((A3>>0)&1)<<4) | (((A0>>2)&1)<<3) | (((A5>>1)&1)<<2) | (((A6>>3)&1)<<1) | (((A8>>0)&1)<<0) ] + |sbox2[ (((A1>>1)&1)<<4) | (((A2>>2)&1)<<3) | (((A5>>3)&1)<<2) | (((A6>>0)&1)<<1) | (((A8>>1)&1)<<0) ]; + s34 = sbox3[ (((A0>>3)&1)<<4) | (((A1>>0)&1)<<3) | (((A4>>1)&1)<<2) | (((A4>>3)&1)<<1) | (((A5>>2)&1)<<0) ] + |sbox4[ (((A2>>3)&1)<<4) | (((A0>>1)&1)<<3) | (((A1>>3)&1)<<2) | (((A3>>2)&1)<<1) | (((A7>>0)&1)<<0) ]; + s56 = sbox5[ (((A4>>2)&1)<<4) | (((A3>>3)&1)<<3) | (((A5>>0)&1)<<2) | (((A7>>1)&1)<<1) | (((A8>>2)&1)<<0) ] + |sbox6[ (((A2>>1)&1)<<4) | (((A3>>1)&1)<<3) | 
(((A4>>0)&1)<<2) | (((A6>>2)&1)<<1) | (((A8>>3)&1)<<0) ]; + s7 = sbox7[ (((A1>>2)&1)<<4) | (((A2>>0)&1)<<3) | (((A6>>1)&1)<<2) | (((A7>>2)&1)<<1) | (((A7>>3)&1)<<0) ]; +*/ + + // use 4x4 xor to produce extra nibble for T3 + + extra_B[3]=FFXOR(FFXOR(FFXOR(regs->B[aboff+2][0],regs->B[aboff+5][1]),regs->B[aboff+6][2]),regs->B[aboff+8][3]); + extra_B[2]=FFXOR(FFXOR(FFXOR(regs->B[aboff+5][0],regs->B[aboff+7][1]),regs->B[aboff+2][3]),regs->B[aboff+3][2]); + extra_B[1]=FFXOR(FFXOR(FFXOR(regs->B[aboff+4][3],regs->B[aboff+7][2]),regs->B[aboff+3][0]),regs->B[aboff+4][1]); + extra_B[0]=FFXOR(FFXOR(FFXOR(regs->B[aboff+8][2],regs->B[aboff+5][3]),regs->B[aboff+2][1]),regs->B[aboff+7][0]); +for(dbg=0;dbg<4;dbg++){ + DBG(fprintf(stderr,"extra_B[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&extra_B[dbg],BYPG,BYPG)); +} + + // T1 = xor all inputs + // in1, in2, D are only used in T1 during initialisation, not generation + for(b=0;b<4;b++){ + regs->A[aboff-1][b]=FFXOR(regs->A[aboff+9][b],regs->X[b]); + } + +#ifdef STREAM_INIT + for(b=0;b<4;b++){ + regs->A[aboff-1][b]=FFXOR(FFXOR(regs->A[aboff-1][b],regs->D[b]),((j % 2) ? in2[b] : in1[b])); + } +#endif + +for(dbg=0;dbg<4;dbg++){ + DBG(fprintf(stderr,"next_A0[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&regs->A[aboff-1][dbg],BYPG,BYPG)); +} + + // T2 = xor all inputs + // in1, in2 are only used in T2 during initialisation, not generation + // if p=0, use this, if p=1, rotate the result left + for(b=0;b<4;b++){ + regs->B[aboff-1][b]=FFXOR(FFXOR(regs->B[aboff+6][b],regs->B[aboff+9][b]),regs->Y[b]); + } + +#ifdef STREAM_INIT + for(b=0;b<4;b++){ + regs->B[aboff-1][b]=FFXOR(regs->B[aboff-1][b],((j % 2) ? 
in1[b] : in2[b])); + } +#endif + +for(dbg=0;dbg<4;dbg++){ + DBG(fprintf(stderr,"next_B0[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG)); +} + + // if p=1, rotate left (yes, this is what we're doing) + tmp3=regs->B[aboff-1][3]; + regs->B[aboff-1][3]=FFXOR(regs->B[aboff-1][3],FFAND(FFXOR(regs->B[aboff-1][3],regs->B[aboff-1][2]),regs->p)); + regs->B[aboff-1][2]=FFXOR(regs->B[aboff-1][2],FFAND(FFXOR(regs->B[aboff-1][2],regs->B[aboff-1][1]),regs->p)); + regs->B[aboff-1][1]=FFXOR(regs->B[aboff-1][1],FFAND(FFXOR(regs->B[aboff-1][1],regs->B[aboff-1][0]),regs->p)); + regs->B[aboff-1][0]=FFXOR(regs->B[aboff-1][0],FFAND(FFXOR(regs->B[aboff-1][0],tmp3),regs->p)); + +for(dbg=0;dbg<4;dbg++){ + DBG(fprintf(stderr,"next_B0[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG)); +} + + // T3 = xor all inputs + for(b=0;b<4;b++){ + regs->D[b]=FFXOR(FFXOR(regs->E[b],regs->Z[b]),extra_B[b]); + } + +for(dbg=0;dbg<4;dbg++){ + DBG(fprintf(stderr,"D[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&regs->D[dbg],BYPG,BYPG)); +} + + // T4 = sum, carry of Z + E + r + for(b=0;b<4;b++){ + next_E[b]=regs->F[b]; + } + + tmp0=FFXOR(regs->Z[0],regs->E[0]); + tmp1=FFAND(regs->Z[0],regs->E[0]); + regs->F[0]=FFXOR(regs->E[0],FFAND(regs->q,FFXOR(regs->Z[0],regs->r))); + tmp3=FFAND(tmp0,regs->r); + tmp4=FFOR(tmp1,tmp3); + + tmp0=FFXOR(regs->Z[1],regs->E[1]); + tmp1=FFAND(regs->Z[1],regs->E[1]); + regs->F[1]=FFXOR(regs->E[1],FFAND(regs->q,FFXOR(regs->Z[1],tmp4))); + tmp3=FFAND(tmp0,tmp4); + tmp4=FFOR(tmp1,tmp3); + + tmp0=FFXOR(regs->Z[2],regs->E[2]); + tmp1=FFAND(regs->Z[2],regs->E[2]); + regs->F[2]=FFXOR(regs->E[2],FFAND(regs->q,FFXOR(regs->Z[2],tmp4))); + tmp3=FFAND(tmp0,tmp4); + tmp4=FFOR(tmp1,tmp3); + + tmp0=FFXOR(regs->Z[3],regs->E[3]); + tmp1=FFAND(regs->Z[3],regs->E[3]); + regs->F[3]=FFXOR(regs->E[3],FFAND(regs->q,FFXOR(regs->Z[3],tmp4))); + tmp3=FFAND(tmp0,tmp4); + regs->r=FFXOR(regs->r,FFAND(regs->q,FFXOR(FFOR(tmp1,tmp3),regs->r))); // 
ultimate carry + +/* + we have just done this: (believe it or not) + + if (q) { + F = Z + E + r; + r = (F >> 4) & 1; + F = F & 0x0f; + } + else { + F = E; + } +*/ + for(b=0;b<4;b++){ + regs->E[b]=next_E[b]; + } +for(dbg=0;dbg<4;dbg++){ + DBG(fprintf(stderr,"F[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&regs->F[dbg],BYPG,BYPG)); +} +DBG(fprintf(stderr,"r=")); +DBG(dump_mem("",(unsigned char *)&regs->r,BYPG,BYPG)); +for(dbg=0;dbg<4;dbg++){ + DBG(fprintf(stderr,"E[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&regs->E[dbg],BYPG,BYPG)); +} + + // this simple instruction is virtually shifting all the shift registers + aboff--; + +/* + we've just done this: + + A9=A8;A8=A7;A7=A6;A6=A5;A5=A4;A4=A3;A3=A2;A2=A1;A1=A0;A0=next_A0; + B9=B8;B8=B7;B7=B6;B6=B5;B5=B4;B4=B3;B3=B2;B2=B1;B1=B0;B0=next_B0; +*/ + + regs->X[0]=s1a; + regs->X[1]=s2a; + regs->X[2]=s3b; + regs->X[3]=s4b; + regs->Y[0]=s3a; + regs->Y[1]=s4a; + regs->Y[2]=s5b; + regs->Y[3]=s6b; + regs->Z[0]=s5a; + regs->Z[1]=s6a; + regs->Z[2]=s1b; + regs->Z[3]=s2b; + regs->p=s7a; + regs->q=s7b; +for(dbg=0;dbg<4;dbg++){ + DBG(fprintf(stderr,"X[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&regs->X[dbg],BYPG,BYPG)); +} +for(dbg=0;dbg<4;dbg++){ + DBG(fprintf(stderr,"Y[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&regs->Y[dbg],BYPG,BYPG)); +} +for(dbg=0;dbg<4;dbg++){ + DBG(fprintf(stderr,"Z[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&regs->Z[dbg],BYPG,BYPG)); +} +DBG(fprintf(stderr,"p=")); +DBG(dump_mem("",(unsigned char *)&regs->p,BYPG,BYPG)); +DBG(fprintf(stderr,"q=")); +DBG(dump_mem("",(unsigned char *)&regs->q,BYPG,BYPG)); + +#ifdef STREAM_NORMAL + // require 4 loops per output byte + // 2 output bits are a function of the 4 bits of D + // xor 2 by 2 + cb_g[8*i+7-2*j]=FFXOR(regs->D[2],regs->D[3]); + cb_g[8*i+6-2*j]=FFXOR(regs->D[0],regs->D[1]); +for(dbg=0;dbg<8;dbg++){ + DBG(fprintf(stderr,"op[%i]=",dbg)); + DBG(dump_mem("",(unsigned char *)&cb_g[8*i+dbg],BYPG,BYPG)); +} +#endif + +DBG(fprintf(stderr,"---END INTERNAL LOOP\n")); + + 
} // INTERNAL LOOP + +DBG(fprintf(stderr,"--END EXTERNAL LOOP\n")); + + } // EXTERNAL LOOP + + // move 32 steps forward, ready for next call + for(k=0;k<10;k++){ + for(b=0;b<4;b++){ +DBG(fprintf(stderr,"moving forward AB k=%i b=%i\n",k,b)); + regs->A[32+k][b]=regs->A[k][b]; + regs->B[32+k][b]=regs->B[k][b]; + } + } + + +//////////////////////////////////////////////////////////////////////////////// + +#ifdef STREAM_NORMAL +for(j=0;j<64;j++){ + DBG(fprintf(stderr,"postcall prerot cb[%2i]=",j)); + DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG)); +} + +#if GROUP_PARALLELISM==32 +trasp64_32_88cw(cb); +#endif +#if GROUP_PARALLELISM==64 +trasp64_64_88cw(cb); +#endif +#if GROUP_PARALLELISM==128 +trasp64_128_88cw(cb); +#endif + +for(j=0;j<64;j++){ + DBG(fprintf(stderr,"postcall postrot cb[%2i]=",j)); + DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG)); +} +#endif + +#ifdef STREAM_INIT + DBG(fprintf(stderr,":::::::::: END STREAM INIT\n")); +#endif +#ifdef STREAM_NORMAL + DBG(fprintf(stderr,":::::::::: END STREAM NORMAL\n")); +#endif + +} + Index: libs/libmythtv/dvbrecorder.cpp =================================================================== --- libs/libmythtv/dvbrecorder.cpp.orig 2006-06-20 17:34:53.000000000 -0400 +++ libs/libmythtv/dvbrecorder.cpp 2006-06-20 17:39:37.000000000 -0400 @@ -67,6 +67,11 @@ #include "../libavformat/avformat.h" #include "../libavformat/mpegts.h" +#include "FFdecsa/FFdecsa.h" +static pthread_mutex_t csalock=PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t allkeylock=PTHREAD_MUTEX_INITIALIZER; +static QMap<int, struct decsaKey *> decsaMap; + const int DVBRecorder::PMT_PID = 0x1700; ///< PID for rewritten PMT const int DVBRecorder::TSPACKETS_BETWEEN_PSIP_SYNC = 2000; const int DVBRecorder::POLL_INTERVAL = 50; // msec @@ -111,6 +116,9 @@ _buffer = new unsigned char[_buffer_size]; bzero(_buffer, _buffer_size); + + initDeCSA(); + } DVBRecorder::~DVBRecorder() @@ -190,6 +198,8 @@ _reset_pid_filters = true; bzero(_ps_rec_buf, sizeof(unsigned char) * 3); + 
_csa_softKey = DVBRecorder::UpdateDeCSAKeys( + _card_number_option, 'G', 0, NULL, 0); //'G' = 'Get' } bool DVBRecorder::Open(void) @@ -411,6 +421,8 @@ void DVBRecorder::StartRecording(void) { + uint bufferOffset = 0; + if (!Open()) { _error = true; @@ -467,12 +479,14 @@ if (Poll()) { #ifdef USE_DRB - ssize_t len = _drb->Read(_buffer, _buffer_size); + ssize_t len = _drb->Read(_buffer + bufferOffset, + _buffer_size - bufferOffset); #else // if !USE_DRB - ssize_t len = safe_read(_stream_fd, _buffer, _buffer_size); + ssize_t len = safe_read(_stream_fd, _buffer + bufferOffset, + _buffer_size - bufferOffset); #endif // !USE_DRB - if (len > 0) - ProcessDataTS(_buffer, len); + if (len + bufferOffset > 0) + bufferOffset = ProcessDataTS(_buffer, len + bufferOffset); } #ifdef USE_DRB @@ -690,14 +704,20 @@ return len; uint pos = 0; - uint end = len - TSPacket::SIZE; + int end = len - TSPacket::SIZE; + + // we may not decode the entire buffer + if ((end = PreProcessDataTS(buffer, len)) < 0) + return 0; while (pos <= end) { const TSPacket *pkt = reinterpret_cast<const TSPacket*>(&buffer[pos]); ProcessTSPacket(*pkt); pos += TSPacket::SIZE; } - return len - pos; + + // At this point pos is the start of the first packet not handled + return PostProcessDataTS(buffer, pos, len - pos); } bool DVBRecorder::ProcessTSPacket(const TSPacket& tspacket) @@ -1205,3 +1225,203 @@ int cardnum = _card_number_option; GENERAL(debugmsg); } + +void DVBRecorder::initDeCSA() +{ + delete[] _buffer; /* _buffer is allocated with new[], so array delete is required */ + + _csa_cluster_size = get_suggested_cluster_size(); + _buffer_size = MPEG_TS_PKT_SIZE * _csa_cluster_size; + _buffer = new unsigned char[_buffer_size]; + bzero(_buffer, _buffer_size); + _csa_cluster = new unsigned char *[2 * _csa_cluster_size]; // start / end ptr for each packet + _csa_clusterptr = _csa_cluster; + _csa_pkt_buf_count = 0; +#if 0 + _csa_even_pkt_count = 0; + _csa_odd_pkt_count = 0; +#endif + + memset(_csa_even_ck, 0, 8); + memset(_csa_odd_ck, 0, 8); + _csa_softKey = NULL; +} + +struct decsaKey 
*DVBRecorder::UpdateDeCSAKeys( + int cardnum, unsigned char keytype, + int index, unsigned char *key, int pid) +{ + struct decsaKey *decsaPtr = NULL; + + VERBOSE(VB_RECORD, QString("Got Key type(%1) idx: %2, pid: %3") + .arg(keytype).arg(index).arg(pid)); + + pthread_mutex_lock(&allkeylock); + QMap::Iterator it = decsaMap.begin(); + while (it != decsaMap.end()) + { + if (it.key() == cardnum) + { + decsaPtr = it.data(); + break; + } + else + it++; + } + if (keytype == 'I') + { + if (decsaPtr) + { + pthread_mutex_lock(&decsaPtr->keylock); + decsaPtr->valid_keys = 0x00; + pthread_mutex_unlock(&decsaPtr->keylock); + } + pthread_mutex_unlock(&allkeylock); + return decsaPtr; + } + if (!decsaPtr) { + decsaPtr = new struct decsaKey; + memset(decsaPtr,0, sizeof(struct decsaKey)); + pthread_mutex_init(&decsaPtr->keylock, NULL); + decsaMap[cardnum] = decsaPtr; + memset(decsaPtr->keys, 0, sizeof(decsaPtr->keys)); + memset(decsaPtr->pidmap, 0, sizeof(decsaPtr->pidmap)); + } + pthread_mutex_lock(&decsaPtr->keylock); + pthread_mutex_unlock(&allkeylock); + if (keytype == 'G') + { + pthread_mutex_unlock(&decsaPtr->keylock); + return decsaPtr; + } + if (! decsaPtr->use_decsa) + decsaPtr->use_decsa = true; + if (! 
decsaPtr->keys[index]) + decsaPtr->keys[index] = get_key_struct(); + + if (keytype == 'P') // PID + decsaPtr->pidmap[pid] = index; + else if (keytype == 'F') //Force PID + memset(decsaPtr->pidmap, index, sizeof(decsaPtr->pidmap)); + else if (keytype == 'E') //Even + { + decsaPtr->valid_keys |= 0x01; + set_even_control_word(decsaPtr->keys[index], key); + } + else if (keytype == 'O') //Odd + { + decsaPtr->valid_keys |= 0x02; + set_odd_control_word(decsaPtr->keys[index], key); + } + + pthread_mutex_unlock(&decsaPtr->keylock); + return decsaPtr; +} + +int DVBRecorder::PreProcessDataTS(unsigned char *buffer, uint len) +{ + int decodedPackets = 0, unencrypted = 0; + bool new_range = true; + int curr_idx = -1; + int offset = -2; + + // packets already added to the cluster array can be skipped + uint pos = _csa_pkt_buf_count * TSPacket::SIZE; + uint end = len - TSPacket::SIZE; + + // walk the packets to setup the decrypt cluster array + while (pos <= end) + { + if (buffer[pos + 3] & 0xC0) + { + // encrypted + int index = _csa_softKey->pidmap[((buffer[pos + 1] << 8) + + (buffer[pos + 2])) & (MAX_CSA_PIDS-1)]; + if (curr_idx < 0 || index == curr_idx) + { + //same or no index + curr_idx = index; + + if (new_range) + { + // the buffer is allocated to never be + // bigger than the cluster array + new_range = false; + offset += 2; + _csa_clusterptr[offset] = buffer + pos; + } + _csa_clusterptr[ offset + 1] = buffer + pos + TSPacket::SIZE; + // advance packet count and packet position + _csa_pkt_buf_count++; + } + else + new_range = true; + } + else if (! 
new_range) + { + _csa_clusterptr[ offset + 1] = buffer + pos + TSPacket::SIZE; + _csa_pkt_buf_count++; + } + else if (_csa_pkt_buf_count == 0) + unencrypted++; + pos += TSPacket::SIZE; + } + _csa_clusterptr += offset + 2; + + // terminate the cluster array + _csa_clusterptr[0] = NULL; + + // set the keys (if needed) and decrypt + pthread_mutex_lock(&_csa_softKey->keylock); + if(_csa_softKey->use_decsa && _csa_pkt_buf_count) { + if (_csa_softKey->valid_keys && curr_idx > 0) { + pthread_mutex_lock(&csalock); + decodedPackets = decrypt_packets(_csa_softKey->keys[curr_idx], + _csa_cluster); + pthread_mutex_unlock(&csalock); + pthread_mutex_unlock(&_csa_softKey->keylock); + } + else + { + // Clear all queued packets since we don't have a valid key + pthread_mutex_unlock(&_csa_softKey->keylock); + _csa_pkt_buf_count = 0; + _csa_clusterptr = _csa_cluster; + return (unencrypted - 1) * TSPacket::SIZE; + } + } else { + // Not using decsa so pass through all the packets + pthread_mutex_unlock(&_csa_softKey->keylock); + _csa_cluster[0] = NULL; // ensure the cluster table doesn't overflow when not using decsa + return len - TSPacket::SIZE; + } + + _csa_pkt_buf_count -= decodedPackets; + + // Return the start of the last available packet + return (unencrypted + decodedPackets - 1) * TSPacket::SIZE; +} + +uint DVBRecorder::PostProcessDataTS(unsigned char *buffer, uint offset, uint len) +{ + if (len == 0) { + _csa_clusterptr = _csa_cluster; + return 0; + } + + // relocate the unhandled packets + memmove(buffer, buffer + offset, len); + + // fix up the cluster array to account for the relocation + unsigned char **clusterWalk = _csa_cluster; + while (clusterWalk[0]) { + clusterWalk[0] -= offset; + clusterWalk[1] -= offset; + clusterWalk += 2; + } + + // save the address for the next time we add to the cluster array + _csa_clusterptr = clusterWalk; + + return len; +}