]> git.pld-linux.org Git - packages/mythtv.git/blame - myth_softcsa_r10247.diff
- fixed new shared lib deps
[packages/mythtv.git] / myth_softcsa_r10247.diff
CommitLineData
b91fb912 1=== libs/libmythtv/dvbrecorder.cpp
2Index: libs/libmythtv/dvbrecorder.h
3===================================================================
4--- libs/libmythtv/dvbrecorder.h.orig 2006-04-10 21:26:56.000000000 -0400
5+++ libs/libmythtv/dvbrecorder.h 2006-06-20 17:36:06.000000000 -0400
6@@ -28,6 +28,15 @@
7 class ProgramMapTable;
8 class TSPacket;
9
10+#define MAX_CSA_PIDS 8192 // element count of decsaKey::pidmap
11+struct decsaKey { // key record type returned by DVBRecorder::UpdateDeCSAKeys() and held in DVBRecorder::_csa_softKey
12+ bool use_decsa; // NOTE(review): presumably turns software descrambling on/off -- confirm in dvbrecorder.cpp
13+ unsigned char pidmap[MAX_CSA_PIDS]; //max # of pids; NOTE(review): presumably maps a PID to a slot in keys[] -- confirm
14+ void *keys[16]; // 16 opaque key handles; NOTE(review): presumably FFdecsa key objects -- confirm
15+ unsigned char valid_keys; // NOTE(review): count or bitmask? only 8 bits wide for 16 key slots -- confirm
16+ pthread_mutex_t keylock; // NOTE(review): presumably serialises access to the fields above -- confirm
17+};
18+
19 class PIDInfo
20 {
21 public:
22@@ -60,6 +69,10 @@
23 DVBRecorder(TVRec *rec, DVBChannel* dvbchannel);
24 ~DVBRecorder();
25
26+ static struct decsaKey *UpdateDeCSAKeys( // in-class declaration: a DVBRecorder:: qualifier here is ill-formed ("extra qualification")
27+ int cardnum, unsigned char keytype, // NOTE(review): parameter semantics are defined in dvbrecorder.cpp -- confirm there
28+ int index, unsigned char *key, int pid); // returns a pointer to a decsaKey record
29+
30 void SetOption(const QString &name, int value);
31
32 void SetOptionsFromProfile(RecordingProfile *profile,
33@@ -159,6 +172,18 @@
34 static const int TSPACKETS_BETWEEN_PSIP_SYNC;
35 static const int POLL_INTERVAL;
36 static const int POLL_WARNING_TIMEOUT;
37+
38+ void initDeCSA();
39+ uint PostProcessDataTS(unsigned char *buffer, uint offset, uint len);
40+ int PreProcessDataTS(unsigned char *buffer, uint len);
41+ int _csa_pkt_buf_count;
42+ unsigned char **_csa_cluster;
43+ unsigned char **_csa_clusterptr;
44+ int _csa_cluster_size;
45+
46+ struct decsaKey *_csa_softKey;
47+ unsigned char _csa_even_ck[8], _csa_odd_ck[8];
48+
49 };
50
51 inline void PIDInfo::Close(void)
52Index: libs/libmythtv/libmythtv.pro
53===================================================================
54--- libs/libmythtv/libmythtv.pro.orig 2006-04-10 21:26:56.000000000 -0400
55+++ libs/libmythtv/libmythtv.pro 2006-06-20 17:36:06.000000000 -0400
56@@ -14,6 +14,10 @@
57 DEPENDPATH += ../libmythmpeg2
58 DEPENDPATH += ./dvbdev ./mpeg
59
60+# (begin)softcam-0.4
61+LIBS += FFdecsa/FFdecsa.o
62+# (end)softcam-0.4
63+
64 LIBS += -L../libmyth -L../libavutil -L../libavcodec -L../libavformat -L../libmythmpeg2
65 LIBS += -lmyth-$${LIBVERSION} -lmythavutil-$${LIBVERSION} \
66 -lmythavcodec-$${LIBVERSION} \
67Index: libs/libmythtv/FFdecsa/FFdecsa_test_testcases.h
68===================================================================
69--- /dev/null 1970-01-01 00:00:00.000000000 +0000
70+++ libs/libmythtv/FFdecsa/FFdecsa_test_testcases.h 2006-06-20 17:36:06.000000000 -0400
71@@ -0,0 +1,279 @@
72+/* FFdecsa -- fast decsa algorithm
73+ *
74+ * Copyright (C) 2003-2004 fatih89r
75+ *
76+ * This program is free software; you can redistribute it and/or modify
77+ * it under the terms of the GNU General Public License as published by
78+ * the Free Software Foundation; either version 2 of the License, or
79+ * (at your option) any later version.
80+ *
81+ * This program is distributed in the hope that it will be useful,
82+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
83+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
84+ * GNU General Public License for more details.
85+ *
86+ * You should have received a copy of the GNU General Public License
87+ * along with this program; if not, write to the Free Software
88+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
89+ */
90+
91+
92+// TEST DATA
93+
94+////////// used as a wrong key
95+unsigned char test_invalid_key[0x08] = {
96+ 0x0f, 0x1e, 0x2d, 0x3c, 0x4b, 0x5a, 0x69, 0x78
97+};
98+
99+
100+////////// test 1: odd key
101+unsigned char test_1_key[0x8] = {
102+ 0x07, 0xe0, 0x1b, 0x02, 0xc9, 0xe0, 0x45, 0xee
103+};
104+unsigned char test_1_encrypted[0x100] = {
105+ 0x47, 0x00, 0x00, 0xd0,
106+ 0xde, 0xcf, 0x0a, 0x0d, 0xb2, 0xd7, 0xc4, 0x40, 0xde, 0x5d, 0x63, 0x18, 0x5a, 0x98, 0x17, 0xaa,
107+ 0xc9, 0xbc, 0x27, 0xc6, 0xcb, 0x49, 0x40, 0x48, 0xfd, 0x20, 0xb7, 0x05, 0x5b, 0x27, 0xcb, 0xeb,
108+ 0x9a, 0xf0, 0xac, 0x45, 0x6d, 0x56, 0xf4, 0x7b, 0x6f, 0xa0, 0x57, 0xf3, 0x9b, 0xf7, 0xa2, 0xc7,
109+ 0xd4, 0x68, 0x24, 0x00, 0x2f, 0x28, 0x13, 0x96, 0x94, 0xa8, 0x7c, 0xf4, 0x6f, 0x07, 0x2a, 0x0e,
110+ 0xe8, 0xa1, 0xeb, 0xc7, 0x80, 0xac, 0x1f, 0x79, 0xbf, 0x5d, 0xb6, 0x10, 0x7c, 0x2e, 0x52, 0xe9,
111+ 0x34, 0x2c, 0xa8, 0x39, 0x01, 0x73, 0x04, 0x24, 0xa8, 0x1e, 0xdb, 0x5b, 0xcb, 0x24, 0xf6, 0x31,
112+ 0xab, 0x02, 0x6b, 0xf9, 0xf6, 0xf7, 0xe9, 0x52, 0xad, 0xcf, 0x62, 0x0f, 0x42, 0xf6, 0x66, 0x5d,
113+ 0xc0, 0x86, 0xf2, 0x7b, 0x40, 0x20, 0xa9, 0xbd, 0x1f, 0xfd, 0x16, 0xad, 0x2e, 0x75, 0xa6, 0xa0,
114+ 0x85, 0xf3, 0x9c, 0x31, 0x20, 0x4e, 0xfb, 0x95, 0x61, 0x78, 0xce, 0x10, 0xc1, 0x48, 0x5f, 0xd3,
115+ 0x61, 0x05, 0x12, 0xf4, 0xe2, 0x04, 0xae, 0xe0, 0x86, 0x01, 0x56, 0x55, 0xb1, 0x0f, 0xa6, 0x33,
116+ 0x95, 0x20, 0x92, 0xf0, 0xbe, 0x39, 0x31, 0xe1, 0x2a, 0xf7, 0x93, 0xb4, 0xf7, 0xe4, 0xf1, 0x85,
117+ 0xae, 0x50, 0xf1, 0x63, 0xd4, 0x5d, 0x9c, 0x6c
118+};
119+unsigned char test_1_expected[0x100] = {
120+ 0x47, 0x00, 0x00, 0xd0,
121+ 0xaf, 0xbe, 0xfb, 0xef, 0xbe, 0xfb, 0xef, 0xbe, 0xfb, 0xef, 0xbe, 0xfb, 0xe6, 0xb5, 0xad, 0x7c,
122+ 0xf9, 0xf3, 0xe5, 0xb1, 0x6c, 0x7c, 0xf9, 0xf3, 0xe6, 0xb5, 0xad, 0x6b, 0x5f, 0x3e, 0x7c, 0xf9,
123+ 0x6c, 0x5b, 0x1f, 0x3e, 0x7c, 0xf9, 0xad, 0x6b, 0x5a, 0xd7, 0xcf, 0x9f, 0x3e, 0x5b, 0x16, 0xc7,
124+ 0xcf, 0x9f, 0x3e, 0x6b, 0x5a, 0xd6, 0xb5, 0xf3, 0xe7, 0xcf, 0x96, 0xc5, 0xb1, 0xf3, 0xe7, 0xcf,
125+ 0x9a, 0xd6, 0xb5, 0xad, 0x7c, 0xf9, 0xf3, 0xe5, 0xb1, 0x6c, 0x7c, 0xf9, 0xf3, 0xe6, 0xb5, 0xad,
126+ 0x6b, 0x5f, 0x3e, 0x7c, 0xf9, 0x6c, 0x5b, 0x1f, 0x3e, 0x7c, 0xf9, 0xad, 0x6b, 0x5a, 0xd7, 0xcf,
127+ 0x9f, 0x3e, 0x5b, 0x16, 0xc7, 0xcf, 0x9f, 0x3e, 0x6b, 0x5a, 0xd6, 0xb5, 0xf3, 0xe7, 0xcf, 0x96,
128+ 0xc5, 0xb1, 0xf3, 0xe7, 0xcf, 0x9a, 0xd6, 0xb5, 0xad, 0x7c, 0xf9, 0xf3, 0xe5, 0xb1, 0x6c, 0x7c,
129+ 0xf9, 0xf3, 0xe6, 0xb5, 0xad, 0x6b, 0x5f, 0x3e, 0x7c, 0xf9, 0x6c, 0x5b, 0x1f, 0x3e, 0x7c, 0xf9,
130+ 0xad, 0x6b, 0x5a, 0xd7, 0xcf, 0x9f, 0x3e, 0x5b, 0x16, 0xc7, 0xcf, 0x9f, 0x3e, 0x6b, 0x5a, 0xd6,
131+ 0xb5, 0xf3, 0xe7, 0xcf, 0x96, 0xc5, 0xb1, 0xf3, 0xe7, 0xcf, 0x9a, 0xd0, 0x00, 0x00, 0x00, 0x00,
132+ 0xff, 0xfc, 0x44, 0x00, 0x66, 0xb1, 0x11, 0x11
133+};
134+unsigned char test_1_expected_stream[0x100] = {
135+ 0xdc, 0x15, 0xde, 0xf1, 0x4a, 0xf1, 0xf8, 0x2c,
136+ 0x75, 0xc8, 0x3a, 0x1f, 0xbf, 0x67, 0x19, 0xe1,
137+ 0xf4, 0x6c, 0x78, 0x99, 0x48, 0xaf, 0xef, 0x94,
138+ 0x71, 0x6b, 0x23, 0x9e, 0x29, 0x69, 0x2d, 0xa1,
139+ 0x8a, 0xbb, 0xf4, 0x16, 0x68, 0xa5, 0x7f, 0x14,
140+ 0xa9, 0x37, 0x24, 0x05, 0x5e, 0xdd, 0xec, 0x4b,
141+ 0xb5, 0xcb, 0x7f, 0x1d, 0xa7, 0x09, 0x2a, 0xce,
142+ 0xc4, 0x30, 0x83, 0xfd, 0xd9, 0x88, 0xa9, 0xf3,
143+ 0x85, 0x9c, 0x38, 0x31, 0x88, 0xac, 0x74, 0x02,
144+ 0x44, 0xdc, 0xb7, 0x81, 0x07, 0xc8, 0x1b, 0x03,
145+ 0x9c, 0x76, 0xbe, 0xe9, 0x4d, 0x3e, 0x19, 0xad,
146+ 0xe1, 0xf1, 0xa5, 0x13, 0xe8, 0xc0, 0x12, 0x57,
147+ 0x68, 0xb1, 0x9c, 0x6c, 0x9f, 0x58, 0x78, 0xee,
148+ 0x4f, 0x5b, 0x33, 0x1e, 0xc6, 0x29, 0xfc, 0x40,
149+ 0x58, 0x22, 0xa2, 0xd8, 0x32, 0xdd, 0x29, 0x4f,
150+ 0x2b, 0xe1, 0xef, 0xe4, 0xbb, 0xf2, 0x60, 0x94,
151+ 0x6c, 0xc5, 0x51, 0xec, 0x35, 0x4c, 0x27, 0xc6,
152+ 0x9d, 0x73, 0xe0, 0xf4, 0x2b, 0xfa, 0x62, 0x12,
153+ 0xcd, 0x44, 0xbe, 0x57, 0xfe, 0x80, 0xe7, 0xa9,
154+ 0x3c, 0x49, 0x42, 0xb6, 0xed, 0x05, 0x57, 0x00,
155+ 0xd2, 0x25, 0x90, 0xb3, 0xe4, 0x65, 0x8f, 0xd6,
156+ 0x4e, 0x0c, 0x73, 0x30, 0x3b, 0x68, 0x48, 0xdd,
157+// stream ^ sb
158+// 0x02, 0x48, 0xbd, 0xe9, 0x10, 0x69, 0xef, 0x86,
159+// 0xbc, 0x74, 0x1d, 0xd9, 0x74, 0x2e, 0x59, 0xa9,
160+// 0x09, 0x4c, 0xcf, 0x9c, 0x13, 0x88, 0x24, 0x7f,
161+// 0xeb, 0x9b, 0x8f, 0xdb, 0x44, 0x3f, 0xd9, 0xda,
162+};
163+unsigned char test_1_expected_block[0x100] = {
164+ 0xad, 0xf6, 0x46, 0x06, 0xae, 0x92, 0x00, 0x38,
165+ 0x47, 0x9b, 0xa3, 0x22, 0x92, 0x9b, 0xf4, 0xd5,
166+ 0xf0, 0xbf, 0x2a, 0x2d, 0x7f, 0xf4, 0xdd, 0x8c,
167+ 0x0d, 0x2e, 0x22, 0xb0, 0x1b, 0x01, 0xa5, 0x23,
168+ 0x89, 0x40, 0xbc, 0xdb, 0x8f, 0xab, 0x70, 0xb8,
169+ 0x27, 0x88, 0xcf, 0x9a, 0x4f, 0xae, 0xe9, 0x1a,
170+ 0xee, 0xfc, 0x3d, 0x82, 0x92, 0xd8, 0xb5, 0x33,
171+ 0xcb, 0x5e, 0xfe, 0xff, 0xe8, 0xd7, 0x51, 0x45,
172+ 0xa0, 0x17, 0x3b, 0x8c, 0x88, 0x7b, 0xd5, 0x0e,
173+ 0xc1, 0x9c, 0x63, 0x41, 0xf5, 0x5d, 0xaa, 0x8a,
174+ 0x5f, 0x37, 0x5b, 0xce, 0x7f, 0x76, 0xb4, 0x83,
175+ 0x74, 0x8f, 0x37, 0x47, 0x75, 0x6d, 0x2c, 0xca,
176+ 0x5a, 0x40, 0xa5, 0x75, 0x1a, 0x61, 0x81, 0x8d,
177+ 0xe4, 0x87, 0x17, 0xd0, 0x75, 0xee, 0x9a, 0x6b,
178+ 0x82, 0x6e, 0x47, 0x92, 0xd3, 0x32, 0x59, 0x5a,
179+ 0x03, 0x6e, 0x8a, 0x26, 0x7e, 0x0d, 0xf7, 0x7d,
180+ 0xf4, 0x4e, 0x79, 0x49, 0x59, 0x6f, 0x27, 0x2b,
181+ 0x80, 0x8f, 0x9e, 0x5b, 0xd6, 0xc0, 0xb0, 0x0b,
182+ 0xe6, 0x2e, 0xb2, 0xd5, 0x80, 0x10, 0x7f, 0xc1,
183+ 0xbf, 0xae, 0x1f, 0xd9, 0x6d, 0x57, 0x3c, 0x37,
184+ 0x4d, 0x21, 0xe4, 0xc8, 0x85, 0x44, 0xcf, 0xa0,
185+ 0x07, 0x93, 0x18, 0x83, 0xef, 0x35, 0xd4, 0xb1,
186+ 0xff, 0xfc, 0x44, 0x00, 0x66, 0xb1, 0x11, 0x11
187+};
188+unsigned char test_1_expected_kb[] = {
189+ 0xEE, 0x45, 0xE0, 0xC9, 0x02, 0x1B, 0xE0, 0x07,
190+ 0x46, 0xA4, 0x1C, 0x26, 0x7B, 0x0C, 0x01, 0xED,
191+ 0x93, 0x99, 0xC3, 0x14, 0xC4, 0x4A, 0x8D, 0x54,
192+ 0x19, 0x82, 0x39, 0xD1, 0x33, 0xB0, 0x33, 0x52,
193+ 0x75, 0x62, 0x80, 0x3A, 0xC8, 0x83, 0x5E, 0x23,
194+ 0xA2, 0x57, 0x0C, 0xC4, 0x2C, 0x2D, 0xD2, 0x98,
195+ 0xA0, 0x6C, 0x77, 0x29, 0x11, 0x42, 0x49, 0xCE,
196+};
197+unsigned char test_1_expected_kk[] = {
198+ 0x5e, 0x9d, 0xff, 0x2e, 0xbb, 0xaa, 0xa8, 0xe9,
199+ 0xf6, 0x0e, 0xff, 0x7c, 0xda, 0xce, 0x55, 0x03,
200+ 0xd9, 0xde, 0x79, 0xf5, 0x2c, 0xaf, 0x06, 0xf8,
201+ 0xb2, 0xc9, 0xf8, 0x78, 0x54, 0xf9, 0xd1, 0xe7,
202+ 0xeb, 0xbe, 0xd7, 0xeb, 0x25, 0xe9, 0x17, 0x99,
203+ 0xbf, 0x24, 0xce, 0x2a, 0x73, 0xfe, 0xf9, 0xbc,
204+ 0xd9, 0x55, 0x91, 0xcf, 0xe0, 0xc9, 0xdf, 0x88,
205+};
206+
207+
208+////////// test 2: even key
209+unsigned char test_2_key[0x8] = {
210+ 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
211+};
212+unsigned char test_2_encrypted[0x100] = {
213+ 0x47, 0x00, 0x00, 0x90,
214+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
215+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
216+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
217+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
218+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
219+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
220+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
221+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
222+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
223+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
224+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
225+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
226+};
227+unsigned char test_2_expected[0x100] = {
228+ 0x47, 0x00, 0x00, 0x90,
229+ 0x2d, 0x0a, 0x47, 0x20, 0x18, 0x11, 0x9c, 0x8a, 0xd1, 0x2a, 0x65, 0x6b, 0x89, 0xe4, 0x35, 0x2b,
230+ 0xc2, 0xb5, 0x90, 0x61, 0xd1, 0x7e, 0x02, 0xe1, 0x3f, 0x46, 0x70, 0xcf, 0x77, 0x91, 0x2f, 0x22,
231+ 0x93, 0xc1, 0x6c, 0xfe, 0x49, 0xad, 0x7c, 0xc2, 0xaf, 0x86, 0x1b, 0xa3, 0x29, 0xbe, 0xaa, 0x64,
232+ 0xf0, 0x22, 0xb9, 0x5e, 0x98, 0xaa, 0x60, 0xef, 0xdf, 0xd6, 0x44, 0x77, 0xe6, 0xbf, 0xbb, 0x94,
233+ 0xb2, 0x0a, 0x63, 0x0e, 0x5c, 0xf2, 0xac, 0xb4, 0x49, 0xcc, 0x9e, 0x4f, 0x94, 0x4c, 0x30, 0x12,
234+ 0xe8, 0x55, 0xc2, 0x44, 0xa4, 0x52, 0xcb, 0x61, 0x81, 0xc9, 0xb6, 0xa6, 0x6b, 0xef, 0xaf, 0xa6,
235+ 0x71, 0x1d, 0x7b, 0x58, 0x2f, 0xfa, 0xd1, 0x0c, 0x07, 0x9d, 0x1f, 0x35, 0x87, 0xbe, 0x02, 0x9f,
236+ 0x20, 0xc6, 0x60, 0x8f, 0x1c, 0x30, 0x0f, 0x96, 0xd0, 0x71, 0xd6, 0x51, 0x10, 0xdf, 0x5b, 0xf6,
237+ 0x44, 0x2f, 0x80, 0x28, 0xb7, 0xec, 0x23, 0x59, 0x4b, 0x94, 0x0b, 0x9a, 0x74, 0xa1, 0x1f, 0xf7,
238+ 0x9e, 0x76, 0xb4, 0xdf, 0xbb, 0x3c, 0x8c, 0x88, 0x97, 0x22, 0x56, 0x73, 0x16, 0x05, 0xac, 0xf9,
239+ 0x4f, 0x77, 0x9d, 0x38, 0xa0, 0x6b, 0x05, 0xd2, 0xe6, 0x15, 0x01, 0xb1, 0x5c, 0xc9, 0x62, 0xa9,
240+ 0x9b, 0x1a, 0x6a, 0x1a, 0xcf, 0xe6, 0xa8, 0xba,
241+};
242+
243+
244+////////// test 3: even key
245+unsigned char test_3_key[0x8] = {
246+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
247+};
248+unsigned char test_3_encrypted[0x100] = {
249+ 0x47, 0x00, 0x00, 0x90,
250+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
251+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
252+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
253+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
254+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
255+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
256+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
257+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
258+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
259+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
260+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
261+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
262+
263+};
264+unsigned char test_3_expected[0x100] = {
265+ 0x47, 0x00, 0x00, 0x90,
266+ 0xfe, 0x91, 0xa7, 0x2f, 0xbf, 0xb0, 0x6a, 0x54, 0xc1, 0xe4, 0x33, 0x27, 0x18, 0xd5, 0x9c, 0x43,
267+ 0xea, 0xaa, 0x6b, 0x38, 0x5c, 0xe7, 0xae, 0xc9, 0xac, 0xec, 0xef, 0xc3, 0x51, 0x7d, 0x53, 0x47,
268+ 0xa0, 0xa7, 0x6d, 0x73, 0x8a, 0x9d, 0x16, 0x7d, 0x05, 0x2d, 0xd6, 0x6b, 0xf4, 0x8d, 0x4b, 0x81,
269+ 0x98, 0x2f, 0x46, 0xa5, 0x34, 0x84, 0xf3, 0x70, 0xa4, 0xe9, 0x04, 0x84, 0x7b, 0x87, 0x79, 0x3c,
270+ 0x01, 0x25, 0xb5, 0xfc, 0x3d, 0xd0, 0x25, 0xea, 0x2f, 0x91, 0xf0, 0x3f, 0x7f, 0xd4, 0x8e, 0x1e,
271+ 0x36, 0x83, 0x22, 0x91, 0x57, 0x92, 0x36, 0x0b, 0x44, 0xa5, 0xcc, 0x5e, 0xef, 0x44, 0x3e, 0xf8,
272+ 0xe9, 0x7b, 0x5e, 0x0c, 0xea, 0xb2, 0x50, 0x39, 0xb7, 0xea, 0xc4, 0xfb, 0xe4, 0x37, 0xf8, 0x85,
273+ 0xc2, 0xdc, 0x01, 0x98, 0x01, 0x2a, 0x44, 0xd3, 0x75, 0x10, 0x38, 0xf4, 0x85, 0x3e, 0xc9, 0xf7,
274+ 0xe7, 0xe4, 0xec, 0x40, 0x3d, 0x8f, 0xa5, 0xd2, 0x8a, 0xca, 0x62, 0x03, 0x3f, 0x65, 0x28, 0x8d,
275+ 0xf5, 0x56, 0xa7, 0xea, 0xd1, 0x0d, 0x70, 0x82, 0xbc, 0x90, 0x59, 0xf8, 0x3e, 0x08, 0xc9, 0xe1,
276+ 0x97, 0xef, 0x82, 0x43, 0x35, 0x41, 0x3e, 0x7f, 0x00, 0x96, 0x3f, 0x90, 0xe5, 0x1e, 0x96, 0xba,
277+ 0xce, 0x6d, 0xd2, 0x54, 0xce, 0x84, 0x76, 0x3c
278+};
279+
280+
281+////////// odd key, only 80 (0x50) bytes of payload (10 groups of 8 bytes + 0 byte residue)
282+unsigned char test_p_10_0_key[0x8] = {
283+ 0x2d, 0x11, 0x5f, 0x9d, 0x29, 0xbf, 0x7f, 0x67
284+};
285+unsigned char test_p_10_0_encrypted[0x100] = {
286+ 0x47, 0x00, 0x7a, 0xbe,
287+ 0x67, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
288+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
289+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
290+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
291+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
292+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
293+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x71, 0xa5, 0x7b, 0x8f, 0xf9, 0x87, 0xcb, 0xac,
294+ 0xea, 0x08, 0x0c, 0x02, 0x87, 0x7b, 0xad, 0x10, 0x40, 0x28, 0x8e, 0xd4, 0x4e, 0x62, 0xc7, 0x74,
295+ 0xd6, 0xbb, 0x3a, 0xaa, 0xb0, 0x7b, 0x70, 0xbe, 0x06, 0xc9, 0xdc, 0x07, 0xd2, 0x2d, 0xab, 0x2d,
296+ 0xe2, 0xc6, 0x36, 0xa6, 0xda, 0x64, 0x61, 0x15, 0xd1, 0x6a, 0x40, 0xc0, 0xa9, 0xfb, 0x3f, 0xb2,
297+ 0x6d, 0xa5, 0x59, 0xae, 0x57, 0x88, 0x6b, 0x0e, 0x00, 0xae, 0xce, 0x64, 0xee, 0xfd, 0xb1, 0x7f,
298+ 0x78, 0x9c, 0x12, 0x42, 0xbe, 0x30, 0x8a, 0xa3
299+};
300+unsigned char test_p_10_0_expected[0x100] = {
301+ 0x47, 0x00, 0x7a, 0xbe,
302+ 0x67, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
303+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
304+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
305+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
306+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
307+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
308+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa7, 0xca, 0x32, 0xaf, 0x2e, 0x6a, 0xea, 0x05,
309+ 0x39, 0x33, 0x67, 0x5d, 0xa3, 0x61, 0x0f, 0x34, 0x40, 0x6c, 0x1a, 0xb3, 0xee, 0x54, 0x64, 0xd5,
310+ 0xa3, 0x01, 0x95, 0x87, 0x9d, 0x3d, 0x38, 0xc5, 0x82, 0x8b, 0x8d, 0xab, 0xad, 0x93, 0x0f, 0xe8,
311+ 0xf9, 0xbd, 0x52, 0x98, 0x59, 0xb2, 0x41, 0x95, 0xcd, 0xae, 0x9b, 0x3e, 0xdf, 0xdb, 0x14, 0x9b,
312+ 0xa9, 0x22, 0x0d, 0x2d, 0x61, 0xf5, 0xf2, 0x52, 0x83, 0x20, 0xae, 0xb8, 0x83, 0x52, 0x02, 0xee,
313+ 0xbd, 0xd2, 0x94, 0x6c, 0x27, 0x58, 0x55, 0xd0
314+};
315+
316+
317+////////// odd key, only 14 (0x0e) bytes of payload (1 group of 8 bytes + 6 byte residue)
318+unsigned char test_p_1_6_key[0x8] = {
319+ 0x2d, 0x11, 0x5f, 0x9d, 0x29, 0xbf, 0x7f, 0x67
320+};
321+unsigned char test_p_1_6_encrypted[0x100] = {
322+ 0x47, 0x00, 0x7a, 0xb7,
323+ 0xa9, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
324+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
325+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
326+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
327+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
328+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
329+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
330+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
331+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
332+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
333+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc0, 0x5e, 0xfb, 0xc8, 0x4a, 0x63,
334+ 0xe3, 0x3c, 0x11, 0xd9, 0xe0, 0x75, 0x8e, 0xf2
335+};
336+unsigned char test_p_1_6_expected[0x100] = {
337+ 0x47, 0x00, 0x7a, 0xb7,
338+ 0xa9, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
339+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
340+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
341+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
342+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
343+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
344+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
345+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
346+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
347+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
348+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5a, 0x2c, 0xee, 0xb3, 0xde, 0x92,
349+ 0xe7, 0xa6, 0x6c, 0xaa, 0x99, 0x84, 0xe4, 0x00
350+};
351Index: libs/libmythtv/FFdecsa/COPYING
352===================================================================
353--- /dev/null 1970-01-01 00:00:00.000000000 +0000
354+++ libs/libmythtv/FFdecsa/COPYING 2006-06-20 17:36:06.000000000 -0400
355@@ -0,0 +1,339 @@
356+ GNU GENERAL PUBLIC LICENSE
357+ Version 2, June 1991
358+
359+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
360+ 675 Mass Ave, Cambridge, MA 02139, USA
361+ Everyone is permitted to copy and distribute verbatim copies
362+ of this license document, but changing it is not allowed.
363+
364+ Preamble
365+
366+ The licenses for most software are designed to take away your
367+freedom to share and change it. By contrast, the GNU General Public
368+License is intended to guarantee your freedom to share and change free
369+software--to make sure the software is free for all its users. This
370+General Public License applies to most of the Free Software
371+Foundation's software and to any other program whose authors commit to
372+using it. (Some other Free Software Foundation software is covered by
373+the GNU Library General Public License instead.) You can apply it to
374+your programs, too.
375+
376+ When we speak of free software, we are referring to freedom, not
377+price. Our General Public Licenses are designed to make sure that you
378+have the freedom to distribute copies of free software (and charge for
379+this service if you wish), that you receive source code or can get it
380+if you want it, that you can change the software or use pieces of it
381+in new free programs; and that you know you can do these things.
382+
383+ To protect your rights, we need to make restrictions that forbid
384+anyone to deny you these rights or to ask you to surrender the rights.
385+These restrictions translate to certain responsibilities for you if you
386+distribute copies of the software, or if you modify it.
387+
388+ For example, if you distribute copies of such a program, whether
389+gratis or for a fee, you must give the recipients all the rights that
390+you have. You must make sure that they, too, receive or can get the
391+source code. And you must show them these terms so they know their
392+rights.
393+
394+ We protect your rights with two steps: (1) copyright the software, and
395+(2) offer you this license which gives you legal permission to copy,
396+distribute and/or modify the software.
397+
398+ Also, for each author's protection and ours, we want to make certain
399+that everyone understands that there is no warranty for this free
400+software. If the software is modified by someone else and passed on, we
401+want its recipients to know that what they have is not the original, so
402+that any problems introduced by others will not reflect on the original
403+authors' reputations.
404+
405+ Finally, any free program is threatened constantly by software
406+patents. We wish to avoid the danger that redistributors of a free
407+program will individually obtain patent licenses, in effect making the
408+program proprietary. To prevent this, we have made it clear that any
409+patent must be licensed for everyone's free use or not licensed at all.
410+
411+ The precise terms and conditions for copying, distribution and
412+modification follow.
413+\f
414+ GNU GENERAL PUBLIC LICENSE
415+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
416+
417+ 0. This License applies to any program or other work which contains
418+a notice placed by the copyright holder saying it may be distributed
419+under the terms of this General Public License. The "Program", below,
420+refers to any such program or work, and a "work based on the Program"
421+means either the Program or any derivative work under copyright law:
422+that is to say, a work containing the Program or a portion of it,
423+either verbatim or with modifications and/or translated into another
424+language. (Hereinafter, translation is included without limitation in
425+the term "modification".) Each licensee is addressed as "you".
426+
427+Activities other than copying, distribution and modification are not
428+covered by this License; they are outside its scope. The act of
429+running the Program is not restricted, and the output from the Program
430+is covered only if its contents constitute a work based on the
431+Program (independent of having been made by running the Program).
432+Whether that is true depends on what the Program does.
433+
434+ 1. You may copy and distribute verbatim copies of the Program's
435+source code as you receive it, in any medium, provided that you
436+conspicuously and appropriately publish on each copy an appropriate
437+copyright notice and disclaimer of warranty; keep intact all the
438+notices that refer to this License and to the absence of any warranty;
439+and give any other recipients of the Program a copy of this License
440+along with the Program.
441+
442+You may charge a fee for the physical act of transferring a copy, and
443+you may at your option offer warranty protection in exchange for a fee.
444+
445+ 2. You may modify your copy or copies of the Program or any portion
446+of it, thus forming a work based on the Program, and copy and
447+distribute such modifications or work under the terms of Section 1
448+above, provided that you also meet all of these conditions:
449+
450+ a) You must cause the modified files to carry prominent notices
451+ stating that you changed the files and the date of any change.
452+
453+ b) You must cause any work that you distribute or publish, that in
454+ whole or in part contains or is derived from the Program or any
455+ part thereof, to be licensed as a whole at no charge to all third
456+ parties under the terms of this License.
457+
458+ c) If the modified program normally reads commands interactively
459+ when run, you must cause it, when started running for such
460+ interactive use in the most ordinary way, to print or display an
461+ announcement including an appropriate copyright notice and a
462+ notice that there is no warranty (or else, saying that you provide
463+ a warranty) and that users may redistribute the program under
464+ these conditions, and telling the user how to view a copy of this
465+ License. (Exception: if the Program itself is interactive but
466+ does not normally print such an announcement, your work based on
467+ the Program is not required to print an announcement.)
468+\f
469+These requirements apply to the modified work as a whole. If
470+identifiable sections of that work are not derived from the Program,
471+and can be reasonably considered independent and separate works in
472+themselves, then this License, and its terms, do not apply to those
473+sections when you distribute them as separate works. But when you
474+distribute the same sections as part of a whole which is a work based
475+on the Program, the distribution of the whole must be on the terms of
476+this License, whose permissions for other licensees extend to the
477+entire whole, and thus to each and every part regardless of who wrote it.
478+
479+Thus, it is not the intent of this section to claim rights or contest
480+your rights to work written entirely by you; rather, the intent is to
481+exercise the right to control the distribution of derivative or
482+collective works based on the Program.
483+
484+In addition, mere aggregation of another work not based on the Program
485+with the Program (or with a work based on the Program) on a volume of
486+a storage or distribution medium does not bring the other work under
487+the scope of this License.
488+
489+ 3. You may copy and distribute the Program (or a work based on it,
490+under Section 2) in object code or executable form under the terms of
491+Sections 1 and 2 above provided that you also do one of the following:
492+
493+ a) Accompany it with the complete corresponding machine-readable
494+ source code, which must be distributed under the terms of Sections
495+ 1 and 2 above on a medium customarily used for software interchange; or,
496+
497+ b) Accompany it with a written offer, valid for at least three
498+ years, to give any third party, for a charge no more than your
499+ cost of physically performing source distribution, a complete
500+ machine-readable copy of the corresponding source code, to be
501+ distributed under the terms of Sections 1 and 2 above on a medium
502+ customarily used for software interchange; or,
503+
504+ c) Accompany it with the information you received as to the offer
505+ to distribute corresponding source code. (This alternative is
506+ allowed only for noncommercial distribution and only if you
507+ received the program in object code or executable form with such
508+ an offer, in accord with Subsection b above.)
509+
510+The source code for a work means the preferred form of the work for
511+making modifications to it. For an executable work, complete source
512+code means all the source code for all modules it contains, plus any
513+associated interface definition files, plus the scripts used to
514+control compilation and installation of the executable. However, as a
515+special exception, the source code distributed need not include
516+anything that is normally distributed (in either source or binary
517+form) with the major components (compiler, kernel, and so on) of the
518+operating system on which the executable runs, unless that component
519+itself accompanies the executable.
520+
521+If distribution of executable or object code is made by offering
522+access to copy from a designated place, then offering equivalent
523+access to copy the source code from the same place counts as
524+distribution of the source code, even though third parties are not
525+compelled to copy the source along with the object code.
526+\f
527+ 4. You may not copy, modify, sublicense, or distribute the Program
528+except as expressly provided under this License. Any attempt
529+otherwise to copy, modify, sublicense or distribute the Program is
530+void, and will automatically terminate your rights under this License.
531+However, parties who have received copies, or rights, from you under
532+this License will not have their licenses terminated so long as such
533+parties remain in full compliance.
534+
535+ 5. You are not required to accept this License, since you have not
536+signed it. However, nothing else grants you permission to modify or
537+distribute the Program or its derivative works. These actions are
538+prohibited by law if you do not accept this License. Therefore, by
539+modifying or distributing the Program (or any work based on the
540+Program), you indicate your acceptance of this License to do so, and
541+all its terms and conditions for copying, distributing or modifying
542+the Program or works based on it.
543+
544+ 6. Each time you redistribute the Program (or any work based on the
545+Program), the recipient automatically receives a license from the
546+original licensor to copy, distribute or modify the Program subject to
547+these terms and conditions. You may not impose any further
548+restrictions on the recipients' exercise of the rights granted herein.
549+You are not responsible for enforcing compliance by third parties to
550+this License.
551+
552+ 7. If, as a consequence of a court judgment or allegation of patent
553+infringement or for any other reason (not limited to patent issues),
554+conditions are imposed on you (whether by court order, agreement or
555+otherwise) that contradict the conditions of this License, they do not
556+excuse you from the conditions of this License. If you cannot
557+distribute so as to satisfy simultaneously your obligations under this
558+License and any other pertinent obligations, then as a consequence you
559+may not distribute the Program at all. For example, if a patent
560+license would not permit royalty-free redistribution of the Program by
561+all those who receive copies directly or indirectly through you, then
562+the only way you could satisfy both it and this License would be to
563+refrain entirely from distribution of the Program.
564+
565+If any portion of this section is held invalid or unenforceable under
566+any particular circumstance, the balance of the section is intended to
567+apply and the section as a whole is intended to apply in other
568+circumstances.
569+
570+It is not the purpose of this section to induce you to infringe any
571+patents or other property right claims or to contest validity of any
572+such claims; this section has the sole purpose of protecting the
573+integrity of the free software distribution system, which is
574+implemented by public license practices. Many people have made
575+generous contributions to the wide range of software distributed
576+through that system in reliance on consistent application of that
577+system; it is up to the author/donor to decide if he or she is willing
578+to distribute software through any other system and a licensee cannot
579+impose that choice.
580+
581+This section is intended to make thoroughly clear what is believed to
582+be a consequence of the rest of this License.
583+\f
584+ 8. If the distribution and/or use of the Program is restricted in
585+certain countries either by patents or by copyrighted interfaces, the
586+original copyright holder who places the Program under this License
587+may add an explicit geographical distribution limitation excluding
588+those countries, so that distribution is permitted only in or among
589+countries not thus excluded. In such case, this License incorporates
590+the limitation as if written in the body of this License.
591+
592+ 9. The Free Software Foundation may publish revised and/or new versions
593+of the General Public License from time to time. Such new versions will
594+be similar in spirit to the present version, but may differ in detail to
595+address new problems or concerns.
596+
597+Each version is given a distinguishing version number. If the Program
598+specifies a version number of this License which applies to it and "any
599+later version", you have the option of following the terms and conditions
600+either of that version or of any later version published by the Free
601+Software Foundation. If the Program does not specify a version number of
602+this License, you may choose any version ever published by the Free Software
603+Foundation.
604+
605+ 10. If you wish to incorporate parts of the Program into other free
606+programs whose distribution conditions are different, write to the author
607+to ask for permission. For software which is copyrighted by the Free
608+Software Foundation, write to the Free Software Foundation; we sometimes
609+make exceptions for this. Our decision will be guided by the two goals
610+of preserving the free status of all derivatives of our free software and
611+of promoting the sharing and reuse of software generally.
612+
613+ NO WARRANTY
614+
615+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
616+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
617+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
618+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
619+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
620+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
621+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
622+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
623+REPAIR OR CORRECTION.
624+
625+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
626+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
627+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
628+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
629+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
630+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
631+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
632+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
633+POSSIBILITY OF SUCH DAMAGES.
634+
635+ END OF TERMS AND CONDITIONS
636+\f
637+ Appendix: How to Apply These Terms to Your New Programs
638+
639+ If you develop a new program, and you want it to be of the greatest
640+possible use to the public, the best way to achieve this is to make it
641+free software which everyone can redistribute and change under these terms.
642+
643+ To do so, attach the following notices to the program. It is safest
644+to attach them to the start of each source file to most effectively
645+convey the exclusion of warranty; and each file should have at least
646+the "copyright" line and a pointer to where the full notice is found.
647+
648+ <one line to give the program's name and a brief idea of what it does.>
649+ Copyright (C) 19yy <name of author>
650+
651+ This program is free software; you can redistribute it and/or modify
652+ it under the terms of the GNU General Public License as published by
653+ the Free Software Foundation; either version 2 of the License, or
654+ (at your option) any later version.
655+
656+ This program is distributed in the hope that it will be useful,
657+ but WITHOUT ANY WARRANTY; without even the implied warranty of
658+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
659+ GNU General Public License for more details.
660+
661+ You should have received a copy of the GNU General Public License
662+ along with this program; if not, write to the Free Software
663+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
664+
665+Also add information on how to contact you by electronic and paper mail.
666+
667+If the program is interactive, make it output a short notice like this
668+when it starts in an interactive mode:
669+
670+ Gnomovision version 69, Copyright (C) 19yy name of author
671+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
672+ This is free software, and you are welcome to redistribute it
673+ under certain conditions; type `show c' for details.
674+
675+The hypothetical commands `show w' and `show c' should show the appropriate
676+parts of the General Public License. Of course, the commands you use may
677+be called something other than `show w' and `show c'; they could even be
678+mouse-clicks or menu items--whatever suits your program.
679+
680+You should also get your employer (if you work as a programmer) or your
681+school, if any, to sign a "copyright disclaimer" for the program, if
682+necessary. Here is a sample; alter the names:
683+
684+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
685+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
686+
687+ <signature of Ty Coon>, 1 April 1989
688+ Ty Coon, President of Vice
689+
690+This General Public License does not permit incorporating your program into
691+proprietary programs. If your program is a subroutine library, you may
692+consider it more useful to permit linking proprietary applications with the
693+library. If this is what you want to do, use the GNU Library General
694+Public License instead of this License.
695Index: libs/libmythtv/FFdecsa/parallel_032_int.h
696===================================================================
697--- /dev/null 1970-01-01 00:00:00.000000000 +0000
698+++ libs/libmythtv/FFdecsa/parallel_032_int.h 2006-06-20 17:36:06.000000000 -0400
699@@ -0,0 +1,55 @@
700+/* FFdecsa -- fast decsa algorithm
701+ *
702+ * Copyright (C) 2003-2004 fatih89r
703+ *
704+ * This program is free software; you can redistribute it and/or modify
705+ * it under the terms of the GNU General Public License as published by
706+ * the Free Software Foundation; either version 2 of the License, or
707+ * (at your option) any later version.
708+ *
709+ * This program is distributed in the hope that it will be useful,
710+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
711+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
712+ * GNU General Public License for more details.
713+ *
714+ * You should have received a copy of the GNU General Public License
715+ * along with this program; if not, write to the Free Software
716+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
717+ */
718+
719+#include "parallel_std_def.h"
720+
721+typedef unsigned int group; /* word type the group operations work on in this 32-bit variant */
722+#define GROUP_PARALLELISM 32 /* selects the 32-wide code paths (tested with #if GROUP_PARALLELISM==32) */
723+#define FF0() 0x0 /* group constant with every bit clear */
724+#define FF1() 0xffffffff /* group constant with all 32 bits set */
725+
726+/* 64 rows of 32 bits */
727+
728+void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ /* copy the two ints found at data into int slots g and 32+g of tab */
729+ *(((int *)tab)+g)=*((int *)data); /* first int of data -> tab slot g */
730+ *(((int *)tab)+32+g)=*(((int *)data)+1); /* second int of data -> tab slot 32+g */
731+}
732+
733+void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ /* inverse of FFTABLEIN: copy int slots g and 32+g of tab back to data */
734+ *((int *)data)=*(((int *)tab)+g); /* tab slot g -> first int of data */
735+ *(((int *)data)+1)=*(((int *)tab)+32+g); /* tab slot 32+g -> second int of data */
736+}
737+
738+void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ /* XOR n bytes taken from tab into data[0..n-1]; data is modified in place, tab is only read */
739+ int j; /* byte index into data */
740+ for(j=0;j<n;j++){
741+ *(data+j)^=*(tab+4*(g+(j>=4?32-1:0))+j); /* j<4 reads tab[4*g+j]; j>=4 reads tab[4*(g+31)+j], which equals tab[4*(32+g)+(j-4)] */
742+ }
743+}
744+
745+typedef unsigned int batch; /* word type for the batch (byte-parallel) operations in this variant */
746+#define BYTES_PER_BATCH 4 /* presumably sizeof(batch) -- TODO confirm on targets where unsigned int is not 4 bytes */
747+#define B_FFN_ALL_29() 0x29292929 /* byte 0x29 repeated in all four bytes */
748+#define B_FFN_ALL_02() 0x02020202 /* byte 0x02 repeated in all four bytes */
749+#define B_FFN_ALL_04() 0x04040404 /* byte 0x04 repeated in all four bytes */
750+#define B_FFN_ALL_10() 0x10101010 /* byte 0x10 repeated in all four bytes */
751+#define B_FFN_ALL_40() 0x40404040 /* byte 0x40 repeated in all four bytes */
752+#define B_FFN_ALL_80() 0x80808080 /* byte 0x80 repeated in all four bytes */
753+
754+#define M_EMPTY() /* expands to nothing in this variant */
755Index: libs/libmythtv/FFdecsa/tmp_autogenerated_stuff_stream.c
756===================================================================
757--- /dev/null 1970-01-01 00:00:00.000000000 +0000
758+++ libs/libmythtv/FFdecsa/tmp_autogenerated_stuff_stream.c 2006-06-20 17:36:06.000000000 -0400
759@@ -0,0 +1,814 @@
760+/* FFdecsa -- fast decsa algorithm
761+ *
762+ * Copyright (C) 2003-2004 fatih89r
763+ *
764+ * This program is free software; you can redistribute it and/or modify
765+ * it under the terms of the GNU General Public License as published by
766+ * the Free Software Foundation; either version 2 of the License, or
767+ * (at your option) any later version.
768+ *
769+ * This program is distributed in the hope that it will be useful,
770+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
771+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
772+ * GNU General Public License for more details.
773+ *
774+ * You should have received a copy of the GNU General Public License
775+ * along with this program; if not, write to the Free Software
776+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
777+ */
778+
779+
780+
781+// define statics only once, when STREAM_INIT
782+#ifdef STREAM_INIT
783+static group A[32+10][4]; // rows aboff..aboff+9 are set up by stream_cypher_group_init (aboff starts at 32); 32 extra rows because we will move back (virtual shift register)
784+static group B[32+10][4]; // same layout as A; 32 extra rows because we will move back (virtual shift register)
785+static group X[4]; // set to FF0() by stream_cypher_group_init
786+static group Y[4]; // set to FF0() by stream_cypher_group_init
787+static group Z[4]; // set to FF0() by stream_cypher_group_init
788+static group D[4]; // set to FF0() by stream_cypher_group_init
789+static group E[4]; // set to FF0() by stream_cypher_group_init
790+static group F[4]; // set to FF0() by stream_cypher_group_init
791+static group p; // set to FF0() by stream_cypher_group_init
792+static group q; // set to FF0() by stream_cypher_group_init
793+static group r; // set to FF0() by stream_cypher_group_init
794+
795+static inline void trasp64_32_88ccw(unsigned char *data){
796+/* In-place transposition of 64 rows of 32 bits (bytes transp. - 8x8 rotate counterclockwise). data is accessed as 64 unsigned ints. Five passes pair rows 16, 8, 4, 2 and 1 apart; passes 1-2 are the same as in trasp64_32_88cw, passes 3-5 shift the opposite way. */
797+#define row ((unsigned int *)data)
798+ int i,j;
799+ for(j=0;j<64;j+=32){ // pass 1, rows 16 apart: high 16 bits of row[j+i] trade places with low 16 bits of row[j+16+i]
800+ unsigned int t,b;
801+ for(i=0;i<16;i++){
802+ t=row[j+i];
803+ b=row[j+16+i];
804+ row[j+i] = (t&0x0000ffff) | ((b )<<16);
805+ row[j+16+i]=((t )>>16) | (b&0xffff0000) ;
806+ }
807+ }
808+ for(j=0;j<64;j+=16){ // pass 2, rows 8 apart: 0xff00ff00 bytes of row[j+i] trade places with 0x00ff00ff bytes of row[j+8+i]
809+ unsigned int t,b;
810+ for(i=0;i<8;i++){
811+ t=row[j+i];
812+ b=row[j+8+i];
813+ row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
814+ row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00);
815+ }
816+ }
817+ for(j=0;j<64;j+=8){ // pass 3, rows 4 apart: row[j+i] gets both low nibbles (t's shifted up), row[j+4+i] both high nibbles (b's shifted down)
818+ unsigned int t,b;
819+ for(i=0;i<4;i++){
820+ t=row[j+i];
821+ b=row[j+4+i];
822+ row[j+i] =((t&0x0f0f0f0f)<<4) | (b&0x0f0f0f0f);
823+ row[j+4+i] = (t&0xf0f0f0f0) | ((b&0xf0f0f0f0)>>4);
824+ }
825+ }
826+ for(j=0;j<64;j+=4){ // pass 4, rows 2 apart: same scheme with 2-bit fields (masks 0x33.. / 0xcc..)
827+ unsigned int t,b;
828+ for(i=0;i<2;i++){
829+ t=row[j+i];
830+ b=row[j+2+i];
831+ row[j+i] =((t&0x33333333)<<2) | (b&0x33333333);
832+ row[j+2+i] = (t&0xcccccccc) | ((b&0xcccccccc)>>2);
833+ }
834+ }
835+ for(j=0;j<64;j+=2){ // pass 5, adjacent rows: same scheme with single bits (masks 0x55.. / 0xaa..)
836+ unsigned int t,b;
837+ for(i=0;i<1;i++){
838+ t=row[j+i];
839+ b=row[j+1+i];
840+ row[j+i] =((t&0x55555555)<<1) | (b&0x55555555);
841+ row[j+1+i] = (t&0xaaaaaaaa) | ((b&0xaaaaaaaa)>>1);
842+ }
843+ }
844+#undef row
845+}
846+
847+static inline void trasp64_32_88cw(unsigned char *data){
848+/* In-place transposition of 64 rows of 32 bits (bytes transp. - 8x8 rotate clockwise). data is accessed as 64 unsigned ints. Five passes pair rows 16, 8, 4, 2 and 1 apart; passes 1-2 are the same as in trasp64_32_88ccw, passes 3-5 shift the opposite way. */
849+#define row ((unsigned int *)data)
850+ int i,j;
851+ for(j=0;j<64;j+=32){ // pass 1, rows 16 apart: high 16 bits of row[j+i] trade places with low 16 bits of row[j+16+i]
852+ unsigned int t,b;
853+ for(i=0;i<16;i++){
854+ t=row[j+i];
855+ b=row[j+16+i];
856+ row[j+i] = (t&0x0000ffff) | ((b )<<16);
857+ row[j+16+i]=((t )>>16) | (b&0xffff0000) ;
858+ }
859+ }
860+ for(j=0;j<64;j+=16){ // pass 2, rows 8 apart: 0xff00ff00 bytes of row[j+i] trade places with 0x00ff00ff bytes of row[j+8+i]
861+ unsigned int t,b;
862+ for(i=0;i<8;i++){
863+ t=row[j+i];
864+ b=row[j+8+i];
865+ row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
866+ row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00);
867+ }
868+ }
869+ for(j=0;j<64;j+=8){ // pass 3, rows 4 apart: row[j+i] gets both high nibbles (t's shifted down), row[j+4+i] both low nibbles (b's shifted up)
870+ unsigned int t,b;
871+ for(i=0;i<4;i++){
872+ t=row[j+i];
873+ b=row[j+4+i];
874+ row[j+i] =((t&0xf0f0f0f0)>>4) | (b&0xf0f0f0f0);
875+ row[j+4+i]= (t&0x0f0f0f0f) | ((b&0x0f0f0f0f)<<4);
876+ }
877+ }
878+ for(j=0;j<64;j+=4){ // pass 4, rows 2 apart: same scheme with 2-bit fields (masks 0xcc.. / 0x33..)
879+ unsigned int t,b;
880+ for(i=0;i<2;i++){
881+ t=row[j+i];
882+ b=row[j+2+i];
883+ row[j+i] =((t&0xcccccccc)>>2) | (b&0xcccccccc);
884+ row[j+2+i]= (t&0x33333333) | ((b&0x33333333)<<2);
885+ }
886+ }
887+ for(j=0;j<64;j+=2){ // pass 5, adjacent rows: same scheme with single bits (masks 0xaa.. / 0x55..)
888+ unsigned int t,b;
889+ for(i=0;i<1;i++){
890+ t=row[j+i];
891+ b=row[j+1+i];
892+ row[j+i] =((t&0xaaaaaaaa)>>1) | (b&0xaaaaaaaa);
893+ row[j+1+i]= (t&0x55555555) | ((b&0x55555555)<<1);
894+ }
895+ }
896+#undef row
897+}
898+
899+//64-64----------------------------------------------------------
900+static inline void trasp64_64_88ccw(unsigned char *data){
901+/* In-place transposition of 64 rows of 64 bits (bytes transp. - 8x8 rotate counterclockwise). data is accessed as 64 unsigned long long ints. Six passes pair rows 32, 16, 8, 4, 2 and 1 apart; passes 1-3 are the same as in trasp64_64_88cw, passes 4-6 shift the opposite way. */
902+#define row ((unsigned long long int *)data)
903+ int i,j;
904+ for(j=0;j<64;j+=64){ // pass 1 (single iteration of j), rows 32 apart: high 32 bits of row[j+i] trade places with low 32 bits of row[j+32+i]
905+ unsigned long long int t,b;
906+ for(i=0;i<32;i++){
907+ t=row[j+i];
908+ b=row[j+32+i];
909+ row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32);
910+ row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ;
911+ }
912+ }
913+ for(j=0;j<64;j+=32){ // pass 2, rows 16 apart: 16-bit fields (masks 0xffff0000.. / 0x0000ffff..)
914+ unsigned long long int t,b;
915+ for(i=0;i<16;i++){
916+ t=row[j+i];
917+ b=row[j+16+i];
918+ row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
919+ row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
920+ }
921+ }
922+ for(j=0;j<64;j+=16){ // pass 3, rows 8 apart: bytes (masks 0xff00.. / 0x00ff..)
923+ unsigned long long int t,b;
924+ for(i=0;i<8;i++){
925+ t=row[j+i];
926+ b=row[j+8+i];
927+ row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
928+ row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
929+ }
930+ }
931+ for(j=0;j<64;j+=8){ // pass 4, rows 4 apart: row[j+i] gets both low nibbles (t's shifted up), row[j+4+i] both high nibbles (b's shifted down)
932+ unsigned long long int t,b;
933+ for(i=0;i<4;i++){
934+ t=row[j+i];
935+ b=row[j+4+i];
936+ row[j+i] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL);
937+ row[j+4+i] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
938+ }
939+ }
940+ for(j=0;j<64;j+=4){ // pass 5, rows 2 apart: same scheme with 2-bit fields (masks 0x33.. / 0xcc..)
941+ unsigned long long int t,b;
942+ for(i=0;i<2;i++){
943+ t=row[j+i];
944+ b=row[j+2+i];
945+ row[j+i] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL);
946+ row[j+2+i] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2);
947+ }
948+ }
949+ for(j=0;j<64;j+=2){ // pass 6, adjacent rows: same scheme with single bits (masks 0x55.. / 0xaa..)
950+ unsigned long long int t,b;
951+ for(i=0;i<1;i++){
952+ t=row[j+i];
953+ b=row[j+1+i];
954+ row[j+i] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL);
955+ row[j+1+i] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
956+ }
957+ }
958+#undef row
959+}
960+
961+static inline void trasp64_64_88cw(unsigned char *data){
962+/* In-place transposition of 64 rows of 64 bits (bytes transp. - 8x8 rotate clockwise). data is accessed as 64 unsigned long long ints. Six passes pair rows 32, 16, 8, 4, 2 and 1 apart; passes 1-3 are the same as in trasp64_64_88ccw, passes 4-6 shift the opposite way. */
963+#define row ((unsigned long long int *)data)
964+ int i,j;
965+ for(j=0;j<64;j+=64){ // pass 1 (single iteration of j), rows 32 apart: high 32 bits of row[j+i] trade places with low 32 bits of row[j+32+i]
966+ unsigned long long int t,b;
967+ for(i=0;i<32;i++){
968+ t=row[j+i];
969+ b=row[j+32+i];
970+ row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32);
971+ row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ;
972+ }
973+ }
974+ for(j=0;j<64;j+=32){ // pass 2, rows 16 apart: 16-bit fields (masks 0xffff0000.. / 0x0000ffff..)
975+ unsigned long long int t,b;
976+ for(i=0;i<16;i++){
977+ t=row[j+i];
978+ b=row[j+16+i];
979+ row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
980+ row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
981+ }
982+ }
983+ for(j=0;j<64;j+=16){ // pass 3, rows 8 apart: bytes (masks 0xff00.. / 0x00ff..)
984+ unsigned long long int t,b;
985+ for(i=0;i<8;i++){
986+ t=row[j+i];
987+ b=row[j+8+i];
988+ row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
989+ row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
990+ }
991+ }
992+ for(j=0;j<64;j+=8){ // pass 4, rows 4 apart: row[j+i] gets both high nibbles (t's shifted down), row[j+4+i] both low nibbles (b's shifted up)
993+ unsigned long long int t,b;
994+ for(i=0;i<4;i++){
995+ t=row[j+i];
996+ b=row[j+4+i];
997+ row[j+i] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL);
998+ row[j+4+i] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
999+ }
1000+ }
1001+ for(j=0;j<64;j+=4){ // pass 5, rows 2 apart: same scheme with 2-bit fields (masks 0xcc.. / 0x33..)
1002+ unsigned long long int t,b;
1003+ for(i=0;i<2;i++){
1004+ t=row[j+i];
1005+ b=row[j+2+i];
1006+ row[j+i] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL);
1007+ row[j+2+i] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2);
1008+ }
1009+ }
1010+ for(j=0;j<64;j+=2){ // pass 6, adjacent rows: same scheme with single bits (masks 0xaa.. / 0x55..)
1011+ unsigned long long int t,b;
1012+ for(i=0;i<1;i++){
1013+ t=row[j+i];
1014+ b=row[j+1+i];
1015+ row[j+i] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL);
1016+ row[j+1+i] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1);
1017+ }
1018+ }
1019+#undef row
1020+}
1021+
1022+//64-128----------------------------------------------------------
1023+static inline void trasp64_128_88ccw(unsigned char *data){
1024+/* In-place transposition of 64 rows of 128 bits (bytes transp. - 8x8 rotate counterclockwise). Each row k is handled as two 64-bit words, halfrow[2*k] and halfrow[2*k+1]; every pass applies the same masks and shifts to both words. Six passes pair rows 32, 16, 8, 4, 2 and 1 apart. */
1025+#define halfrow ((unsigned long long int *)data)
1026+ int i,j;
1027+ for(j=0;j<64;j+=64){ // pass 1 (single iteration of j), rows 32 apart: 32-bit fields
1028+ unsigned long long int t,b;
1029+ for(i=0;i<32;i++){
1030+ t=halfrow[2*(j+i)];
1031+ b=halfrow[2*(j+32+i)];
1032+ halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32);
1033+ halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ;
1034+ t=halfrow[2*(j+i)+1];
1035+ b=halfrow[2*(j+32+i)+1];
1036+ halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32);
1037+ halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ;
1038+ }
1039+ }
1040+ for(j=0;j<64;j+=32){ // pass 2, rows 16 apart: 16-bit fields
1041+ unsigned long long int t,b;
1042+ for(i=0;i<16;i++){
1043+ t=halfrow[2*(j+i)];
1044+ b=halfrow[2*(j+16+i)];
1045+ halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
1046+ halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
1047+ t=halfrow[2*(j+i)+1];
1048+ b=halfrow[2*(j+16+i)+1];
1049+ halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
1050+ halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
1051+ }
1052+ }
1053+ for(j=0;j<64;j+=16){ // pass 3, rows 8 apart: bytes
1054+ unsigned long long int t,b;
1055+ for(i=0;i<8;i++){
1056+ t=halfrow[2*(j+i)];
1057+ b=halfrow[2*(j+8+i)];
1058+ halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
1059+ halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
1060+ t=halfrow[2*(j+i)+1];
1061+ b=halfrow[2*(j+8+i)+1];
1062+ halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
1063+ halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
1064+ }
1065+ }
1066+ for(j=0;j<64;j+=8){ // pass 4, rows 4 apart: first row gets both low nibbles (t's shifted up), second row both high nibbles (b's shifted down)
1067+ unsigned long long int t,b;
1068+ for(i=0;i<4;i++){
1069+ t=halfrow[2*(j+i)];
1070+ b=halfrow[2*(j+4+i)];
1071+ halfrow[2*(j+i)] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL);
1072+ halfrow[2*(j+4+i)] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
1073+ t=halfrow[2*(j+i)+1];
1074+ b=halfrow[2*(j+4+i)+1];
1075+ halfrow[2*(j+i)+1] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL);
1076+ halfrow[2*(j+4+i)+1] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
1077+ }
1078+ }
1079+ for(j=0;j<64;j+=4){ // pass 5, rows 2 apart: same scheme with 2-bit fields (masks 0x33.. / 0xcc..)
1080+ unsigned long long int t,b;
1081+ for(i=0;i<2;i++){
1082+ t=halfrow[2*(j+i)];
1083+ b=halfrow[2*(j+2+i)];
1084+ halfrow[2*(j+i)] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL);
1085+ halfrow[2*(j+2+i)] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2);
1086+ t=halfrow[2*(j+i)+1];
1087+ b=halfrow[2*(j+2+i)+1];
1088+ halfrow[2*(j+i)+1] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL);
1089+ halfrow[2*(j+2+i)+1] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2);
1090+ }
1091+ }
1092+ for(j=0;j<64;j+=2){ // pass 6, adjacent rows: same scheme with single bits (masks 0x55.. / 0xaa..)
1093+ unsigned long long int t,b;
1094+ for(i=0;i<1;i++){
1095+ t=halfrow[2*(j+i)];
1096+ b=halfrow[2*(j+1+i)];
1097+ halfrow[2*(j+i)] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL);
1098+ halfrow[2*(j+1+i)] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
1099+ t=halfrow[2*(j+i)+1];
1100+ b=halfrow[2*(j+1+i)+1];
1101+ halfrow[2*(j+i)+1] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL);
1102+ halfrow[2*(j+1+i)+1] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
1103+ }
1104+ }
1105+#undef halfrow
1106+}
1107+
1108+static inline void trasp64_128_88cw(unsigned char *data){
1109+/* In-place transposition of 64 rows of 128 bits (bytes transp. - 8x8 rotate clockwise). Each row k is handled as two 64-bit words, halfrow[2*k] and halfrow[2*k+1]; every pass applies the same masks and shifts to both words. Six passes pair rows 32, 16, 8, 4, 2 and 1 apart. */
1110+#define halfrow ((unsigned long long int *)data)
1111+ int i,j;
1112+ for(j=0;j<64;j+=64){ // pass 1 (single iteration of j), rows 32 apart: 32-bit fields
1113+ unsigned long long int t,b;
1114+ for(i=0;i<32;i++){
1115+ t=halfrow[2*(j+i)];
1116+ b=halfrow[2*(j+32+i)];
1117+ halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32);
1118+ halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ;
1119+ t=halfrow[2*(j+i)+1];
1120+ b=halfrow[2*(j+32+i)+1];
1121+ halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32);
1122+ halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ;
1123+ }
1124+ }
1125+ for(j=0;j<64;j+=32){ // pass 2, rows 16 apart: 16-bit fields
1126+ unsigned long long int t,b;
1127+ for(i=0;i<16;i++){
1128+ t=halfrow[2*(j+i)];
1129+ b=halfrow[2*(j+16+i)];
1130+ halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
1131+ halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
1132+ t=halfrow[2*(j+i)+1];
1133+ b=halfrow[2*(j+16+i)+1];
1134+ halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
1135+ halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
1136+ }
1137+ }
1138+ for(j=0;j<64;j+=16){ // pass 3, rows 8 apart: bytes
1139+ unsigned long long int t,b;
1140+ for(i=0;i<8;i++){
1141+ t=halfrow[2*(j+i)];
1142+ b=halfrow[2*(j+8+i)];
1143+ halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
1144+ halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
1145+ t=halfrow[2*(j+i)+1];
1146+ b=halfrow[2*(j+8+i)+1];
1147+ halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
1148+ halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
1149+ }
1150+ }
1151+ for(j=0;j<64;j+=8){ // pass 4, rows 4 apart: first row gets both high nibbles (t's shifted down), second row both low nibbles (b's shifted up)
1152+ unsigned long long int t,b;
1153+ for(i=0;i<4;i++){
1154+ t=halfrow[2*(j+i)];
1155+ b=halfrow[2*(j+4+i)];
1156+ halfrow[2*(j+i)] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL);
1157+ halfrow[2*(j+4+i)] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
1158+ t=halfrow[2*(j+i)+1];
1159+ b=halfrow[2*(j+4+i)+1];
1160+ halfrow[2*(j+i)+1] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL);
1161+ halfrow[2*(j+4+i)+1] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
1162+ }
1163+ }
1164+ for(j=0;j<64;j+=4){ // pass 5, rows 2 apart: same scheme with 2-bit fields (masks 0xcc.. / 0x33..)
1165+ unsigned long long int t,b;
1166+ for(i=0;i<2;i++){
1167+ t=halfrow[2*(j+i)];
1168+ b=halfrow[2*(j+2+i)];
1169+ halfrow[2*(j+i)] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL);
1170+ halfrow[2*(j+2+i)] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2);
1171+ t=halfrow[2*(j+i)+1];
1172+ b=halfrow[2*(j+2+i)+1];
1173+ halfrow[2*(j+i)+1] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL);
1174+ halfrow[2*(j+2+i)+1] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2);
1175+ }
1176+ }
1177+ for(j=0;j<64;j+=2){ // pass 6, adjacent rows: same scheme with single bits (masks 0xaa.. / 0x55..)
1178+ unsigned long long int t,b;
1179+ for(i=0;i<1;i++){
1180+ t=halfrow[2*(j+i)];
1181+ b=halfrow[2*(j+1+i)];
1182+ halfrow[2*(j+i)] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL);
1183+ halfrow[2*(j+1+i)] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1);
1184+ t=halfrow[2*(j+i)+1];
1185+ b=halfrow[2*(j+1+i)+1];
1186+ halfrow[2*(j+i)+1] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL);
1187+ halfrow[2*(j+1+i)+1] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1);
1188+ }
1189+ }
1190+#undef halfrow
1191+}
1192+#endif
1193+
1194+
1195+#ifdef STREAM_INIT
1196+void stream_cypher_group_init(
1197+ group iA[8][4], // [In] iA00,iA01,...iA73 32 groups | Derived from key.
1198+ group iB[8][4], // [In] iB00,iB01,...iB73 32 groups | Derived from key.
1199+ unsigned char *sb) // [In] (SB0,SB1,...SB7)...x32 32*8 bytes | Extra input.
1200+#endif
1201+#ifdef STREAM_NORMAL
1202+void stream_cypher_group_normal(
1203+ unsigned char *cb) // [Out] (CB0,CB1,...CB7)...x32 32*8 bytes | Output.
1204+#endif
1205+{
1206+#ifdef STREAM_INIT
1207+ group in1[4];
1208+ group in2[4];
1209+#endif
1210+ group extra_B[4];
1211+ group fa,fb,fc,fd,fe;
1212+ group s1a,s1b,s2a,s2b,s3a,s3b,s4a,s4b,s5a,s5b,s6a,s6b,s7a,s7b;
1213+ group next_E[4];
1214+ group tmp0,tmp1,tmp2,tmp3,tmp4;
1215+#ifdef STREAM_INIT
1216+ group *sb_g=(group *)sb;
1217+#endif
1218+#ifdef STREAM_NORMAL
1219+ group *cb_g=(group *)cb;
1220+#endif
1221+ int aboff;
1222+ int i,j,k,b;
1223+
1224+#ifdef STREAM_INIT
1225+#endif
1226+#ifdef STREAM_NORMAL
1227+#endif
1228+#ifdef STREAM_INIT
1229+
1230+#if GROUP_PARALLELISM==32
1231+trasp64_32_88ccw(sb);
1232+#endif
1233+#if GROUP_PARALLELISM==64
1234+trasp64_64_88ccw(sb);
1235+#endif
1236+#if GROUP_PARALLELISM==128
1237+trasp64_128_88ccw(sb);
1238+#endif
1239+
1240+#endif
1241+
1242+ aboff=32;
1243+
1244+#ifdef STREAM_INIT
1245+ // load first 32 bits of ck into A[aboff+0]..A[aboff+7]
1246+ // load last 32 bits of ck into B[aboff+0]..B[aboff+7]
1247+ // all other regs = 0
1248+ for(i=0;i<8;i++){
1249+ for(b=0;b<4;b++){
1250+ A[aboff+i][b]=iA[i][b];
1251+ B[aboff+i][b]=iB[i][b];
1252+ }
1253+ }
1254+ for(b=0;b<4;b++){
1255+ A[aboff+8][b]=FF0();
1256+ A[aboff+9][b]=FF0();
1257+ B[aboff+8][b]=FF0();
1258+ B[aboff+9][b]=FF0();
1259+ }
1260+ for(b=0;b<4;b++){
1261+ X[b]=FF0();
1262+ Y[b]=FF0();
1263+ Z[b]=FF0();
1264+ D[b]=FF0();
1265+ E[b]=FF0();
1266+ F[b]=FF0();
1267+ }
1268+ p=FF0();
1269+ q=FF0();
1270+ r=FF0();
1271+#endif
1272+
1273+
1274+////////////////////////////////////////////////////////////////////////////////
1275+
1276+ // EXTERNAL LOOP - 8 bytes per operation
1277+ for(i=0;i<8;i++){
1278+
1279+
1280+#ifdef STREAM_INIT
1281+ for(b=0;b<4;b++){
1282+ in1[b]=sb_g[8*i+4+b];
1283+ in2[b]=sb_g[8*i+b];
1284+ }
1285+#endif
1286+
1287+ // INTERNAL LOOP - 2 bits per iteration
1288+ for(j=0; j<4; j++){
1289+
1290+
1291+ // from A0..A9, 35 bits are selected as inputs to 7 s-boxes
1292+ // 5 bits input per s-box, 2 bits output per s-box
1293+
1294+ // we can select bits with zero masking and shifting operations
1295+ // and synthesize s-boxes with optimized boolean functions.
1296+ // this is the actual reason we do all the crazy transposition
1297+ // stuff to switch between normal and bit slice representations.
1298+ // this code really flies.
1299+
1300+ fe=A[aboff+3][0];fa=A[aboff+0][2];fb=A[aboff+5][1];fc=A[aboff+6][3];fd=A[aboff+8][0];
1301+/* 1000 1110 1110 0001 : lev 7: */ //tmp0=( fa^( fb^( ( ( ( fa|fb )^fc )|( fc^fd ) )^ALL_ONES ) ) );
1302+/* 1110 0010 0011 0011 : lev 6: */ //tmp1=( ( fa|fb )^( ( fc&( fa|( fb^fd ) ) )^ALL_ONES ) );
1303+/* 0011 0110 1000 1101 : lev 5: */ //tmp2=( fa^( ( fb&fd )^( ( fa&fd )|fc ) ) );
1304+/* 0101 0101 1001 0011 : lev 5: */ //tmp3=( ( fa&fc )^( fa^( ( fa&fb )|fd ) ) );
1305+/* 1000 1110 1110 0001 : lev 7: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFOR(FFXOR(FFOR(fa,fb),fc),FFXOR(fc,fd)),FF1())));
1306+/* 1110 0010 0011 0011 : lev 6: */ tmp1=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fa,FFXOR(fb,fd))),FF1()));
1307+/* 0011 0110 1000 1101 : lev 5: */ tmp2=FFXOR(fa,FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),fc)));
1308+/* 0101 0101 1001 0011 : lev 5: */ tmp3=FFXOR(FFAND(fa,fc),FFXOR(fa,FFOR(FFAND(fa,fb),fd)));
1309+ s1a=FFXOR(tmp0,FFAND(fe,tmp1));
1310+ s1b=FFXOR(tmp2,FFAND(fe,tmp3));
1311+//dump_mem("s1as1b-fe",&fe,BYPG,BYPG);
1312+//dump_mem("s1as1b-fa",&fa,BYPG,BYPG);
1313+//dump_mem("s1as1b-fb",&fb,BYPG,BYPG);
1314+//dump_mem("s1as1b-fc",&fc,BYPG,BYPG);
1315+//dump_mem("s1as1b-fd",&fd,BYPG,BYPG);
1316+
1317+ fe=A[aboff+1][1];fa=A[aboff+2][2];fb=A[aboff+5][3];fc=A[aboff+6][0];fd=A[aboff+8][1];
1318+/* 1001 1110 0110 0001 : lev 6: */ //tmp0=( fa^( ( fb&( fc|fd ) )^( fc^( fd^ALL_ONES ) ) ) );
1319+/* 0000 0011 0111 1011 : lev 5: */ //tmp1=( ( fa&( fb^fd ) )|( ( fa|fb )&fc ) );
1320+/* 1100 0110 1101 0010 : lev 6: */ //tmp2=( ( fb&fd )^( ( fa&fd )|( fb^( fc^ALL_ONES ) ) ) );
1321+/* 0001 1110 1111 0101 : lev 5: */ //tmp3=( ( fa&fd )|( fa^( fb^( fc&fd ) ) ) );
1322+/* 1001 1110 0110 0001 : lev 6: */ tmp0=FFXOR(fa,FFXOR(FFAND(fb,FFOR(fc,fd)),FFXOR(fc,FFXOR(fd,FF1()))));
1323+/* 0000 0011 0111 1011 : lev 5: */ tmp1=FFOR(FFAND(fa,FFXOR(fb,fd)),FFAND(FFOR(fa,fb),fc));
1324+/* 1100 0110 1101 0010 : lev 6: */ tmp2=FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),FFXOR(fb,FFXOR(fc,FF1()))));
1325+/* 0001 1110 1111 0101 : lev 5: */ tmp3=FFOR(FFAND(fa,fd),FFXOR(fa,FFXOR(fb,FFAND(fc,fd))));
1326+ s2a=FFXOR(tmp0,FFAND(fe,tmp1));
1327+ s2b=FFXOR(tmp2,FFAND(fe,tmp3));
1328+
1329+ fe=A[aboff+0][3];fa=A[aboff+1][0];fb=A[aboff+4][1];fc=A[aboff+4][3];fd=A[aboff+5][2];
1330+/* 0100 1011 1001 0110 : lev 5: */ //tmp0=( fa^( fb^( ( fc&( fa|fd ) )^fd ) ) );
1331+/* 1101 0101 1000 1100 : lev 7: */ //tmp1=( ( fa&fc )^( ( fa^fd )|( ( fb|fc )^( fd^ALL_ONES ) ) ) );
1332+/* 0010 0111 1101 1000 : lev 4: */ //tmp2=( fa^( ( ( fb^fc )&fd )^fc ) );
1333+/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES;
1334+/* 0100 1011 1001 0110 : lev 5: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFAND(fc,FFOR(fa,fd)),fd)));
1335+/* 1101 0101 1000 1100 : lev 7: */ tmp1=FFXOR(FFAND(fa,fc),FFOR(FFXOR(fa,fd),FFXOR(FFOR(fb,fc),FFXOR(fd,FF1()))));
1336+/* 0010 0111 1101 1000 : lev 4: */ tmp2=FFXOR(fa,FFXOR(FFAND(FFXOR(fb,fc),fd),fc));
1337+/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1();
1338+ s3a=FFXOR(tmp0,FFAND(FFNOT(fe),tmp1));
1339+ s3b=FFXOR(tmp2,FFAND(fe,tmp3));
1340+
1341+ fe=A[aboff+2][3];fa=A[aboff+0][1];fb=A[aboff+1][3];fc=A[aboff+3][2];fd=A[aboff+7][0];
1342+/* 1011 0101 0100 1001 : lev 7: */ //tmp0=( fa^( ( fc&( fa^fd ) )|( fb^( fc|( fd^ALL_ONES ) ) ) ) );
1343+/* 0010 1101 0110 0110 : lev 6: */ //tmp1=( ( fa&fb )^( fb^( ( ( fa|fc )&fd )^fc ) ) );
1344+/* 0110 0111 1101 0000 : lev 7: */ //tmp2=( fa^( ( fb&fc )|( ( ( fa&( fb^fd ) )|fc )^fd ) ) );
1345+/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES;
1346+/* 1011 0101 0100 1001 : lev 7: */ tmp0=FFXOR(fa,FFOR(FFAND(fc,FFXOR(fa,fd)),FFXOR(fb,FFOR(fc,FFXOR(fd,FF1())))));
1347+/* 0010 1101 0110 0110 : lev 6: */ tmp1=FFXOR(FFAND(fa,fb),FFXOR(fb,FFXOR(FFAND(FFOR(fa,fc),fd),fc)));
1348+/* 0110 0111 1101 0000 : lev 7: */ tmp2=FFXOR(fa,FFOR(FFAND(fb,fc),FFXOR(FFOR(FFAND(fa,FFXOR(fb,fd)),fc),fd)));
1349+/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1();
1350+ s4a=FFXOR(tmp0,FFAND(fe,FFXOR(tmp1,tmp0)));
1351+ s4b=FFXOR(FFXOR(s4a,tmp2),FFAND(fe,tmp3));
1352+
1353+ fe=A[aboff+4][2];fa=A[aboff+3][3];fb=A[aboff+5][0];fc=A[aboff+7][1];fd=A[aboff+8][2];
1354+/* 1000 1111 0011 0010 : lev 7: */ //tmp0=( ( ( fa&( fb|fc ) )^fb )|( ( ( fa^fc )|fd )^ALL_ONES ) );
1355+/* 0110 1011 0000 1011 : lev 6: */ //tmp1=( fb^( ( fc^fd )&( fc^( fb|( fa^fd ) ) ) ) );
1356+/* 0001 1010 0111 1001 : lev 6: */ //tmp2=( ( fa&fc )^( fb^( ( fb|( fa^fc ) )&fd ) ) );
1357+/* 0101 1101 1101 0101 : lev 4: */ //tmp3=( ( ( fa^fb )&( fc^ALL_ONES ) )|fd );
1358+/* 1000 1111 0011 0010 : lev 7: */ tmp0=FFOR(FFXOR(FFAND(fa,FFOR(fb,fc)),fb),FFXOR(FFOR(FFXOR(fa,fc),fd),FF1()));
1359+/* 0110 1011 0000 1011 : lev 6: */ tmp1=FFXOR(fb,FFAND(FFXOR(fc,fd),FFXOR(fc,FFOR(fb,FFXOR(fa,fd)))));
1360+/* 0001 1010 0111 1001 : lev 6: */ tmp2=FFXOR(FFAND(fa,fc),FFXOR(fb,FFAND(FFOR(fb,FFXOR(fa,fc)),fd)));
1361+/* 0101 1101 1101 0101 : lev 4: */ tmp3=FFOR(FFAND(FFXOR(fa,fb),FFXOR(fc,FF1())),fd);
1362+ s5a=FFXOR(tmp0,FFAND(fe,tmp1));
1363+ s5b=FFXOR(tmp2,FFAND(fe,tmp3));
1364+
1365+ fe=A[aboff+2][1];fa=A[aboff+3][1];fb=A[aboff+4][0];fc=A[aboff+6][2];fd=A[aboff+8][3];
1366+/* 0011 0110 0010 1101 : lev 6: */ //tmp0=( ( ( fa&fc )&fd )^( ( fb&( fa|fd ) )^fc ) );
1367+/* 1110 1110 1011 1011 : lev 3: */ //tmp1=( ( ( fa^fc )&fd )^ALL_ONES );
1368+/* 0101 1000 0110 0111 : lev 6: */ //tmp2=( ( fa&( fb|fc ) )^( fb^( ( fb&fc )|fd ) ) );
1369+/* 0001 0011 0000 0001 : lev 5: */ //tmp3=( fc&( ( fa&( fb^fd ) )^( fb|fd ) ) );
1370+/* 0011 0110 0010 1101 : lev 6: */ tmp0=FFXOR(FFAND(FFAND(fa,fc),fd),FFXOR(FFAND(fb,FFOR(fa,fd)),fc));
1371+/* 1110 1110 1011 1011 : lev 3: */ tmp1=FFXOR(FFAND(FFXOR(fa,fc),fd),FF1());
1372+/* 0101 1000 0110 0111 : lev 6: */ tmp2=FFXOR(FFAND(fa,FFOR(fb,fc)),FFXOR(fb,FFOR(FFAND(fb,fc),fd)));
1373+/* 0001 0011 0000 0001 : lev 5: */ tmp3=FFAND(fc,FFXOR(FFAND(fa,FFXOR(fb,fd)),FFOR(fb,fd)));
1374+ s6a=FFXOR(tmp0,FFAND(fe,tmp1));
1375+ s6b=FFXOR(tmp2,FFAND(fe,tmp3));
1376+
1377+ fe=A[aboff+1][2];fa=A[aboff+2][0];fb=A[aboff+6][1];fc=A[aboff+7][2];fd=A[aboff+7][3];
1378+/* 0111 1000 1001 0110 : lev 5: */ //tmp0=( fb^( ( fc&fd )|( fa^( fc^fd ) ) ) );
1379+/* 0100 1001 0101 1011 : lev 6: */ //tmp1=( ( fb|fd )&( ( fa&fc )|( fb^( fc^fd ) ) ) );
1380+/* 0100 1001 1011 1001 : lev 5: */ //tmp2=( ( fa|fb )^( ( fc&( fb|fd ) )^fd ) );
1381+/* 1111 1111 1101 1101 : lev 3: */ //tmp3=( fd|( ( fa&fc )^ALL_ONES ) );
1382+/* 0111 1000 1001 0110 : lev 5: */ tmp0=FFXOR(fb,FFOR(FFAND(fc,fd),FFXOR(fa,FFXOR(fc,fd))));
1383+/* 0100 1001 0101 1011 : lev 6: */ tmp1=FFAND(FFOR(fb,fd),FFOR(FFAND(fa,fc),FFXOR(fb,FFXOR(fc,fd))));
1384+/* 0100 1001 1011 1001 : lev 5: */ tmp2=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fb,fd)),fd));
1385+/* 1111 1111 1101 1101 : lev 3: */ tmp3=FFOR(fd,FFXOR(FFAND(fa,fc),FF1()));
1386+ s7a=FFXOR(tmp0,FFAND(fe,tmp1));
1387+ s7b=FFXOR(tmp2,FFAND(fe,tmp3));
1388+
1389+
1390+/*
1391+ we have just done this:
1392+
1393+ int sbox1[0x20] = {2,0,1,1,2,3,3,0, 3,2,2,0,1,1,0,3, 0,3,3,0,2,2,1,1, 2,2,0,3,1,1,3,0};
1394+ int sbox2[0x20] = {3,1,0,2,2,3,3,0, 1,3,2,1,0,0,1,2, 3,1,0,3,3,2,0,2, 0,0,1,2,2,1,3,1};
1395+ int sbox3[0x20] = {2,0,1,2,2,3,3,1, 1,1,0,3,3,0,2,0, 1,3,0,1,3,0,2,2, 2,0,1,2,0,3,3,1};
1396+ int sbox4[0x20] = {3,1,2,3,0,2,1,2, 1,2,0,1,3,0,0,3, 1,0,3,1,2,3,0,3, 0,3,2,0,1,2,2,1};
1397+ int sbox5[0x20] = {2,0,0,1,3,2,3,2, 0,1,3,3,1,0,2,1, 2,3,2,0,0,3,1,1, 1,0,3,2,3,1,0,2};
1398+ int sbox6[0x20] = {0,1,2,3,1,2,2,0, 0,1,3,0,2,3,1,3, 2,3,0,2,3,0,1,1, 2,1,1,2,0,3,3,0};
1399+ int sbox7[0x20] = {0,3,2,2,3,0,0,1, 3,0,1,3,1,2,2,1, 1,0,3,3,0,1,1,2, 2,3,1,0,2,3,0,2};
1400+
1401+ s12 = sbox1[ (((A3>>0)&1)<<4) | (((A0>>2)&1)<<3) | (((A5>>1)&1)<<2) | (((A6>>3)&1)<<1) | (((A8>>0)&1)<<0) ]
1402+ |sbox2[ (((A1>>1)&1)<<4) | (((A2>>2)&1)<<3) | (((A5>>3)&1)<<2) | (((A6>>0)&1)<<1) | (((A8>>1)&1)<<0) ];
1403+ s34 = sbox3[ (((A0>>3)&1)<<4) | (((A1>>0)&1)<<3) | (((A4>>1)&1)<<2) | (((A4>>3)&1)<<1) | (((A5>>2)&1)<<0) ]
1404+ |sbox4[ (((A2>>3)&1)<<4) | (((A0>>1)&1)<<3) | (((A1>>3)&1)<<2) | (((A3>>2)&1)<<1) | (((A7>>0)&1)<<0) ];
1405+ s56 = sbox5[ (((A4>>2)&1)<<4) | (((A3>>3)&1)<<3) | (((A5>>0)&1)<<2) | (((A7>>1)&1)<<1) | (((A8>>2)&1)<<0) ]
1406+ |sbox6[ (((A2>>1)&1)<<4) | (((A3>>1)&1)<<3) | (((A4>>0)&1)<<2) | (((A6>>2)&1)<<1) | (((A8>>3)&1)<<0) ];
1407+ s7 = sbox7[ (((A1>>2)&1)<<4) | (((A2>>0)&1)<<3) | (((A6>>1)&1)<<2) | (((A7>>2)&1)<<1) | (((A7>>3)&1)<<0) ];
1408+*/
1409+
1410+ // use 4x4 xor to produce extra nibble for T3
1411+
1412+ extra_B[3]=FFXOR(FFXOR(FFXOR(B[aboff+2][0],B[aboff+5][1]),B[aboff+6][2]),B[aboff+8][3]);
1413+ extra_B[2]=FFXOR(FFXOR(FFXOR(B[aboff+5][0],B[aboff+7][1]),B[aboff+2][3]),B[aboff+3][2]);
1414+ extra_B[1]=FFXOR(FFXOR(FFXOR(B[aboff+4][3],B[aboff+7][2]),B[aboff+3][0]),B[aboff+4][1]);
1415+ extra_B[0]=FFXOR(FFXOR(FFXOR(B[aboff+8][2],B[aboff+5][3]),B[aboff+2][1]),B[aboff+7][0]);
1416+
1417+ // T1 = xor all inputs
1418+ // in1, in2, D are only used in T1 during initialisation, not generation
1419+ for(b=0;b<4;b++){
1420+ A[aboff-1][b]=FFXOR(A[aboff+9][b],X[b]);
1421+ }
1422+
1423+#ifdef STREAM_INIT
1424+ for(b=0;b<4;b++){
1425+ A[aboff-1][b]=FFXOR(FFXOR(A[aboff-1][b],D[b]),((j % 2) ? in2[b] : in1[b]));
1426+ }
1427+#endif
1428+
1429+
1430+ // T2 = xor all inputs
1431+ // in1, in2 are only used in T1 during initialisation, not generation
1432+ // if p=0, use this, if p=1, rotate the result left
1433+ for(b=0;b<4;b++){
1434+ B[aboff-1][b]=FFXOR(FFXOR(B[aboff+6][b],B[aboff+9][b]),Y[b]);
1435+ }
1436+
1437+#ifdef STREAM_INIT
1438+ for(b=0;b<4;b++){
1439+ B[aboff-1][b]=FFXOR(B[aboff-1][b],((j % 2) ? in1[b] : in2[b]));
1440+ }
1441+#endif
1442+
1443+
1444+ // if p=1, rotate left (yes, this is what we're doing)
1445+ tmp3=B[aboff-1][3];
1446+ B[aboff-1][3]=FFXOR(B[aboff-1][3],FFAND(FFXOR(B[aboff-1][3],B[aboff-1][2]),p));
1447+ B[aboff-1][2]=FFXOR(B[aboff-1][2],FFAND(FFXOR(B[aboff-1][2],B[aboff-1][1]),p));
1448+ B[aboff-1][1]=FFXOR(B[aboff-1][1],FFAND(FFXOR(B[aboff-1][1],B[aboff-1][0]),p));
1449+ B[aboff-1][0]=FFXOR(B[aboff-1][0],FFAND(FFXOR(B[aboff-1][0],tmp3),p));
1450+
1451+
1452+ // T3 = xor all inputs
1453+ for(b=0;b<4;b++){
1454+ D[b]=FFXOR(FFXOR(E[b],Z[b]),extra_B[b]);
1455+ }
1456+
1457+
1458+ // T4 = sum, carry of Z + E + r
1459+ for(b=0;b<4;b++){
1460+ next_E[b]=F[b];
1461+ }
1462+
1463+ tmp0=FFXOR(Z[0],E[0]);
1464+ tmp1=FFAND(Z[0],E[0]);
1465+ F[0]=FFXOR(E[0],FFAND(q,FFXOR(Z[0],r)));
1466+ tmp3=FFAND(tmp0,r);
1467+ tmp4=FFOR(tmp1,tmp3);
1468+
1469+ tmp0=FFXOR(Z[1],E[1]);
1470+ tmp1=FFAND(Z[1],E[1]);
1471+ F[1]=FFXOR(E[1],FFAND(q,FFXOR(Z[1],tmp4)));
1472+ tmp3=FFAND(tmp0,tmp4);
1473+ tmp4=FFOR(tmp1,tmp3);
1474+
1475+ tmp0=FFXOR(Z[2],E[2]);
1476+ tmp1=FFAND(Z[2],E[2]);
1477+ F[2]=FFXOR(E[2],FFAND(q,FFXOR(Z[2],tmp4)));
1478+ tmp3=FFAND(tmp0,tmp4);
1479+ tmp4=FFOR(tmp1,tmp3);
1480+
1481+ tmp0=FFXOR(Z[3],E[3]);
1482+ tmp1=FFAND(Z[3],E[3]);
1483+ F[3]=FFXOR(E[3],FFAND(q,FFXOR(Z[3],tmp4)));
1484+ tmp3=FFAND(tmp0,tmp4);
1485+ r=FFXOR(r,FFAND(q,FFXOR(FFOR(tmp1,tmp3),r))); // ultimate carry
1486+
1487+/*
1488+ we have just done this: (believe it or not)
1489+
1490+ if (q) {
1491+ F = Z + E + r;
1492+ r = (F >> 4) & 1;
1493+ F = F & 0x0f;
1494+ }
1495+ else {
1496+ F = E;
1497+ }
1498+*/
1499+ for(b=0;b<4;b++){
1500+ E[b]=next_E[b];
1501+ }
1502+
1503+ // this simple instruction is virtually shifting all the shift registers
1504+ aboff--;
1505+
1506+/*
1507+ we've just done this:
1508+
1509+ A9=A8;A8=A7;A7=A6;A6=A5;A5=A4;A4=A3;A3=A2;A2=A1;A1=A0;A0=next_A0;
1510+ B9=B8;B8=B7;B7=B6;B6=B5;B5=B4;B4=B3;B3=B2;B2=B1;B1=B0;B0=next_B0;
1511+*/
1512+
1513+ X[0]=s1a;
1514+ X[1]=s2a;
1515+ X[2]=s3b;
1516+ X[3]=s4b;
1517+ Y[0]=s3a;
1518+ Y[1]=s4a;
1519+ Y[2]=s5b;
1520+ Y[3]=s6b;
1521+ Z[0]=s5a;
1522+ Z[1]=s6a;
1523+ Z[2]=s1b;
1524+ Z[3]=s2b;
1525+ p=s7a;
1526+ q=s7b;
1527+
1528+#ifdef STREAM_NORMAL
1529+ // require 4 loops per output byte
1530+ // 2 output bits are a function of the 4 bits of D
1531+ // xor 2 by 2
1532+ cb_g[8*i+7-2*j]=FFXOR(D[2],D[3]);
1533+ cb_g[8*i+6-2*j]=FFXOR(D[0],D[1]);
1534+#endif
1535+
1536+
1537+ } // INTERNAL LOOP
1538+
1539+
1540+ } // EXTERNAL LOOP
1541+
1542+ // move 32 steps forward, ready for next call
1543+ for(k=0;k<10;k++){
1544+ for(b=0;b<4;b++){
1545+ A[32+k][b]=A[k][b];
1546+ B[32+k][b]=B[k][b];
1547+ }
1548+ }
1549+
1550+
1551+////////////////////////////////////////////////////////////////////////////////
1552+
1553+#ifdef STREAM_NORMAL
1554+
1555+#if GROUP_PARALLELISM==32
1556+trasp64_32_88cw(cb);
1557+#endif
1558+#if GROUP_PARALLELISM==64
1559+trasp64_64_88cw(cb);
1560+#endif
1561+#if GROUP_PARALLELISM==128
1562+trasp64_128_88cw(cb);
1563+#endif
1564+
1565+#endif
1566+
1567+#ifdef STREAM_INIT
1568+#endif
1569+#ifdef STREAM_NORMAL
1570+#endif
1571+
1572+}
1573+
1574Index: libs/libmythtv/FFdecsa/parallel_064_8charA.h
1575===================================================================
1576--- /dev/null 1970-01-01 00:00:00.000000000 +0000
1577+++ libs/libmythtv/FFdecsa/parallel_064_8charA.h 2006-06-20 17:36:06.000000000 -0400
1578@@ -0,0 +1,171 @@
1579+/* FFdecsa -- fast decsa algorithm
1580+ *
1581+ * Copyright (C) 2003-2004 fatih89r
1582+ *
1583+ * This program is free software; you can redistribute it and/or modify
1584+ * it under the terms of the GNU General Public License as published by
1585+ * the Free Software Foundation; either version 2 of the License, or
1586+ * (at your option) any later version.
1587+ *
1588+ * This program is distributed in the hope that it will be useful,
1589+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1590+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1591+ * GNU General Public License for more details.
1592+ *
1593+ * You should have received a copy of the GNU General Public License
1594+ * along with this program; if not, write to the Free Software
1595+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
1596+ */
1597+
1598+
1599+struct group_t{ /* a "group": 8 unsigned chars that the FF* helpers below process byte by byte */
1600+ unsigned char s1[8];
1601+};
1602+typedef struct group_t group;
1603+
1604+#define GROUP_PARALLELISM 64 /* 8 bytes * 8 bits per group */
1605+
1606+group static inline FF0(){ /* returns a group with every byte set to 0x00 */
1607+ group res;
1608+ int i;
1609+ for(i=0;i<8;i++) res.s1[i]=0x0;
1610+ return res;
1611+}
1612+
1613+group static inline FF1(){ /* returns a group with every byte set to 0xff (all bits set) */
1614+ group res;
1615+ int i;
1616+ for(i=0;i<8;i++) res.s1[i]=0xff;
1617+ return res;
1618+}
1619+
1620+group static inline FFAND(group a,group b){ /* returns the byte-wise bitwise AND of a and b */
1621+ group res;
1622+ int i;
1623+ for(i=0;i<8;i++) res.s1[i]=a.s1[i]&b.s1[i];
1624+ return res;
1625+}
1626+
1627+group static inline FFOR(group a,group b){ /* returns the byte-wise bitwise OR of a and b */
1628+ group res;
1629+ int i;
1630+ for(i=0;i<8;i++) res.s1[i]=a.s1[i]|b.s1[i];
1631+ return res;
1632+}
1633+
1634+group static inline FFXOR(group a,group b){ /* returns the byte-wise bitwise XOR of a and b */
1635+ group res;
1636+ int i;
1637+ for(i=0;i<8;i++) res.s1[i]=a.s1[i]^b.s1[i];
1638+ return res;
1639+}
1640+
1641+group static inline FFNOT(group a){ /* returns the bitwise complement of every byte of a */
1642+ group res;
1643+ int i;
1644+ for(i=0;i<8;i++) res.s1[i]=~a.s1[i]; /* result of ~ is narrowed back to unsigned char on assignment */
1645+ return res;
1646+}
1647+
1648+
1649+/* 64 rows of 64 bits */
1650+
1651+void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ /* copies two consecutive ints from data into int slots 2*g and 2*g+1 of tab */
1652+ *(((int *)tab)+2*g)=*((int *)data); /* NOTE(review): the int casts presumably rely on 4-byte int and suitably aligned buffers -- confirm */
1653+ *(((int *)tab)+2*g+1)=*(((int *)data)+1);
1654+}
1655+
1656+void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ /* copies int slots 2*g and 2*g+1 of tab into the first two ints of data */
1657+ *((int *)data)=*(((int *)tab)+2*g); /* NOTE(review): the int casts presumably rely on 4-byte int and suitably aligned buffers -- confirm */
1658+ *(((int *)data)+1)=*(((int *)tab)+2*g+1);
1659+}
1660+
1661+void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ /* XORs n bytes of tab, starting at byte offset 8*g, into data[0..n-1] */
1662+ int j;
1663+ for(j=0;j<n;j++){
1664+ *(data+j)^=*(tab+8*g+j);
1665+ }
1666+}
1667+
1668+struct batch_t{ /* a "batch": 8 unsigned chars that the B_FF* helpers below process byte by byte */
1669+ unsigned char s1[8];
1670+};
1671+typedef struct batch_t batch;
1672+
1673+#define BYTES_PER_BATCH 8 /* matches the size of batch_t.s1 */
1674+
1675+batch static inline B_FFAND(batch a,batch b){ /* returns the byte-wise bitwise AND of a and b */
1676+ batch res;
1677+ int i;
1678+ for(i=0;i<8;i++) res.s1[i]=a.s1[i]&b.s1[i];
1679+ return res;
1680+}
1681+
1682+batch static inline B_FFOR(batch a,batch b){ /* returns the byte-wise bitwise OR of a and b */
1683+ batch res;
1684+ int i;
1685+ for(i=0;i<8;i++) res.s1[i]=a.s1[i]|b.s1[i];
1686+ return res;
1687+}
1688+
1689+batch static inline B_FFXOR(batch a,batch b){ /* returns the byte-wise bitwise XOR of a and b */
1690+ batch res;
1691+ int i;
1692+ for(i=0;i<8;i++) res.s1[i]=a.s1[i]^b.s1[i];
1693+ return res;
1694+}
1695+
1696+
1697+batch static inline B_FFN_ALL_29(){ /* constant batch: every byte is 0x29 */
1698+ batch res;
1699+ int i;
1700+ for(i=0;i<8;i++) res.s1[i]=0x29;
1701+ return res;
1702+}
1703+batch static inline B_FFN_ALL_02(){ /* constant batch: every byte is 0x02 */
1704+ batch res;
1705+ int i;
1706+ for(i=0;i<8;i++) res.s1[i]=0x02;
1707+ return res;
1708+}
1709+batch static inline B_FFN_ALL_04(){ /* constant batch: every byte is 0x04 */
1710+ batch res;
1711+ int i;
1712+ for(i=0;i<8;i++) res.s1[i]=0x04;
1713+ return res;
1714+}
1715+batch static inline B_FFN_ALL_10(){ /* constant batch: every byte is 0x10 */
1716+ batch res;
1717+ int i;
1718+ for(i=0;i<8;i++) res.s1[i]=0x10;
1719+ return res;
1720+}
1721+batch static inline B_FFN_ALL_40(){ /* constant batch: every byte is 0x40 */
1722+ batch res;
1723+ int i;
1724+ for(i=0;i<8;i++) res.s1[i]=0x40;
1725+ return res;
1726+}
1727+batch static inline B_FFN_ALL_80(){ /* constant batch: every byte is 0x80 */
1728+ batch res;
1729+ int i;
1730+ for(i=0;i<8;i++) res.s1[i]=0x80;
1731+ return res;
1732+}
1733+
1734+batch static inline B_FFSH8L(batch a,int n){ /* shifts each byte of a left by n bits, independently of its neighbours */
1735+ batch res;
1736+ int i;
1737+ for(i=0;i<8;i++) res.s1[i]=a.s1[i]<<n; /* storing into unsigned char keeps only the low 8 bits, so nothing carries into the next byte */
1738+ return res;
1739+}
1740+
1741+batch static inline B_FFSH8R(batch a,int n){ /* shifts each byte of a right by n bits, independently of its neighbours */
1742+ batch res;
1743+ int i;
1744+ for(i=0;i<8;i++) res.s1[i]=a.s1[i]>>n;
1745+ return res;
1746+}
1747+
1748+void static inline M_EMPTY(void){ /* intentionally empty in this plain-C implementation */
1749+}
1750Index: libs/libmythtv/FFdecsa/parallel_128_sse.h
1751===================================================================
1752--- /dev/null 1970-01-01 00:00:00.000000000 +0000
1753+++ libs/libmythtv/FFdecsa/parallel_128_sse.h 2006-06-20 17:36:06.000000000 -0400
1754@@ -0,0 +1,184 @@
1755+/* FFdecsa -- fast decsa algorithm
1756+ *
1757+ * Copyright (C) 2003-2004 fatih89r
1758+ *
1759+ * This program is free software; you can redistribute it and/or modify
1760+ * it under the terms of the GNU General Public License as published by
1761+ * the Free Software Foundation; either version 2 of the License, or
1762+ * (at your option) any later version.
1763+ *
1764+ * This program is distributed in the hope that it will be useful,
1765+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1766+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1767+ * GNU General Public License for more details.
1768+ *
1769+ * You should have received a copy of the GNU General Public License
1770+ * along with this program; if not, write to the Free Software
1771+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
1772+ */
1773+
1774+
1775+#include <xmmintrin.h>
1776+
1777+struct group_t{ /* a "group": one __m128 value operated on with the SSE intrinsics below */
1778+ __m128 s1;
1779+};
1780+typedef struct group_t group;
1781+
1782+#define GROUP_PARALLELISM 128 /* width of a group in bits */
1783+
1784+group static inline FF0(){ /* returns a group with all 128 bits clear */
1785+ group res;
1786+ static unsigned long long l[2] __attribute__((aligned(16)))={0x0000000000000000ULL,0x0000000000000000ULL}; /* forced to 16-byte alignment because it is read through a __m128 pointer below */
1787+ res.s1=*(__m128*)l;
1788+ return res;
1789+}
1790+
1791+group static inline FF1(){ /* returns a group with all 128 bits set */
1792+ group res;
1793+ static unsigned long long l[2] __attribute__((aligned(16)))={0xffffffffffffffffULL,0xffffffffffffffffULL}; /* forced to 16-byte alignment because it is read through a __m128 pointer below */
1794+ res.s1=*(__m128*)l;
1795+ return res;
1796+}
1797+
1798+group static inline FFAND(group a,group b){ /* returns the bitwise AND of a and b via _mm_and_ps */
1799+ group res;
1800+ res.s1=_mm_and_ps(a.s1,b.s1);
1801+ return res;
1802+}
1803+
1804+group static inline FFOR(group a,group b){ /* returns the bitwise OR of a and b via _mm_or_ps */
1805+ group res;
1806+ res.s1=_mm_or_ps(a.s1,b.s1);
1807+ return res;
1808+}
1809+
1810+group static inline FFXOR(group a,group b){ /* returns the bitwise XOR of a and b via _mm_xor_ps */
1811+ group res;
1812+ res.s1=_mm_xor_ps(a.s1,b.s1);
1813+ return res;
1814+}
1815+
1816+group static inline FFNOT(group a){ /* returns the bitwise complement of a */
1817+ group res;
1818+ res.s1=_mm_xor_ps(a.s1,FF1().s1); /* complement is computed as XOR with the all-ones group */
1819+ return res;
1820+}
1821+
1822+
1823+/* 64 rows of 128 bits */
1824+
1825+void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ /* copies two consecutive ints from data into int slots 2*g and 2*g+1 of tab */
1826+ *(((int *)tab)+2*g)=*((int *)data); /* NOTE(review): the int casts presumably rely on 4-byte int and suitably aligned buffers -- confirm */
1827+ *(((int *)tab)+2*g+1)=*(((int *)data)+1);
1828+}
1829+
1830+void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ /* copies int slots 2*g and 2*g+1 of tab into the first two ints of data */
1831+ *((int *)data)=*(((int *)tab)+2*g); /* NOTE(review): the int casts presumably rely on 4-byte int and suitably aligned buffers -- confirm */
1832+ *(((int *)data)+1)=*(((int *)tab)+2*g+1);
1833+}
1834+
1835+void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ /* XORs n bytes of tab, starting at byte offset 8*g, into data[0..n-1] */
1836+ int j;
1837+ for(j=0;j<n;j++){
1838+ *(data+j)^=*(tab+8*g+j);
1839+ }
1840+}
1841+
1842+
1843+struct batch_t{ /* a "batch": one __m64 (MMX) value, even though group_t in this header is __m128 */
1844+ __m64 s1;
1845+};
1846+typedef struct batch_t batch;
1847+
1848+#define BYTES_PER_BATCH 8 /* an __m64 holds 8 bytes */
1849+
1850+batch static inline B_FFAND(batch a,batch b){ /* returns the bitwise AND of a and b via _m_pand */
1851+ batch res;
1852+ res.s1=_m_pand(a.s1,b.s1);
1853+ return res;
1854+}
1855+
1856+batch static inline B_FFOR(batch a,batch b){ /* returns the bitwise OR of a and b via _m_por */
1857+ batch res;
1858+ res.s1=_m_por(a.s1,b.s1);
1859+ return res;
1860+}
1861+
1862+batch static inline B_FFXOR(batch a,batch b){ /* returns the bitwise XOR of a and b via _m_pxor */
1863+ batch res;
1864+ res.s1=_m_pxor(a.s1,b.s1);
1865+ return res;
1866+}
1867+
1868+batch static inline B_FFN_ALL_29(){ /* constant batch: each of the 8 bytes is 0x29 */
1869+ batch res;
1870+ res.s1=(__m64)0x2929292929292929ULL;
1871+ return res;
1872+}
1873+batch static inline B_FFN_ALL_02(){ /* constant batch: each of the 8 bytes is 0x02 */
1874+ batch res;
1875+ res.s1=(__m64)0x0202020202020202ULL;
1876+ return res;
1877+}
1878+batch static inline B_FFN_ALL_04(){ /* constant batch: each of the 8 bytes is 0x04 */
1879+ batch res;
1880+ res.s1=(__m64)0x0404040404040404ULL;
1881+ return res;
1882+}
1883+batch static inline B_FFN_ALL_10(){ /* constant batch: each of the 8 bytes is 0x10 */
1884+ batch res;
1885+ res.s1=(__m64)0x1010101010101010ULL;
1886+ return res;
1887+}
1888+batch static inline B_FFN_ALL_40(){ /* constant batch: each of the 8 bytes is 0x40 */
1889+ batch res;
1890+ res.s1=(__m64)0x4040404040404040ULL;
1891+ return res;
1892+}
1893+batch static inline B_FFN_ALL_80(){ /* constant batch: each of the 8 bytes is 0x80 */
1894+ batch res;
1895+ res.s1=(__m64)0x8080808080808080ULL;
1896+ return res;
1897+}
1898+
1899+batch static inline B_FFSH8L(batch a,int n){ /* shifts a left by n bits using the 64-bit quadword shift _m_psllqi */
1900+ batch res;
1901+ res.s1=_m_psllqi(a.s1,n); /* NOTE(review): a quadword shift lets bits cross byte boundaries; presumably callers mask the result -- confirm */
1902+ return res;
1903+}
1904+
1905+batch static inline B_FFSH8R(batch a,int n){ /* shifts a right by n bits using the 64-bit quadword shift _m_psrlqi */
1906+ batch res;
1907+ res.s1=_m_psrlqi(a.s1,n); /* NOTE(review): a quadword shift lets bits cross byte boundaries; presumably callers mask the result -- confirm */
1908+ return res;
1909+}
1910+
1911+void static inline M_EMPTY(void){ /* wraps _m_empty(); the batch helpers in this header use MMX (__m64) intrinsics */
1912+ _m_empty();
1913+}
1914+
1915+
1916+#undef XOR_8_BY /* drop any earlier definition; redefined below as *d = *s1 ^ *s2 on one __m64 (8 bytes) */
1917+#define XOR_8_BY(d,s1,s2) do{ __m64 *pd=(__m64 *)(d), *ps1=(__m64 *)(s1), *ps2=(__m64 *)(s2); \
1918+ *pd = _m_pxor( *ps1 , *ps2 ); }while(0)
1919+
1920+#undef XOREQ_8_BY /* drop any earlier definition; redefined below as *d ^= *s on one __m64 (8 bytes) */
1921+#define XOREQ_8_BY(d,s) do{ __m64 *pd=(__m64 *)(d), *ps=(__m64 *)(s); \
1922+ *pd = _m_pxor( *ps, *pd ); }while(0)
1923+
1924+#undef COPY_8_BY /* drop any earlier definition; redefined below as *d = *s on one __m64 (8 bytes) */
1925+#define COPY_8_BY(d,s) do{ __m64 *pd=(__m64 *)(d), *ps=(__m64 *)(s); \
1926+ *pd = *ps; }while(0)
1927+
1928+#undef BEST_SPAN /* drop any earlier definition before redefining */
1929+#define BEST_SPAN 8 /* the *_BEST_BY macros below forward to the 8-byte (*_8_BY) versions */
1930+
1931+#undef XOR_BEST_BY /* NOTE(review): the definition below ends in "while(0);" -- with the caller's own ';' this expands to two statements, which breaks an unbraced if/else; confirm call sites */
1932+#define XOR_BEST_BY(d,s1,s2) do{ XOR_8_BY(d,s1,s2); }while(0);
1933+
1934+#undef XOREQ_BEST_BY /* same trailing-';' caveat as XOR_BEST_BY */
1935+#define XOREQ_BEST_BY(d,s) do{ XOREQ_8_BY(d,s); }while(0);
1936+
1937+#undef COPY_BEST_BY /* same trailing-';' caveat as XOR_BEST_BY */
1938+#define COPY_BEST_BY(d,s) do{ COPY_8_BY(d,s); }while(0);
1939Index: libs/libmythtv/FFdecsa/docs/how_to_compile.txt
1940===================================================================
1941--- /dev/null 1970-01-01 00:00:00.000000000 +0000
1942+++ libs/libmythtv/FFdecsa/docs/how_to_compile.txt 2006-06-20 17:36:06.000000000 -0400
1943@@ -0,0 +1,114 @@
1944+-------
1945+FFdecsa
1946+-------
1947+
1948+Compiling is as easy as running a make command, if you have gcc and are
1949+using a little endian machine. 64 bit machines have not been tested but
1950+may work with little or no changes; big endian machines will certainly
1951+give incorrect results (read the technical_background.txt to know where
1952+the problem is).
1953+
1954+Before compiling you could edit the Makefile to tweak compiler flags for
1955+optimal performance. If you want to play with different bit-grouping
1956+strategies you have to edit FFdecsa_DBG.c and change the "our choice"
1957+definition. This is highly critical for performance.
1958+
1959+After compilation run the FFdecsa_test application. It will test correct
1960+decryption and print the measured speed (use "nice --19 ./FFdecsa_test"
1961+on an idle machine for better results). Or just use "make test".
1962+
1963+gcc >=3.3.3 is highly recommended. Older versions could give performance
1964+problems.
1965+
1966+icc is currently unusable. In the initial phases of development of
1967+FFdecsa icc was able to compile the code and gave interesting speed
1968+results when using the 8charA grouping mode (array of 8 characters are
1969+automatically manipulated through MMX instructions). At some point the
1970+code began to work incorrectly because of a compiler bug (but I found a
1971+workaround). Then, the performance dropped with no reason; I found a
1972+workaround by adding an unused variable (alignment problem, grep for icc
1973+in the code to see where it happens). Then, with the introduction of
1974+group modes based on intrinsics, gcc was finally able to go beyond the
1975+speed record originally set by icc. Additional code tweaks added more
1976+speed to gcc, while icc started to segfault on compilation (both version
1977+7 and 8). In conclusion, icc is bugged and this code is too hard for it.
1978+gcc on the other hand is great. I tried to inspect generated assembler
1979+to find weak spots, and the generated code is very good indeed.
1980+
1981+Note: the code can be compiled with gcc or g++. g++ is 3% faster for
1982+some reason.
1983+
1984+You should not get any errors or warnings. I only get two "inlining
1985+failed" warnings on two functions I asked to be inlined but gcc doesn't
1986+want to inline.
1987+
1988+The build process creates additional temp files by running grep
1989+commands. This is how debugging output is handled. All the lines
1990+containing DBG are removed and the temp file is compiled (so the line
1991+numbers change between temp and original files). Don't edit the temp
1992+files, they will be overwritten. If you don't remove the DBG lines (for
1993+example, by changing "grep -v DBG" into "grep -v aaDBG" in Makefile) a
1994+lot of output will be generated. This is useful to understand what's
1995+wrong when the FFdecsa_test is failing. I included a reference "known
1996+good" output in the debug_output directory. Extra debug output is
1997+commented out in the code.
1998+
1999+The debug output functionality could be... bugged. This is because I
2000+tested everything using hard coded int grouping mode and then
2001+generalized the debug output to abstract grouping modes. A bug where 4
2002+bytes are printed instead of 8 could be present somewhere. I think it
2003+isn't, but you've been warned.
2004+
2005+This code was only tried on Linux.
2006+It should work on Windows or other platforms, but you may encounter
2007+problems related to the compiler quality. If you want to try, begin with
2008+the int grouping mode. It is only 30% slower than the best (MMX) and it
2009+should be easily portable because no intrinsics are used. I'm
2010+particularly interested in hearing what kind of performance can be
2011+obtained on x86_64 processors in int, long long int, mmx, 2mmx, sse
2012+modes.
2013+
2014+
2015+As a reference, here are the results I get on an Athlon XP 2400+ (this
2016+processor runs at 2000MHz); other processors belonging to the Athlon XP
2017+architecture, including Durons, should have the same speed per MHz.
2018+Cache size and bus speed don't matter.
2019+
2020+CPU: AMD Athlon XP 2400+
2021+
2022+Compiler: g++ (gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7))
2023+
2024+Flags: -O3 -march=athlon-xp -fexpensive-optimizations -funroll-loops
2025+ --param max-unrolled-insns=500
2026+
2027+grouping mode speed (Mbit/s) notes
2028+---------------------------------------------------------------------
2029+PARALLEL_32_4CHAR 14
2030+PARALLEL_32_4CHARA 12
2031+PARALLEL_32_INT 125 very good and very portable
2032+PARALLEL_64_8CHAR 17
2033+PARALLEL_64_8CHARA 15 needs a vectorizing compiler
2034+PARALLEL_64_2INT 75 x86 has too few registers
2035+PARALLEL_64_LONG 97 try this on x86_64
2036+PARALLEL_64_MMX 165 the best
2037+PARALLEL_128_16CHAR 6
2038+PARALLEL_128_16CHARA 7
2039+PARALLEL_128_4INT 69
2040+PARALLEL_128_2LONG 52
2041+PARALLEL_128_2MMX 36 slower than expected
2042+PARALLEL_128_SSE 156 just slower than 64_MMX
2043+
2044+Best speeds are obtained with native data types: int, mmx, sse (this
2045+could be a compiler artifact).
2046+
2047+64 bit processors should try 64_LONG.
2048+
2049+Vectorizing compilers should like *CHARA.
2050+
2051+64_MMX is faster than 128_SSE on the Athlon; perhaps SSE instructions are
2052+internally split into 64 bit chunks. Could be different on x86_64 or
2053+Intel processors.
2054+
2055+128_SSE has a 64 bit (MMX) batch type because SSE has no shifting
2056+instructions, they are only available on SSE2. As the Athlon XP doesn't
2057+support SSE2, I couldn't experiment with that.
2058Index: libs/libmythtv/FFdecsa/docs/FAQ.txt
2059===================================================================
2060--- /dev/null 1970-01-01 00:00:00.000000000 +0000
2061+++ libs/libmythtv/FFdecsa/docs/FAQ.txt 2006-06-20 17:36:06.000000000 -0400
2062@@ -0,0 +1,77 @@
2063+-------
2064+FFdecsa
2065+-------
2066+
2067+FFdecsa is a fast implementation of the CSA decryption algorithm for MPEG
2068+TS packets.
2069+
2070+Q: What does FF stand for?
2071+A: FFdecsa means "Fucking Fast decsa".
2072+
2073+Q: Why would you use such a rude name?
2074+A: Because this code is fucking fast, more than 800% the speed of the best
2075+ implementation I'm able to find around at the moment.
2076+
2077+Q: How is that possible? Are all other programmers stupid?
2078+A: No, they just tried to save a cycle or two tweaking a fundamentally wrong
2079+ implementation. The algorithm has to be implemented in a totally different
2080+ way to achieve good speed.
2081+
2082+Q: Do you use multimedia instructions?
2083+A: I use every trick I could come up with, including multimedia instructions.
2084+ They are not fundamental in achieving speed, a version without them runs
2085+ at 6x the speed of the best implementation around (which uses MMX).
2086+
2087+Q: So how did you do that?
2088+A: By using a different approach for the implementation. This code is not
2089+ exploiting some new CSA vulnerability, it is just doing the same
2090+ calculations better. Think about replacing bubble sort with quick sort.
2091+
2092+Q: You're joking, it's impossible to gain so much speed.
2093+A: Speed tests are available, technical documentation is available, source
2094+ code is available. Try it yourself.
2095+ If you want details, these are some of the documented tricks I used
2096+ (more details in the docs directory):
2097+ TRICK NUMBER 0: emulate the hardware
2098+ TRICK NUMBER 1: virtual shift registers
2099+ TRICK NUMBER 2: parallel bitslice
2100+ TRICK NUMBER 3: multimedia instructions
2101+ TRICK NUMBER 4: parallel byteslice
2102+ TRICK NUMBER 5: efficient bit permutation
2103+ TRICK NUMBER 6: efficient normal<->slice conversion
2104+ TRICK NUMBER 7: try hard to process packets together
2105+ TRICK NUMBER 8: try to avoid doing the same thing many times
2106+ TRICK NUMBER 9: compiler
2107+ TRICK NUMBER a: a lot of brain work
2108+
2109+Q: How can this code be useful?
2110+A: You can use this code in place of the old slow implementations and save a
2111+ lot of CPU power.
2112+
2113+Q: Just that?
2114+A: Well, new applications are possible.
2115+ Decrypting a whole transponder is easily doable now. Well, a $50 CPU can
2116+ decrypt four transponders at the same time if you have four DVB boards (but
2117+ I couldn't test that).
2118+
2119+Q: You're cheating, this code is fake, I don't believe one word.
2120+A: Go away. This is technical stuff for people with brains.
2121+
2122+Q: This code is great, may I distribute your code in original or modified
2123+ form?
2124+A: Only if you respect the license.
2125+
2126+Q: May I use your code in my player/library/plugin...?
2127+A: Again, you have to respect the license.
2128+
2129+Q: Are you an extraterrestrial programmer?
2130+A: No, just a Turkish guy with a PC to play with :-)
2131+
2132+Q: Why did you spend your time doing this?
2133+A: Because I thought that my approach was doable and I was sure it would
2134+ have been much faster, so I had to implement it to confirm I was right.
2135+ I got 8x the speed and that's enough to be proud of it. And I could not
2136+ just keep the code for myself only.
2137+
2138+Q: What is the answer to the meaning of the universe?
2139+A: 42,43,71,5f,65,85,f6,76,0d,13,28,96,...
2140Index: libs/libmythtv/FFdecsa/parallel_032_4charA.h
2141===================================================================
2142--- /dev/null 1970-01-01 00:00:00.000000000 +0000
2143+++ libs/libmythtv/FFdecsa/parallel_032_4charA.h 2006-06-20 17:36:06.000000000 -0400
2144@@ -0,0 +1,171 @@
2145+/* FFdecsa -- fast decsa algorithm
2146+ *
2147+ * Copyright (C) 2003-2004 fatih89r
2148+ *
2149+ * This program is free software; you can redistribute it and/or modify
2150+ * it under the terms of the GNU General Public License as published by
2151+ * the Free Software Foundation; either version 2 of the License, or
2152+ * (at your option) any later version.
2153+ *
2154+ * This program is distributed in the hope that it will be useful,
2155+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2156+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2157+ * GNU General Public License for more details.
2158+ *
2159+ * You should have received a copy of the GNU General Public License
2160+ * along with this program; if not, write to the Free Software
2161+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
2162+ */
2163+
2164+
2165+struct group_t{ /* a "group": 4 unsigned chars that the FF* helpers below process byte by byte */
2166+ unsigned char s1[4];
2167+};
2168+typedef struct group_t group;
2169+
2170+#define GROUP_PARALLELISM 32 /* 4 bytes * 8 bits per group */
2171+
2172+group static inline FF0(){ /* returns a group with every byte set to 0x00 */
2173+ group res;
2174+ int i;
2175+ for(i=0;i<4;i++) res.s1[i]=0x0;
2176+ return res;
2177+}
2178+
2179+group static inline FF1(){ /* returns a group with every byte set to 0xff (all bits set) */
2180+ group res;
2181+ int i;
2182+ for(i=0;i<4;i++) res.s1[i]=0xff;
2183+ return res;
2184+}
2185+
2186+group static inline FFAND(group a,group b){ /* returns the byte-wise bitwise AND of a and b */
2187+ group res;
2188+ int i;
2189+ for(i=0;i<4;i++) res.s1[i]=a.s1[i]&b.s1[i];
2190+ return res;
2191+}
2192+
2193+group static inline FFOR(group a,group b){ /* returns the byte-wise bitwise OR of a and b */
2194+ group res;
2195+ int i;
2196+ for(i=0;i<4;i++) res.s1[i]=a.s1[i]|b.s1[i];
2197+ return res;
2198+}
2199+
2200+group static inline FFXOR(group a,group b){ /* returns the byte-wise bitwise XOR of a and b */
2201+ group res;
2202+ int i;
2203+ for(i=0;i<4;i++) res.s1[i]=a.s1[i]^b.s1[i];
2204+ return res;
2205+}
2206+
2207+group static inline FFNOT(group a){ /* returns the bitwise complement of every byte of a */
2208+ group res;
2209+ int i;
2210+ for(i=0;i<4;i++) res.s1[i]=~a.s1[i]; /* result of ~ is narrowed back to unsigned char on assignment */
2211+ return res;
2212+}
2213+
2214+
2215+/* 64 rows of 32 bits */
2216+
2217+void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ /* copies the first int of data to int slot g of tab and the second int to int slot 32+g */
2218+ *(((int *)tab)+g)=*((int *)data); /* NOTE(review): the int casts presumably rely on 4-byte int and suitably aligned buffers -- confirm */
2219+ *(((int *)tab)+32+g)=*(((int *)data)+1);
2220+}
2221+
2222+void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ /* copies int slot g of tab to the first int of data and int slot 32+g to the second int */
2223+ *((int *)data)=*(((int *)tab)+g); /* NOTE(review): the int casts presumably rely on 4-byte int and suitably aligned buffers -- confirm */
2224+ *(((int *)data)+1)=*(((int *)tab)+32+g);
2225+}
2226+
2227+void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ /* XORs n bytes taken from tab into data[0..n-1] */
2228+ int j;
2229+ for(j=0;j<n;j++){
2230+ *(data+j)^=*(tab+4*(g+(j>=4?32-1:0))+j); /* j<4 reads byte 4*g+j; j>=4 reads byte 4*(g+31)+j, which equals 4*(32+g)+(j-4) */
2231+ }
2232+}
2233+
2234+struct batch_t{ /* a "batch": 4 unsigned chars that the B_FF* helpers below process byte by byte */
2235+ unsigned char s1[4];
2236+};
2237+typedef struct batch_t batch;
2238+
2239+#define BYTES_PER_BATCH 4 /* matches the size of batch_t.s1 */
2240+
2241+batch static inline B_FFAND(batch a,batch b){ /* returns the byte-wise bitwise AND of a and b */
2242+ batch res;
2243+ int i;
2244+ for(i=0;i<4;i++) res.s1[i]=a.s1[i]&b.s1[i];
2245+ return res;
2246+}
2247+
2248+batch static inline B_FFOR(batch a,batch b){ /* returns the byte-wise bitwise OR of a and b */
2249+ batch res;
2250+ int i;
2251+ for(i=0;i<4;i++) res.s1[i]=a.s1[i]|b.s1[i];
2252+ return res;
2253+}
2254+
2255+batch static inline B_FFXOR(batch a,batch b){ /* returns the byte-wise bitwise XOR of a and b */
2256+ batch res;
2257+ int i;
2258+ for(i=0;i<4;i++) res.s1[i]=a.s1[i]^b.s1[i];
2259+ return res;
2260+}
2261+
2262+
2263+batch static inline B_FFN_ALL_29(){ /* constant batch: every byte is 0x29 */
2264+ batch res;
2265+ int i;
2266+ for(i=0;i<4;i++) res.s1[i]=0x29;
2267+ return res;
2268+}
2269+batch static inline B_FFN_ALL_02(){ /* constant batch: every byte is 0x02 */
2270+ batch res;
2271+ int i;
2272+ for(i=0;i<4;i++) res.s1[i]=0x02;
2273+ return res;
2274+}
2275+batch static inline B_FFN_ALL_04(){ /* constant batch: every byte is 0x04 */
2276+ batch res;
2277+ int i;
2278+ for(i=0;i<4;i++) res.s1[i]=0x04;
2279+ return res;
2280+}
2281+batch static inline B_FFN_ALL_10(){ /* constant batch: every byte is 0x10 */
2282+ batch res;
2283+ int i;
2284+ for(i=0;i<4;i++) res.s1[i]=0x10;
2285+ return res;
2286+}
2287+batch static inline B_FFN_ALL_40(){ /* constant batch: every byte is 0x40 */
2288+ batch res;
2289+ int i;
2290+ for(i=0;i<4;i++) res.s1[i]=0x40;
2291+ return res;
2292+}
2293+batch static inline B_FFN_ALL_80(){ /* constant batch: every byte is 0x80 */
2294+ batch res;
2295+ int i;
2296+ for(i=0;i<4;i++) res.s1[i]=0x80;
2297+ return res;
2298+}
2299+
2300+batch static inline B_FFSH8L(batch a,int n){
2301+ batch res;
2302+ int i;
2303+ for(i=0;i<4;i++) res.s1[i]=a.s1[i]<<n;
2304+ return res;
2305+}
2306+
2307+batch static inline B_FFSH8R(batch a,int n){
2308+ batch res;
2309+ int i;
2310+ for(i=0;i<4;i++) res.s1[i]=a.s1[i]>>n;
2311+ return res;
2312+}
2313+
2314+void static inline M_EMPTY(void){
2315+}
2316Index: libs/libmythtv/FFdecsa/parallel_128_16char.h
2317===================================================================
2318--- /dev/null 1970-01-01 00:00:00.000000000 +0000
2319+++ libs/libmythtv/FFdecsa/parallel_128_16char.h 2006-06-20 17:36:06.000000000 -0400
2320@@ -0,0 +1,411 @@
2321+/* FFdecsa -- fast decsa algorithm
2322+ *
2323+ * Copyright (C) 2003-2004 fatih89r
2324+ *
2325+ * This program is free software; you can redistribute it and/or modify
2326+ * it under the terms of the GNU General Public License as published by
2327+ * the Free Software Foundation; either version 2 of the License, or
2328+ * (at your option) any later version.
2329+ *
2330+ * This program is distributed in the hope that it will be useful,
2331+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2332+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2333+ * GNU General Public License for more details.
2334+ *
2335+ * You should have received a copy of the GNU General Public License
2336+ * along with this program; if not, write to the Free Software
2337+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
2338+ */
2339+
2340+
/* A group is sixteen unsigned char lanes (128 bits in total).  The FF*
 * helpers below apply one bitwise operation to every lane; they walk the
 * struct as raw bytes, which touches exactly the sixteen members. */
struct group_t{
  unsigned char s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,s16;
};
typedef struct group_t group;

#define GROUP_PARALLELISM 128

/* Group with every bit cleared. */
group static inline FF0(){
  group res;
  unsigned char *out=(unsigned char *)&res;
  unsigned int lane;
  for(lane=0;lane<sizeof(group);lane++) out[lane]=0x0;
  return res;
}

/* Group with every bit set. */
group static inline FF1(){
  group res;
  unsigned char *out=(unsigned char *)&res;
  unsigned int lane;
  for(lane=0;lane<sizeof(group);lane++) out[lane]=0xff;
  return res;
}

/* Bitwise AND of a and b. */
group static inline FFAND(group a,group b){
  group res;
  unsigned char *out=(unsigned char *)&res;
  const unsigned char *lhs=(const unsigned char *)&a;
  const unsigned char *rhs=(const unsigned char *)&b;
  unsigned int lane;
  for(lane=0;lane<sizeof(group);lane++) out[lane]=lhs[lane]&rhs[lane];
  return res;
}

/* Bitwise OR of a and b. */
group static inline FFOR(group a,group b){
  group res;
  unsigned char *out=(unsigned char *)&res;
  const unsigned char *lhs=(const unsigned char *)&a;
  const unsigned char *rhs=(const unsigned char *)&b;
  unsigned int lane;
  for(lane=0;lane<sizeof(group);lane++) out[lane]=lhs[lane]|rhs[lane];
  return res;
}

/* Bitwise XOR of a and b. */
group static inline FFXOR(group a,group b){
  group res;
  unsigned char *out=(unsigned char *)&res;
  const unsigned char *lhs=(const unsigned char *)&a;
  const unsigned char *rhs=(const unsigned char *)&b;
  unsigned int lane;
  for(lane=0;lane<sizeof(group);lane++) out[lane]=lhs[lane]^rhs[lane];
  return res;
}

/* Bitwise complement of a. */
group static inline FFNOT(group a){
  group res;
  unsigned char *out=(unsigned char *)&res;
  const unsigned char *in=(const unsigned char *)&a;
  unsigned int lane;
  for(lane=0;lane<sizeof(group);lane++) out[lane]=~in[lane];
  return res;
}
2473+
2474+
2475+/* 64 rows of 128 bits */
2476+
/* Store the two ints found at data into int slots 2*g and 2*g+1 of tab. */
void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
  int *entry=((int *)tab)+2*g;
  int *words=(int *)data;
  entry[0]=words[0];
  entry[1]=words[1];
}

/* Inverse of FFTABLEIN: copy int slots 2*g and 2*g+1 of tab to data. */
void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
  int *entry=((int *)tab)+2*g;
  int *words=(int *)data;
  words[0]=entry[0];
  words[1]=entry[1];
}

/* XOR the first n bytes found at byte offset 8*g of tab into data. */
void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
  unsigned char *entry=tab+8*g;
  int j=0;
  while(j<n){
    data[j]^=entry[j];
    j++;
  }
}
2493+
2494+
/* A batch is sixteen unsigned char lanes.  Every B_* helper below applies
 * the same operation to each lane independently; the helpers walk the
 * struct as raw bytes, which touches exactly the sixteen members. */
struct batch_t{
  unsigned char s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,s16;
};
typedef struct batch_t batch;

#define BYTES_PER_BATCH 16

/* Lane-wise AND of a and b. */
batch static inline B_FFAND(batch a,batch b){
  batch res;
  unsigned char *out=(unsigned char *)&res;
  const unsigned char *lhs=(const unsigned char *)&a;
  const unsigned char *rhs=(const unsigned char *)&b;
  unsigned int lane;
  for(lane=0;lane<sizeof(batch);lane++) out[lane]=lhs[lane]&rhs[lane];
  return res;
}

/* Lane-wise OR of a and b. */
batch static inline B_FFOR(batch a,batch b){
  batch res;
  unsigned char *out=(unsigned char *)&res;
  const unsigned char *lhs=(const unsigned char *)&a;
  const unsigned char *rhs=(const unsigned char *)&b;
  unsigned int lane;
  for(lane=0;lane<sizeof(batch);lane++) out[lane]=lhs[lane]|rhs[lane];
  return res;
}

/* Lane-wise XOR of a and b. */
batch static inline B_FFXOR(batch a,batch b){
  batch res;
  unsigned char *out=(unsigned char *)&res;
  const unsigned char *lhs=(const unsigned char *)&a;
  const unsigned char *rhs=(const unsigned char *)&b;
  unsigned int lane;
  for(lane=0;lane<sizeof(batch);lane++) out[lane]=lhs[lane]^rhs[lane];
  return res;
}


/* Constant batches: every lane holds the byte named in the function. */
batch static inline B_FFN_ALL_29(){
  batch res;
  unsigned char *out=(unsigned char *)&res;
  unsigned int lane;
  for(lane=0;lane<sizeof(batch);lane++) out[lane]=0x29;
  return res;
}
batch static inline B_FFN_ALL_02(){
  batch res;
  unsigned char *out=(unsigned char *)&res;
  unsigned int lane;
  for(lane=0;lane<sizeof(batch);lane++) out[lane]=0x02;
  return res;
}
batch static inline B_FFN_ALL_04(){
  batch res;
  unsigned char *out=(unsigned char *)&res;
  unsigned int lane;
  for(lane=0;lane<sizeof(batch);lane++) out[lane]=0x04;
  return res;
}
batch static inline B_FFN_ALL_10(){
  batch res;
  unsigned char *out=(unsigned char *)&res;
  unsigned int lane;
  for(lane=0;lane<sizeof(batch);lane++) out[lane]=0x10;
  return res;
}
batch static inline B_FFN_ALL_40(){
  batch res;
  unsigned char *out=(unsigned char *)&res;
  unsigned int lane;
  for(lane=0;lane<sizeof(batch);lane++) out[lane]=0x40;
  return res;
}
batch static inline B_FFN_ALL_80(){
  batch res;
  unsigned char *out=(unsigned char *)&res;
  unsigned int lane;
  for(lane=0;lane<sizeof(batch);lane++) out[lane]=0x80;
  return res;
}

/* Shift every lane left by n bits; bits leaving a lane are dropped. */
batch static inline B_FFSH8L(batch a,int n){
  batch res;
  unsigned char *out=(unsigned char *)&res;
  const unsigned char *in=(const unsigned char *)&a;
  unsigned int lane;
  for(lane=0;lane<sizeof(batch);lane++) out[lane]=in[lane]<<n;
  return res;
}

/* Shift every lane right by n bits. */
batch static inline B_FFSH8R(batch a,int n){
  batch res;
  unsigned char *out=(unsigned char *)&res;
  const unsigned char *in=(const unsigned char *)&a;
  unsigned int lane;
  for(lane=0;lane<sizeof(batch);lane++) out[lane]=in[lane]>>n;
  return res;
}


/* Intentionally empty in this variant. */
void static inline M_EMPTY(void){
}
2732Index: libs/libmythtv/FFdecsa/FFdecsa_test.c
2733===================================================================
2734--- /dev/null 1970-01-01 00:00:00.000000000 +0000
2735+++ libs/libmythtv/FFdecsa/FFdecsa_test.c 2006-06-20 17:36:06.000000000 -0400
2736@@ -0,0 +1,174 @@
2737+/* FFdecsa -- fast decsa algorithm
2738+ *
2739+ * Copyright (C) 2003-2004 fatih89r
2740+ *
2741+ * This program is free software; you can redistribute it and/or modify
2742+ * it under the terms of the GNU General Public License as published by
2743+ * the Free Software Foundation; either version 2 of the License, or
2744+ * (at your option) any later version.
2745+ *
2746+ * This program is distributed in the hope that it will be useful,
2747+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2748+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2749+ * GNU General Public License for more details.
2750+ *
2751+ * You should have received a copy of the GNU General Public License
2752+ * along with this program; if not, write to the Free Software
2753+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
2754+ */
2755+
2756+
2757+#include <string.h>
2758+#include <stdio.h>
2759+#include <sys/time.h>
2760+
2761+#include "FFdecsa.h"
2762+
2763+#ifndef NULL
2764+#define NULL 0
2765+#endif
2766+
2767+#include "FFdecsa_test_testcases.h"
2768+
/* Compare the first n bytes of p1 against p2, skipping offset 3, which is
 * tolerated to differ.  Each mismatch is reported on stderr.  Unless
 * silently is non-zero a final CORRECT!/FAILED! verdict is printed too.
 * Returns 1 when every compared byte matched, 0 otherwise. */
int compare(unsigned char *p1, unsigned char *p2, int n, int silently){
  int pos=0;
  int matched=1;
  while(pos<n){
    if(pos!=3 && p1[pos]!=p2[pos]){
      fprintf(stderr,"at pos 0x%02x, got 0x%02x instead of 0x%02x\n",pos,p1[pos],p2[pos]);
      matched=0;
    }
    pos++;
  }
  if(silently) return matched;
  if(matched)
    fprintf(stderr,"CORRECT!\n");
  else
    fprintf(stderr,"FAILED!\n");
  return matched;
}
2789+
2790+
2791+//MAIN
2792+
// Number of 188-byte TS packets used by the speed test.  The value is
// parenthesized so the macro expands as a single operand in any
// expression (e.g. with %, unary minus or as a divisor).
#define TS_PKTS_FOR_TEST (30*1000)
//#define TS_PKTS_FOR_TEST (1000*1000)
// Speed-test buffer: TS_PKTS_FOR_TEST packets of 188 bytes each.
unsigned char megabuf[188*TS_PKTS_FOR_TEST];
// Scratch buffer holding exactly one TS packet for the correctness tests.
unsigned char onebuf[188];

// Argument vector handed to decrypt_packets(): start/end pointer pairs,
// terminated by a NULL entry.
unsigned char *cluster[10];
2799+
2800+int main(void){
2801+ int i;
2802+ struct timeval tvs,tve;
2803+ void *keys=get_key_struct();
2804+
2805+ fprintf(stderr,"FFdecsa 1.0: testing correctness and speed\n");
2806+
2807+/* begin correctness testing */
2808+
2809+ set_control_words(keys,test_invalid_key,test_1_key);
2810+ memcpy(onebuf,test_1_encrypted,188);
2811+ cluster[0]=onebuf;cluster[1]=onebuf+188;cluster[2]=NULL;
2812+ decrypt_packets(keys,cluster);
2813+ compare(onebuf,test_1_expected,188,0);
2814+
2815+ set_control_words(keys,test_2_key,test_invalid_key);
2816+ memcpy(onebuf,test_2_encrypted,188);
2817+ cluster[0]=onebuf;cluster[1]=onebuf+188;cluster[2]=NULL;
2818+ decrypt_packets(keys,cluster);
2819+ compare(onebuf,test_2_expected,188,0);
2820+
2821+ set_control_words(keys,test_3_key,test_invalid_key);
2822+ memcpy(onebuf,test_3_encrypted,188);
2823+ cluster[0]=onebuf;cluster[1]=onebuf+188;cluster[2]=NULL;
2824+ decrypt_packets(keys,cluster);
2825+ compare(onebuf,test_3_expected,188,0);
2826+
2827+ set_control_words(keys,test_p_10_0_key,test_invalid_key);
2828+ memcpy(onebuf,test_p_10_0_encrypted,188);
2829+ cluster[0]=onebuf;cluster[1]=onebuf+188;cluster[2]=NULL;
2830+ decrypt_packets(keys,cluster);
2831+ compare(onebuf,test_p_10_0_expected,188,0);
2832+
2833+ set_control_words(keys,test_p_1_6_key,test_invalid_key);
2834+ memcpy(onebuf,test_p_1_6_encrypted,188);
2835+ cluster[0]=onebuf;cluster[1]=onebuf+188;cluster[2]=NULL;
2836+ decrypt_packets(keys,cluster);
2837+ compare(onebuf,test_p_1_6_expected,188,0);
2838+
2839+/* begin speed testing */
2840+
2841+#if 0
2842+// test on short packets
2843+#define s_encrypted test_p_1_6_encrypted
2844+#define s_key_e test_p_1_6_key
2845+#define s_key_o test_invalid_key
2846+#define s_expected test_p_1_6_expected
2847+
2848+#else
2849+//test on full packets
2850+#define s_encrypted test_2_encrypted
2851+#define s_key_e test_2_key
2852+#define s_key_o test_invalid_key
2853+#define s_expected test_2_expected
2854+
2855+#endif
2856+
2857+ for(i=0;i<TS_PKTS_FOR_TEST;i++){
2858+ memcpy(&megabuf[188*i],s_encrypted,188);
2859+ }
2860+// test that packets are not shuffled around
2861+// so, let's put an undecryptable packet somewhere in the middle (we will use a wrong key)
2862+#define noONE_POISONED_PACKET
2863+#ifdef ONE_POISONED_PACKET
2864+ memcpy(&megabuf[188*(TS_PKTS_FOR_TEST*2/3)],test_3_encrypted,188);
2865+#endif
2866+
2867+ // start decryption
2868+ set_control_words(keys,s_key_e,s_key_o);
2869+ gettimeofday(&tvs,NULL);
2870+#if 0
2871+// force one by one
2872+ for(i=0;i<TS_PKTS_FOR_TEST;i++){
2873+ cluster[0]=megabuf+188*i;cluster[1]=onebuf+188*i+188;cluster[2]=NULL;
2874+ decrypt_packets(keys,cluster);
2875+ }
2876+#else
2877+ {
2878+ int done=0;
2879+ while(done<TS_PKTS_FOR_TEST){
2880+ //fprintf(stderr,"done=%i\n",done);
2881+ cluster[0]=megabuf+188*done;cluster[1]=megabuf+188*TS_PKTS_FOR_TEST;cluster[2]=NULL;
2882+ done+=decrypt_packets(keys,cluster);
2883+ }
2884+ }
2885+#endif
2886+ gettimeofday(&tve,NULL);
2887+ //end decryption
2888+
2889+ fprintf(stderr,"speed=%f Mbit/s\n",(184*TS_PKTS_FOR_TEST*8)/((tve.tv_sec-tvs.tv_sec)+1e-6*(tve.tv_usec-tvs.tv_usec))/1000000);
2890+ fprintf(stderr,"speed=%f pkts/s\n",TS_PKTS_FOR_TEST/((tve.tv_sec-tvs.tv_sec)+1e-6*(tve.tv_usec-tvs.tv_usec)));
2891+
2892+ // this packet couldn't be decrypted correctly
2893+#ifdef ONE_POISONED_PACKET
2894+ compare(megabuf+188*(TS_PKTS_FOR_TEST*2/3),test_3_expected,188,0); /* will fail because we used a wrong key */
2895+#endif
2896+ // these should be ok
2897+ compare(megabuf,s_expected,188,0);
2898+ compare(megabuf+188*511,s_expected,188,0);
2899+ compare(megabuf+188*512,s_expected,188,0);
2900+ compare(megabuf+188*319,s_expected,188,0);
2901+ compare(megabuf+188*(TS_PKTS_FOR_TEST-1),s_expected,188,0);
2902+
2903+ for(i=0;i<TS_PKTS_FOR_TEST;i++){
2904+ if(!compare(megabuf+188*i,s_expected,188,1)){
2905+ fprintf(stderr,"FAILED COMPARISON OF PACKET %10i\n",i);
2906+ };
2907+ }
2908+
2909+ return 0;
2910+}
2911Index: libs/libmythtv/dvbcam.cpp
2912===================================================================
2913--- libs/libmythtv/dvbcam.cpp.orig 2006-06-20 17:36:05.000000000 -0400
2914+++ libs/libmythtv/dvbcam.cpp 2006-06-20 17:36:06.000000000 -0400
2915@@ -374,6 +374,12 @@
2916 QString keyFifo = QDir::homeDirPath() +
2917 QString("/.mythtv/externcam_%1/keyfifo").arg(cardnum);
2918 unlink(keyFifo.ascii());
2919+ bool spawn_thread = false;
2920+ if(numslots==0) {
2921+ cmd += " --writekey " + keyFifo;
2922+ mkfifo(keyFifo.ascii(), 0600);
2923+ spawn_thread = true;
2924+ }
2925
2926 int tmppid[2];
2927 if (pipe(tmppid) < 0)
2928@@ -399,6 +405,9 @@
2929 }
2930 close(tmppid[0]);
2931 external_cam_fd=tmppid[1];
2932+ if(spawn_thread)
2933+ pthread_create(&externSoftkeyThread, NULL,
2934+ ExternalCamSoftkeyHelper, this);
2935
2936 //we need to fake ciThreadRunning in oreder to get PMTs
2937 ciThreadRunning = true;
2938@@ -415,7 +424,6 @@
2939 unsigned char newpmt[512];
2940 int hdr[34]; //Allow for up to 16 PIDs
2941 int pmtlen=0, hdrlen = 0;
2942- bool need_pcrpid = false;
2943
2944 hdr[hdrlen++]=pmt->ServiceID;
2945 hdr[hdrlen++]=1; //Place holder for transportID (future)
2946@@ -489,6 +497,43 @@
2947 }
2948 }
2949
2950+void *DVBCam::ExternalCamSoftkeyHelper(void*self)
2951+{
2952+ ((DVBCam*)self)->ExternalCamSoftkeyLoop();
2953+ return NULL;
2954+}
2955+void DVBCam::ExternalCamSoftkeyLoop()
2956+{
2957+ int keyfd;
2958+ unsigned char key[8];
2959+ int index, pid;
2960+ skThreadRunning = true;
2961+ QString keyFifo = QDir::homeDirPath() +
2962+ QString("/.mythtv/externcam_%1/keyfifo").arg(cardnum);
2963+ if ((keyfd=open(keyFifo.ascii(),O_RDONLY))<0) {
2964+ ERROR("Could not open keyfifo!");
2965+ skThreadRunning = false;
2966+ return;
2967+ }
2968+ while (!exitSkThread)
2969+ {
2970+ unsigned char keytype;
2971+ read(keyfd, &keytype,1);
2972+ index = 0;
2973+ pid = 0;
2974+ read(keyfd, &index, sizeof(int));
2975+ if(keytype == 'E' || keytype == 'O')
2976+ read(keyfd, key, 8);
2977+ else if (keytype == 'P')
2978+ read(keyfd, &pid, sizeof(int));
2979+
2980+ DVBRecorder::UpdateDeCSAKeys(cardnum, keytype, index, key, pid);
2981+ }
2982+ skThreadRunning = false;
2983+ GENERAL(QString("CA: External Softkey thread stopped"));
2984+ return;
2985+}
2986+
2987 void DVBCam::stopExternalCam()
2988 {
2989 if(external_cam_fd != -1) {
2990Index: libs/libmythtv/FFdecsa/parallel_064_2int.h
2991===================================================================
2992--- /dev/null 1970-01-01 00:00:00.000000000 +0000
2993+++ libs/libmythtv/FFdecsa/parallel_064_2int.h 2006-06-20 17:36:06.000000000 -0400
2994@@ -0,0 +1,175 @@
2995+/* FFdecsa -- fast decsa algorithm
2996+ *
2997+ * Copyright (C) 2003-2004 fatih89r
2998+ *
2999+ * This program is free software; you can redistribute it and/or modify
3000+ * it under the terms of the GNU General Public License as published by
3001+ * the Free Software Foundation; either version 2 of the License, or
3002+ * (at your option) any later version.
3003+ *
3004+ * This program is distributed in the hope that it will be useful,
3005+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3006+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3007+ * GNU General Public License for more details.
3008+ *
3009+ * You should have received a copy of the GNU General Public License
3010+ * along with this program; if not, write to the Free Software
3011+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
3012+ */
3013+
3014+
/* A group is two unsigned int words.  The FF* helpers below apply one
 * bitwise operation to both words. */
struct group_t{
  unsigned int s1;
  unsigned int s2;
};
typedef struct group_t group;

#define GROUP_PARALLELISM 64

/* Group with every bit cleared. */
group static inline FF0(){
  group res={0x0,0x0};
  return res;
}

/* Group with both words set to 0xffffffff. */
group static inline FF1(){
  group res={0xffffffff,0xffffffff};
  return res;
}

/* Bitwise AND of a and b. */
group static inline FFAND(group a,group b){
  group res={a.s1&b.s1,a.s2&b.s2};
  return res;
}

/* Bitwise OR of a and b. */
group static inline FFOR(group a,group b){
  group res={a.s1|b.s1,a.s2|b.s2};
  return res;
}

/* Bitwise XOR of a and b. */
group static inline FFXOR(group a,group b){
  group res={a.s1^b.s1,a.s2^b.s2};
  return res;
}

/* Bitwise complement of a. */
group static inline FFNOT(group a){
  group res={~a.s1,~a.s2};
  return res;
}
3064+
3065+
3066+/* 64 rows of 64 bits */
3067+
/* Store the two ints found at data into int slots 2*g and 2*g+1 of tab. */
void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
  int *entry=((int *)tab)+2*g;
  int *words=(int *)data;
  entry[0]=words[0];
  entry[1]=words[1];
}

/* Inverse of FFTABLEIN: copy int slots 2*g and 2*g+1 of tab to data. */
void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
  int *entry=((int *)tab)+2*g;
  int *words=(int *)data;
  words[0]=entry[0];
  words[1]=entry[1];
}

/* XOR the first n bytes found at byte offset 8*g of tab into data. */
void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
  unsigned char *entry=tab+8*g;
  int j=0;
  while(j<n){
    data[j]^=entry[j];
    j++;
  }
}
3084+
/* A batch is two unsigned int words, i.e. BYTES_PER_BATCH byte lanes
 * packed four to a word.  The B_* helpers operate on whole words. */
struct batch_t{
  unsigned int s1;
  unsigned int s2;
};
typedef struct batch_t batch;

#define BYTES_PER_BATCH 8

/* Bitwise AND of a and b. */
batch static inline B_FFAND(batch a,batch b){
  batch res={a.s1&b.s1,a.s2&b.s2};
  return res;
}

/* Bitwise OR of a and b. */
batch static inline B_FFOR(batch a,batch b){
  batch res={a.s1|b.s1,a.s2|b.s2};
  return res;
}

/* Bitwise XOR of a and b. */
batch static inline B_FFXOR(batch a,batch b){
  batch res={a.s1^b.s1,a.s2^b.s2};
  return res;
}


/* Constant batches: every byte holds the value named in the function. */
batch static inline B_FFN_ALL_29(){
  batch res={0x29292929,0x29292929};
  return res;
}
batch static inline B_FFN_ALL_02(){
  batch res={0x02020202,0x02020202};
  return res;
}
batch static inline B_FFN_ALL_04(){
  batch res={0x04040404,0x04040404};
  return res;
}
batch static inline B_FFN_ALL_10(){
  batch res={0x10101010,0x10101010};
  return res;
}
batch static inline B_FFN_ALL_40(){
  batch res={0x40404040,0x40404040};
  return res;
}
batch static inline B_FFN_ALL_80(){
  batch res={0x80808080,0x80808080};
  return res;
}


/* Shift each 32-bit word left by n bits.  The shift is done on the whole
 * word, so bits cross from one byte into the next within a word. */
batch static inline B_FFSH8L(batch a,int n){
  batch res={a.s1<<n,a.s2<<n};
  return res;
}

/* Shift each 32-bit word right by n bits (whole-word shift, see above). */
batch static inline B_FFSH8R(batch a,int n){
  batch res={a.s1>>n,a.s2>>n};
  return res;
}


/* Intentionally empty in this variant. */
void static inline M_EMPTY(void){
}
3170Index: libs/libmythtv/FFdecsa/logic/logic.c
3171===================================================================
3172--- /dev/null 1970-01-01 00:00:00.000000000 +0000
3173+++ libs/libmythtv/FFdecsa/logic/logic.c 2006-06-20 17:36:06.000000000 -0400
3174@@ -0,0 +1,330 @@
3175+/* logic -- synthetize logic functions with 4 inputs
3176+ *
3177+ * Copyright (C) 2003-2004 fatih89r
3178+ *
3179+ * This program is free software; you can redistribute it and/or modify
3180+ * it under the terms of the GNU General Public License as published by
3181+ * the Free Software Foundation; either version 2 of the License, or
3182+ * (at your option) any later version.
3183+ *
3184+ * This program is distributed in the hope that it will be useful,
3185+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3186+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3187+ * GNU General Public License for more details.
3188+ *
3189+ * You should have received a copy of the GNU General Public License
3190+ * along with this program; if not, write to the Free Software
3191+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
3192+ */
3193+
3194+
3195+
3196+
3197+/* Can we use negated inputs? */
3198+#define noNEGATEDTOO
3199+
3200+
3201+#include <stdio.h>
3202+
3203+
3204+/*
3205+ * abcd
3206+ */
3207+
/* Pack sixteen 0/1 values (b15 is the most significant bit) into one
 * 16-bit truth-table index.  The whole expansion is parenthesized so the
 * macro can be combined with other operators without precedence
 * surprises. */
#define BINARY(b15,b14,b13,b12,b11,b10,b9,b8,b7,b6,b5,b4,b3,b2,b1,b0) \
  (((b15)<<15)|((b14)<<14)|((b13)<<13)|((b12)<<12)| \
   ((b11)<<11)|((b10)<<10)|((b9) << 9)|((b8) << 8)| \
   ((b7) << 7)|((b6) << 6)|((b5) << 5)|((b4) << 4)| \
   ((b3) << 3)|((b2) << 2)|((b1) << 1)|((b0) << 0))
3213+
/* One entry per 16-bit truth table: how that function is built. */
struct fun{
  int level;    /* cost of the expression; LEVEL_ALOT while not yet found */
  int op_type;  /* one of the OP_* codes below */
  int op1, op2; /* OP_SRC: op1 is a SRC_* code; AND/OR/XOR: indices into db */
};

/* Table indexed by truth table value, plus the count of entries found. */
struct fun db[65536];
int n_fun;

/* Marker level for "no expression known yet". */
#define LEVEL_ALOT 1000000

/* Values stored in fun.op_type. */
#define OP_FALSE 0
#define OP_TRUE  1
#define OP_SRC   2
#define OP_AND   3
#define OP_OR    4
#define OP_XOR   5

/* Values stored in fun.op1 when op_type is OP_SRC: the inputs a..d and
 * their negated forms an..dn. */
#define SRC_A  10
#define SRC_B  20
#define SRC_C  30
#define SRC_D  40
#define SRC_AN 11
#define SRC_BN 21
#define SRC_CN 31
#define SRC_DN 41

/* Printers for one db entry, defined after main(). */
void dump_element_prefix(int e);
void dump_element_infix(int e);
3241+
3242+void dump_element_prefix(int);
3243+void dump_element_infix(int);
3244+
3245+int main(void){
3246+ int i,j;
3247+ int l,p1,p2;
3248+ int candidate;
3249+ int max_p2_lev;
3250+
3251+ for(i=0;i<65536;i++){
3252+ db[i].level=LEVEL_ALOT;
3253+ }
3254+ n_fun=0;
3255+
3256+ db[0].level=0;
3257+ db[0].op_type=OP_FALSE;
3258+ n_fun++;
3259+
3260+ db[65535].level=0;
3261+ db[65535].op_type=OP_TRUE;
3262+ n_fun++;
3263+
3264+ db[BINARY(0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1)].level=0;
3265+ db[BINARY(0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1)].op_type=OP_SRC;
3266+ db[BINARY(0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1)].op1=SRC_A;
3267+ n_fun++;
3268+
3269+ db[BINARY(0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1)].level=0;
3270+ db[BINARY(0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1)].op_type=OP_SRC;
3271+ db[BINARY(0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1)].op1=SRC_B;
3272+ n_fun++;
3273+
3274+ db[BINARY(0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1)].level=0;
3275+ db[BINARY(0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1)].op_type=OP_SRC;
3276+ db[BINARY(0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1)].op1=SRC_C;
3277+ n_fun++;
3278+
3279+ db[BINARY(0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1)].level=0;
3280+ db[BINARY(0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1)].op_type=OP_SRC;
3281+ db[BINARY(0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1)].op1=SRC_D;
3282+ n_fun++;
3283+#ifdef NEGATEDTOO
3284+ db[BINARY(1,1,1,1, 1,1,1,1, 0,0,0,0, 0,0,0,0)].level=0;
3285+ db[BINARY(1,1,1,1, 1,1,1,1, 0,0,0,0, 0,0,0,0)].op_type=OP_SRC;
3286+ db[BINARY(1,1,1,1, 1,1,1,1, 0,0,0,0, 0,0,0,0)].op1=SRC_AN;
3287+ n_fun++;
3288+
3289+ db[BINARY(1,1,1,1, 0,0,0,0, 1,1,1,1, 0,0,0,0)].level=0;
3290+ db[BINARY(1,1,1,1, 0,0,0,0, 1,1,1,1, 0,0,0,0)].op_type=OP_SRC;
3291+ db[BINARY(1,1,1,1, 0,0,0,0, 1,1,1,1, 0,0,0,0)].op1=SRC_BN;
3292+ n_fun++;
3293+
3294+ db[BINARY(1,1,0,0, 1,1,0,0, 1,1,0,0, 1,1,0,0)].level=0;
3295+ db[BINARY(1,1,0,0, 1,1,0,0, 1,1,0,0, 1,1,0,0)].op_type=OP_SRC;
3296+ db[BINARY(1,1,0,0, 1,1,0,0, 1,1,0,0, 1,1,0,0)].op1=SRC_CN;
3297+ n_fun++;
3298+
3299+ db[BINARY(1,0,1,0, 1,0,1,0, 1,0,1,0, 1,0,1,0)].level=0;
3300+ db[BINARY(1,0,1,0, 1,0,1,0, 1,0,1,0, 1,0,1,0)].op_type=OP_SRC;
3301+ db[BINARY(1,0,1,0, 1,0,1,0, 1,0,1,0, 1,0,1,0)].op1=SRC_DN;
3302+ n_fun++;
3303+#endif
3304+
3305+ for(l=0;l<100;l++){
3306+ printf("calculating level %i\n",l);
3307+ for(p1=1;p1<65536;p1++){
3308+ if(db[p1].level==LEVEL_ALOT) continue;
3309+ max_p2_lev=l-db[p1].level-1;
3310+ for(p2=p1+1;p2<65536;p2++){
3311+ if(db[p2].level>max_p2_lev) continue;
3312+
3313+ candidate=p1&p2;
3314+ if(db[candidate].level==LEVEL_ALOT){
3315+ //found new
3316+ db[candidate].level=db[p1].level+db[p2].level+1;
3317+ db[candidate].op_type=OP_AND;
3318+ db[candidate].op1=p1;
3319+ db[candidate].op2=p2;
3320+ n_fun++;
3321+ }
3322+
3323+ candidate=p1|p2;
3324+ if(db[candidate].level==LEVEL_ALOT){
3325+ //found new
3326+ db[candidate].level=db[p1].level+db[p2].level+1;
3327+ db[candidate].op_type=OP_OR;
3328+ db[candidate].op1=p1;
3329+ db[candidate].op2=p2;
3330+ n_fun++;
3331+ }
3332+
3333+ candidate=p1^p2;
3334+ if(db[candidate].level==LEVEL_ALOT){
3335+ //found new
3336+ db[candidate].level=db[p1].level+db[p2].level+1;
3337+ db[candidate].op_type=OP_XOR;
3338+ db[candidate].op1=p1;
3339+ db[candidate].op2=p2;
3340+ n_fun++;
3341+ }
3342+
3343+ }
3344+ }
3345+ printf("num fun=%i\n\n",n_fun);
3346+ fflush(stdout);
3347+ if(n_fun>=65536) break;
3348+ }
3349+
3350+
3351+ for(i=0;i<65536;i++){
3352+ if(db[i].level==LEVEL_ALOT) continue;
3353+
3354+ printf("PREFIX ");
3355+ for(j=15;j>=0;j--){
3356+ printf("%i",i&(1<<j)?1:0);
3357+ if(j%4==0) printf(" ");
3358+ if(j%8==0) printf(" ");
3359+ }
3360+ printf(" : lev %2i: ",db[i].level);
3361+ dump_element_prefix(i);
3362+ printf("\n");
3363+
3364+ printf("INFIX ");
3365+ for(j=15;j>=0;j--){
3366+ printf("%i",i&(1<<j)?1:0);
3367+ if(j%4==0) printf(" ");
3368+ if(j%8==0) printf(" ");
3369+ }
3370+ printf(" : lev %2i: ",db[i].level);
3371+ dump_element_infix(i);
3372+ printf("\n");
3373+ }
3374+
3375+ return 0;
3376+}
3377+
3378+void dump_element_prefix(int e){
3379+ if(db[e].level==LEVEL_ALOT){
3380+ printf("PANIC!\n");
3381+ return;
3382+ };
3383+ switch(db[e].op_type){
3384+ case OP_FALSE:
3385+ printf("0");
3386+ break;
3387+ case OP_TRUE:
3388+ printf("1");
3389+ break;
3390+ case OP_SRC:
3391+ switch(db[e].op1){
3392+ case SRC_A:
3393+ printf("a");
3394+ break;
3395+ case SRC_B:
3396+ printf("b");
3397+ break;
3398+ case SRC_C:
3399+ printf("c");
3400+ break;
3401+ case SRC_D:
3402+ printf("d");
3403+ break;
3404+ case SRC_AN:
3405+ printf("an");
3406+ break;
3407+ case SRC_BN:
3408+ printf("bn");
3409+ break;
3410+ case SRC_CN:
3411+ printf("cn");
3412+ break;
3413+ case SRC_DN:
3414+ printf("dn");
3415+ break;
3416+ }
3417+ break;
3418+ case OP_AND:
3419+ printf("FFAND(");
3420+ dump_element_prefix(db[e].op1);
3421+ printf(",");
3422+ dump_element_prefix(db[e].op2);
3423+ printf(")");
3424+ break;
3425+ case OP_OR:
3426+ printf("FFOR(");
3427+ dump_element_prefix(db[e].op1);
3428+ printf(",");
3429+ dump_element_prefix(db[e].op2);
3430+ printf(")");
3431+ break;
3432+ case OP_XOR:
3433+ printf("FFXOR(");
3434+ dump_element_prefix(db[e].op1);
3435+ printf(",");
3436+ dump_element_prefix(db[e].op2);
3437+ printf(")");
3438+ break;
3439+ }
3440+}
3441+
3442+void dump_element_infix(int e){
3443+ if(db[e].level==LEVEL_ALOT){
3444+ printf("PANIC!\n");
3445+ return;
3446+ };
3447+ switch(db[e].op_type){
3448+ case OP_FALSE:
3449+ printf("0");
3450+ break;
3451+ case OP_TRUE:
3452+ printf("1");
3453+ break;
3454+ case OP_SRC:
3455+ switch(db[e].op1){
3456+ case SRC_A:
3457+ printf("a");
3458+ break;
3459+ case SRC_B:
3460+ printf("b");
3461+ break;
3462+ case SRC_C:
3463+ printf("c");
3464+ break;
3465+ case SRC_D:
3466+ printf("d");
3467+ break;
3468+ case SRC_AN:
3469+ printf("an");
3470+ break;
3471+ case SRC_BN:
3472+ printf("bn");
3473+ break;
3474+ case SRC_CN:
3475+ printf("cn");
3476+ break;
3477+ case SRC_DN:
3478+ printf("dn");
3479+ break;
3480+ }
3481+ break;
3482+ case OP_AND:
3483+ printf("( ");
3484+ dump_element_infix(db[e].op1);
3485+ printf("&");
3486+ dump_element_infix(db[e].op2);
3487+ printf(" )");
3488+ break;
3489+ case OP_OR:
3490+ printf("( ");
3491+ dump_element_infix(db[e].op1);
3492+ printf("|");
3493+ dump_element_infix(db[e].op2);
3494+ printf(" )");
3495+ break;
3496+ case OP_XOR:
3497+ printf("( ");
3498+ dump_element_infix(db[e].op1);
3499+ printf("^");
3500+ dump_element_infix(db[e].op2);
3501+ printf(" )");
3502+ break;
3503+ }
3504+}
3505Index: libs/libmythtv/FFdecsa/parallel_128_4int.h
3506===================================================================
3507--- /dev/null 1970-01-01 00:00:00.000000000 +0000
3508+++ libs/libmythtv/FFdecsa/parallel_128_4int.h 2006-06-20 17:36:06.000000000 -0400
3509@@ -0,0 +1,207 @@
3510+/* FFdecsa -- fast decsa algorithm
3511+ *
3512+ * Copyright (C) 2003-2004 fatih89r
3513+ *
3514+ * This program is free software; you can redistribute it and/or modify
3515+ * it under the terms of the GNU General Public License as published by
3516+ * the Free Software Foundation; either version 2 of the License, or
3517+ * (at your option) any later version.
3518+ *
3519+ * This program is distributed in the hope that it will be useful,
3520+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3521+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3522+ * GNU General Public License for more details.
3523+ *
3524+ * You should have received a copy of the GNU General Public License
3525+ * along with this program; if not, write to the Free Software
3526+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
3527+ */
3528+
3529+
/*
 * "group": the bit-sliced word used by the stream cipher code, here built
 * from four unsigned ints.  GROUP_PARALLELISM advertises it as 128 bits
 * wide (this presumes a 32-bit unsigned int).
 */
typedef struct group_t{
  unsigned int s1,s2,s3,s4;
} group;

#define GROUP_PARALLELISM 128

/* Group with every bit cleared. */
static inline group FF0(void){
  group zero={0x0,0x0,0x0,0x0};
  return zero;
}

/* Group with every word set to 0xffffffff. */
static inline group FF1(void){
  group ones={0xffffffff,0xffffffff,0xffffffff,0xffffffff};
  return ones;
}

/* Word-wise a AND b. */
static inline group FFAND(group a,group b){
  group out={a.s1&b.s1, a.s2&b.s2, a.s3&b.s3, a.s4&b.s4};
  return out;
}

/* Word-wise a OR b. */
static inline group FFOR(group a,group b){
  group out={a.s1|b.s1, a.s2|b.s2, a.s3|b.s3, a.s4|b.s4};
  return out;
}

/* Word-wise a XOR b. */
static inline group FFXOR(group a,group b){
  group out={a.s1^b.s1, a.s2^b.s2, a.s3^b.s3, a.s4^b.s4};
  return out;
}

/* Word-wise complement of a. */
static inline group FFNOT(group a){
  group out={~a.s1, ~a.s2, ~a.s3, ~a.s4};
  return out;
}
3590+
3591+
3592+/* 64 rows of 128 bits */
3593+
/*
 * Copy the 8 bytes at `data` into row `g` of `tab` (rows are 8 bytes wide,
 * i.e. row g starts at tab+8*g).
 *
 * The copy is done byte by byte: `data` and `tab` are plain byte buffers
 * with no alignment guarantee, so reading/writing them through an `int *`
 * (as was done before) is a misaligned, type-punned access.  A fixed
 * 8-iteration byte loop is well defined everywhere and does not depend on
 * sizeof(int).
 */
static inline void FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
  unsigned char *row=tab+8*g;
  int j;
  for(j=0;j<8;j++){
    row[j]=data[j];
  }
}
3598+
/*
 * Copy row `g` of `tab` (8 bytes starting at tab+8*g) out to `data`.
 *
 * Done byte by byte so that neither buffer has to be int-aligned and no
 * byte buffer is accessed through an `int *`; the result no longer depends
 * on sizeof(int).
 */
static inline void FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
  const unsigned char *row=tab+8*g;
  int j;
  for(j=0;j<8;j++){
    data[j]=row[j];
  }
}
3603+
/*
 * XOR the first `n` bytes of row `g` of `tab` (row g starts at tab+8*g)
 * into `data`, in place.  Nothing happens when n is zero or negative.
 */
static inline void FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
  const unsigned char *row=tab+8*g;
  int left;
  for(left=n;left>0;left--){
    *data++ ^= *row++;
  }
}
3610+
3611+
/*
 * "batch": the byte-parallel word used by the block cipher code, here four
 * unsigned ints declared as BYTES_PER_BATCH (16) bytes.
 */
typedef struct batch_t{
  unsigned int s1,s2,s3,s4;
} batch;

#define BYTES_PER_BATCH 16

/* Word-wise a AND b. */
static inline batch B_FFAND(batch a,batch b){
  batch out={a.s1&b.s1, a.s2&b.s2, a.s3&b.s3, a.s4&b.s4};
  return out;
}

/* Word-wise a OR b. */
static inline batch B_FFOR(batch a,batch b){
  batch out={a.s1|b.s1, a.s2|b.s2, a.s3|b.s3, a.s4|b.s4};
  return out;
}

/* Word-wise a XOR b. */
static inline batch B_FFXOR(batch a,batch b){
  batch out={a.s1^b.s1, a.s2^b.s2, a.s3^b.s3, a.s4^b.s4};
  return out;
}


/* Constant batches: the named byte value repeated in every byte. */
static inline batch B_FFN_ALL_29(void){
  batch k={0x29292929,0x29292929,0x29292929,0x29292929};
  return k;
}
static inline batch B_FFN_ALL_02(void){
  batch k={0x02020202,0x02020202,0x02020202,0x02020202};
  return k;
}
static inline batch B_FFN_ALL_04(void){
  batch k={0x04040404,0x04040404,0x04040404,0x04040404};
  return k;
}
static inline batch B_FFN_ALL_10(void){
  batch k={0x10101010,0x10101010,0x10101010,0x10101010};
  return k;
}
static inline batch B_FFN_ALL_40(void){
  batch k={0x40404040,0x40404040,0x40404040,0x40404040};
  return k;
}
static inline batch B_FFN_ALL_80(void){
  batch k={0x80808080,0x80808080,0x80808080,0x80808080};
  return k;
}

/*
 * Shift every 32-bit word left by n bits.  Bits are NOT confined to their
 * byte: anything shifted past a byte boundary lands in the neighbouring
 * byte of the same word.
 */
static inline batch B_FFSH8L(batch a,int n){
  batch out={a.s1<<n, a.s2<<n, a.s3<<n, a.s4<<n};
  return out;
}

/* Shift every 32-bit word right by n bits (same cross-byte caveat). */
static inline batch B_FFSH8R(batch a,int n){
  batch out={a.s1>>n, a.s2>>n, a.s3>>n, a.s4>>n};
  return out;
}
3713+
3714+
3715+void static inline M_EMPTY(void){
3716+}
3717Index: libs/libmythtv/FFdecsa/parallel_128_2mmx.h
3718===================================================================
3719--- /dev/null 1970-01-01 00:00:00.000000000 +0000
3720+++ libs/libmythtv/FFdecsa/parallel_128_2mmx.h 2006-06-20 17:36:06.000000000 -0400
3721@@ -0,0 +1,199 @@
3722+/* FFdecsa -- fast decsa algorithm
3723+ *
3724+ * Copyright (C) 2003-2004 fatih89r
3725+ *
3726+ * This program is free software; you can redistribute it and/or modify
3727+ * it under the terms of the GNU General Public License as published by
3728+ * the Free Software Foundation; either version 2 of the License, or
3729+ * (at your option) any later version.
3730+ *
3731+ * This program is distributed in the hope that it will be useful,
3732+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3733+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3734+ * GNU General Public License for more details.
3735+ *
3736+ * You should have received a copy of the GNU General Public License
3737+ * along with this program; if not, write to the Free Software
3738+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
3739+ */
3740+
3741+
3742+#include <mmintrin.h>
3743+
3744+struct group_t{
3745+ __m64 s1,s2;
3746+};
3747+typedef struct group_t group;
3748+
3749+#define GROUP_PARALLELISM 128
3750+
3751+group static inline FF0(){
3752+ group res;
3753+ res.s1=(__m64)0x0ULL;
3754+ res.s2=(__m64)0x0ULL;
3755+ return res;
3756+}
3757+
3758+group static inline FF1(){
3759+ group res;
3760+ res.s1=(__m64)0xffffffffffffffffULL;
3761+ res.s2=(__m64)0xffffffffffffffffULL;
3762+ return res;
3763+}
3764+
3765+group static inline FFAND(group a,group b){
3766+ group res;
3767+ res.s1=_m_pand(a.s1,b.s1);
3768+ res.s2=_m_pand(a.s2,b.s2);
3769+ return res;
3770+}
3771+
3772+group static inline FFOR(group a,group b){
3773+ group res;
3774+ res.s1=_m_por(a.s1,b.s1);
3775+ res.s2=_m_por(a.s2,b.s2);
3776+ return res;
3777+}
3778+
3779+group static inline FFXOR(group a,group b){
3780+ group res;
3781+ res.s1=_m_pxor(a.s1,b.s1);
3782+ res.s2=_m_pxor(a.s2,b.s2);
3783+ return res;
3784+}
3785+
3786+group static inline FFNOT(group a){
3787+ group res;
3788+ res.s1=_m_pxor(a.s1,FF1().s1);
3789+ res.s2=_m_pxor(a.s2,FF1().s2);
3790+ return res;
3791+}
3792+
3793+
3794+/* 64 rows of 128 bits */
3795+
/*
 * Copy the 8 bytes at `data` into row `g` of `tab` (rows are 8 bytes wide,
 * i.e. row g starts at tab+8*g).
 *
 * The copy is done byte by byte: `data` and `tab` are plain byte buffers
 * with no alignment guarantee, so reading/writing them through an `int *`
 * (as was done before) is a misaligned, type-punned access.  A fixed
 * 8-iteration byte loop is well defined everywhere and does not depend on
 * sizeof(int).
 */
static inline void FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
  unsigned char *row=tab+8*g;
  int j;
  for(j=0;j<8;j++){
    row[j]=data[j];
  }
}
3800+
/*
 * Copy row `g` of `tab` (8 bytes starting at tab+8*g) out to `data`.
 *
 * Done byte by byte so that neither buffer has to be int-aligned and no
 * byte buffer is accessed through an `int *`; the result no longer depends
 * on sizeof(int).
 */
static inline void FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
  const unsigned char *row=tab+8*g;
  int j;
  for(j=0;j<8;j++){
    data[j]=row[j];
  }
}
3805+
/*
 * XOR the first `n` bytes of row `g` of `tab` (row g starts at tab+8*g)
 * into `data`, in place.  Nothing happens when n is zero or negative.
 */
static inline void FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
  const unsigned char *row=tab+8*g;
  int left;
  for(left=n;left>0;left--){
    *data++ ^= *row++;
  }
}
3812+
3813+
3814+struct batch_t{
3815+ __m64 s1,s2;
3816+};
3817+typedef struct batch_t batch;
3818+
3819+#define BYTES_PER_BATCH 16
3820+
3821+batch static inline B_FFAND(batch a,batch b){
3822+ batch res;
3823+ res.s1=_m_pand(a.s1,b.s1);
3824+ res.s2=_m_pand(a.s2,b.s2);
3825+ return res;
3826+}
3827+
3828+batch static inline B_FFOR(batch a,batch b){
3829+ batch res;
3830+ res.s1=_m_por(a.s1,b.s1);
3831+ res.s2=_m_por(a.s2,b.s2);
3832+ return res;
3833+}
3834+
3835+batch static inline B_FFXOR(batch a,batch b){
3836+ batch res;
3837+ res.s1=_m_pxor(a.s1,b.s1);
3838+ res.s2=_m_pxor(a.s2,b.s2);
3839+ return res;
3840+}
3841+
3842+batch static inline B_FFN_ALL_29(){
3843+ batch res;
3844+ res.s1=(__m64)0x2929292929292929ULL;
3845+ res.s2=(__m64)0x2929292929292929ULL;
3846+ return res;
3847+}
3848+batch static inline B_FFN_ALL_02(){
3849+ batch res;
3850+ res.s1=(__m64)0x0202020202020202ULL;
3851+ res.s2=(__m64)0x0202020202020202ULL;
3852+ return res;
3853+}
3854+batch static inline B_FFN_ALL_04(){
3855+ batch res;
3856+ res.s1=(__m64)0x0404040404040404ULL;
3857+ res.s2=(__m64)0x0404040404040404ULL;
3858+ return res;
3859+}
3860+batch static inline B_FFN_ALL_10(){
3861+ batch res;
3862+ res.s1=(__m64)0x1010101010101010ULL;
3863+ res.s2=(__m64)0x1010101010101010ULL;
3864+ return res;
3865+}
3866+batch static inline B_FFN_ALL_40(){
3867+ batch res;
3868+ res.s1=(__m64)0x4040404040404040ULL;
3869+ res.s2=(__m64)0x4040404040404040ULL;
3870+ return res;
3871+}
3872+batch static inline B_FFN_ALL_80(){
3873+ batch res;
3874+ res.s1=(__m64)0x8080808080808080ULL;
3875+ res.s2=(__m64)0x8080808080808080ULL;
3876+ return res;
3877+}
3878+
3879+batch static inline B_FFSH8L(batch a,int n){
3880+ batch res;
3881+ res.s1=_m_psllqi(a.s1,n);
3882+ res.s2=_m_psllqi(a.s2,n);
3883+ return res;
3884+}
3885+
3886+batch static inline B_FFSH8R(batch a,int n){
3887+ batch res;
3888+ res.s1=_m_psrlqi(a.s1,n);
3889+ res.s2=_m_psrlqi(a.s2,n);
3890+ return res;
3891+}
3892+
3893+void static inline M_EMPTY(void){
3894+ _m_empty();
3895+}
3896+
3897+
3898+#undef XOR_8_BY
3899+#define XOR_8_BY(d,s1,s2) do{ __m64 *pd=(__m64 *)(d), *ps1=(__m64 *)(s1), *ps2=(__m64 *)(s2); \
3900+ *pd = _m_pxor( *ps1 , *ps2 ); }while(0)
3901+
3902+#undef XOREQ_8_BY
3903+#define XOREQ_8_BY(d,s) do{ __m64 *pd=(__m64 *)(d), *ps=(__m64 *)(s); \
3904+ *pd = _m_pxor( *ps, *pd ); }while(0)
3905+
3906+#undef COPY_8_BY
3907+#define COPY_8_BY(d,s) do{ __m64 *pd=(__m64 *)(d), *ps=(__m64 *)(s); \
3908+ *pd = *ps; }while(0)
3909+
3910+#undef BEST_SPAN
3911+#define BEST_SPAN 8
3912+
3913+#undef XOR_BEST_BY
3914+#define XOR_BEST_BY(d,s1,s2) do{ XOR_8_BY(d,s1,s2); }while(0);
3915+
3916+#undef XOREQ_BEST_BY
3917+#define XOREQ_BEST_BY(d,s) do{ XOREQ_8_BY(d,s); }while(0);
3918+
3919+#undef COPY_BEST_BY
3920+#define COPY_BEST_BY(d,s) do{ COPY_8_BY(d,s); }while(0);
3921Index: libs/libmythtv/FFdecsa/Makefile
3922===================================================================
3923--- /dev/null 1970-01-01 00:00:00.000000000 +0000
3924+++ libs/libmythtv/FFdecsa/Makefile 2006-06-20 17:36:06.000000000 -0400
3925@@ -0,0 +1,54 @@
##### compiling with g++ gives a little more speed
#COMPILER=gcc
#COMPILER=g++

###there are two functions which apparently don't want to be inlined
#FLAGS=-O3 -march=athlon-xp -fexpensive-optimizations -funroll-loops -finline-limit=6000000 --param max-unrolled-insns=500
#FLAGS=-O3 -march=athlon-xp -fexpensive-optimizations -funroll-loops --param max-unrolled-insns=500
#FLAGS=-O3 -march=pentium3 -fexpensive-optimizations -funroll-loops

###icc crashes for unknown reasons
#COMPILER=/opt/intel_cc_80/bin/icc
#FLAGS=-O3 -march=pentiumiii

#FLAGS += -g
#FLAGS += -fno-alias
#FLAGS += -vec_report3
#FLAGS += -Wall -Winline
#FLAGS += -fomit-frame-pointer
#FLAGS += -pg

# Defaults; both can be overridden from the environment or the make command line.
COMPILER ?= g++
FLAGS ?= -Wall -O3 -march=pentium -mmmx -fomit-frame-pointer -fexpensive-optimizations -funroll-loops

# Headers FFdecsa.o is rebuilt for.
H_FILES = FFdecsa.h parallel_generic.h parallel_std_def.h \
	parallel_032_4char.h \
	parallel_032_int.h \
	parallel_064_2int.h \
	parallel_064_8charA.h \
	parallel_064_8char.h \
	parallel_064_long.h \
	parallel_064_mmx.h \
	parallel_128_16charA.h \
	parallel_128_16char.h \
	parallel_128_2long.h \
	parallel_128_2mmx.h \
	parallel_128_4int.h \
	parallel_128_sse.h

# These targets name actions, not files: without .PHONY a stray file called
# "all", "clean" or "test" in this directory would stop the rule from running.
.PHONY: all clean test

all: FFdecsa.o

%.o: %.c
	$(COMPILER) $(FLAGS) -c $<

FFdecsa_test: FFdecsa_test.o FFdecsa.o
	$(COMPILER) $(FLAGS) -o FFdecsa_test FFdecsa_test.o FFdecsa.o

FFdecsa_test.o: FFdecsa_test.c FFdecsa.h FFdecsa_test_testcases.h
FFdecsa.o: FFdecsa.c stream.c $(H_FILES)

clean:
	rm -f FFdecsa_test *.o

# Runs the test program after a sync and a short pause, via nice.
test: FFdecsa_test
	sync;usleep 200000;nice --19 ./FFdecsa_test
3980Index: libs/libmythtv/FFdecsa/vdr_patches/README_vdr.txt
3981===================================================================
3982--- /dev/null 1970-01-01 00:00:00.000000000 +0000
3983+++ libs/libmythtv/FFdecsa/vdr_patches/README_vdr.txt 2006-06-20 17:36:06.000000000 -0400
3984@@ -0,0 +1,58 @@
3985+-------
3986+FFdecsa
3987+-------
3988+
3989+This directory contains patches to use FFdecsa with vdr, by means of a
3990+new FFdecsa-based SoftCSA.
3991+
3992+You don't need a SoftCSA patch!!!
3993+
3994+Step by step instructions:
3995+
3996+- create a directory somewhere, we will call this dir $BASE
3997+
3998+- download vdr-1.3.11.tar.bz2 and put it in $BASE
3999+
4000+- download vdr-sc-0.3.15.tar.gz and put it in $BASE
4001+
4002+- download FFdecsa-1.0.0.tar.bz2 and put it in $BASE
4003+
4004+- cd $BASE
4005+
4006+- tar xvjf vdr-1.3.11.tar.bz2
4007+
4008+- cd vdr-1.3.11/PLUGINS/src/
4009+
4010+- tar xvzf ../../../vdr-sc-0.3.15.tar.gz
4011+
4012+- ln -s sc-0.3.15 sc
4013+
4014+- cd $BASE/vdr-1.3.11
4015+
4016+- tar xvjf ../FFdecsa-1.0.0.tar.bz2
4017+
4018+- ln -s FFdecsa-1.0.0 FFdecsa
4019+
4020+- patch -p1 <PLUGINS/src/sc-0.3.15/patches/vdr-1.3.10-sc.diff
4021+
4022+- patch -p1 <FFdecsa/vdr_patches/vdr-1.3.11-FFdecsa.diff
4023+
4024+- cd FFdecsa
4025+
4026+- optional: edit Makefile
4027+
4028+- make
4029+
4030+- ./FFdecsa_test
4031+
4032+- cd $BASE/vdr-1.3.11
4033+
4034+- cp Make.config.template Make.config
4035+
4036+- optional: edit Make.config
4037+
4038+- make
4039+
4040+- make plugins
4041+
4042+Good luck!
4043Index: libs/libmythtv/FFdecsa/tmp_autogenerated_stuff_FFdecsa.c
4044===================================================================
4045--- /dev/null 1970-01-01 00:00:00.000000000 +0000
4046+++ libs/libmythtv/FFdecsa/tmp_autogenerated_stuff_FFdecsa.c 2006-06-20 17:36:06.000000000 -0400
4047@@ -0,0 +1,790 @@
4048+/* FFdecsa -- fast decsa algorithm
4049+ *
4050+ * Copyright (C) 2003-2004 fatih89r
4051+ *
4052+ * This program is free software; you can redistribute it and/or modify
4053+ * it under the terms of the GNU General Public License as published by
4054+ * the Free Software Foundation; either version 2 of the License, or
4055+ * (at your option) any later version.
4056+ *
4057+ * This program is distributed in the hope that it will be useful,
4058+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4059+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4060+ * GNU General Public License for more details.
4061+ *
4062+ * You should have received a copy of the GNU General Public License
4063+ * along with this program; if not, write to the Free Software
4064+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
4065+ */
4066+
4067+
4068+#include <sys/types.h>
4069+#include <string.h>
4070+#include <stdio.h>
4071+
4072+#include "FFdecsa.h"
4073+
4074+#ifndef NULL
4075+#define NULL 0
4076+#endif
4077+
4078+// activate debug by changing the grep command there.
4079+// don't edit autogenerated files (name beginning with "_").
4080+
4081+//// parallelization stuff, large speed differences are possible
4082+// possible choices
4083+#define PARALLEL_32_4CHAR 320
4084+#define PARALLEL_32_4CHARA 321
4085+#define PARALLEL_32_INT 322
4086+#define PARALLEL_64_8CHAR 640
4087+#define PARALLEL_64_8CHARA 641
4088+#define PARALLEL_64_2INT 642
4089+#define PARALLEL_64_LONG 643
4090+#define PARALLEL_64_MMX 644
4091+#define PARALLEL_128_16CHAR 1280
4092+#define PARALLEL_128_16CHARA 1281
4093+#define PARALLEL_128_4INT 1282
4094+#define PARALLEL_128_2LONG 1283
4095+#define PARALLEL_128_2MMX 1284
4096+#define PARALLEL_128_SSE 1285
4097+
4098+//////// our choice //////////////// our choice //////////////// our choice //////////////// our choice ////////
4099+#define PARALLEL_MODE PARALLEL_64_MMX
4100+//////// our choice //////////////// our choice //////////////// our choice //////////////// our choice ////////
4101+
4102+#include "parallel_generic.h"
4103+//// conditionals
4104+#if PARALLEL_MODE==PARALLEL_32_4CHAR
4105+#include "parallel_032_4char.h"
4106+#elif PARALLEL_MODE==PARALLEL_32_4CHARA
4107+#include "parallel_032_4charA.h"
4108+#elif PARALLEL_MODE==PARALLEL_32_INT
4109+#include "parallel_032_int.h"
4110+#elif PARALLEL_MODE==PARALLEL_64_8CHAR
4111+#include "parallel_064_8char.h"
4112+#elif PARALLEL_MODE==PARALLEL_64_8CHARA
4113+#include "parallel_064_8charA.h"
4114+#elif PARALLEL_MODE==PARALLEL_64_2INT
4115+#include "parallel_064_2int.h"
4116+#elif PARALLEL_MODE==PARALLEL_64_LONG
4117+#include "parallel_064_long.h"
4118+#elif PARALLEL_MODE==PARALLEL_64_MMX
4119+#include "parallel_064_mmx.h"
4120+#elif PARALLEL_MODE==PARALLEL_128_16CHAR
4121+#include "parallel_128_16char.h"
4122+#elif PARALLEL_MODE==PARALLEL_128_16CHARA
4123+#include "parallel_128_16charA.h"
4124+#elif PARALLEL_MODE==PARALLEL_128_4INT
4125+#include "parallel_128_4int.h"
4126+#elif PARALLEL_MODE==PARALLEL_128_2LONG
4127+#include "parallel_128_2long.h"
4128+#elif PARALLEL_MODE==PARALLEL_128_2MMX
4129+#include "parallel_128_2mmx.h"
4130+#elif PARALLEL_MODE==PARALLEL_128_SSE
4131+#include "parallel_128_sse.h"
4132+#else
4133+#error "unknown/undefined parallel mode"
4134+#endif
4135+
4136+// stuff depending on conditionals
4137+
4138+#define BYTES_PER_GROUP (GROUP_PARALLELISM/8)
4139+#define BYPG BYTES_PER_GROUP
4140+#define BITS_PER_GROUP GROUP_PARALLELISM
4141+#define BIPG BITS_PER_GROUP
4142+
4143+
4144+//// debug tool
4145+
4146+static void dump_mem(unsigned char *string, unsigned char *p, int len, int linelen){
4147+ int i;
4148+ for(i=0;i<len;i++){
4149+ if(i%linelen==0&&i) fprintf(stderr,"\n");
4150+ if(i%linelen==0) fprintf(stderr,"%s %08x:",string,i);
4151+ else{
4152+ if(i%8==0) fprintf(stderr," ");
4153+ if(i%4==0) fprintf(stderr," ");
4154+ }
4155+ fprintf(stderr," %02x",p[i]);
4156+ }
4157+ if(i%linelen==0) fprintf(stderr,"\n");
4158+}
4159+
4160+//////////////////////////////////////////////////////////////////////////////////
4161+
// All precomputed material derived from one 8-byte control word.
// set_control_words() fills every field; two instances exist (see `keys`).
struct csa_key_t{
  unsigned char ck[8];  // the control word itself, copied in by set_control_words()
// used by stream
  int iA[8];  // iA[0] is for A1, iA[7] is for A8 (nibbles of ck[0..3], from key_schedule_stream)
  int iB[8];  // iB[0] is for B1, iB[7] is for B8 (nibbles of ck[4..7], from key_schedule_stream)
// used by stream (group)
  group ck_g[8][8]; // [byte][bit:0=LSB,7=MSB]; FF1() where that bit of ck is set, FF0() otherwise
  group iA_g[8][4]; // [0 for A1][0 for LSB]; same FF1()/FF0() expansion of iA
  group iB_g[8][4]; // [0 for B1][0 for LSB]; same FF1()/FF0() expansion of iB
// used by block
  unsigned char kk[56];  // block key schedule produced by key_schedule_block()
// used by block (group)
  __attribute__((aligned(16))) batch kkmulti[56]; // many times the same byte in every batch: kk[i] repeated BYTES_PER_BATCH times
};

// The single, file-local key store: one csa_key_t for the even control word
// and one for the odd control word.
static struct csa_keys_t{
  struct csa_key_t even;
  struct csa_key_t odd;
} keys;
4181+
4182+
4183+//-----stream cypher
4184+
4185+//-----key schedule for stream decypher
/*
 * Split the 8-byte key into 16 nibbles for the stream cipher.
 *
 * ck  [In]  8 key bytes
 * iA  [Out] 8 nibbles taken from ck[0..3], high nibble of each byte first
 * iB  [Out] 8 nibbles taken from ck[4..7], high nibble of each byte first
 */
static void key_schedule_stream(
  unsigned char *ck,
  int *iA,
  int *iB)
{
  int n;
  for(n=0;n<4;n++){
    const int a=ck[n];
    const int b=ck[n+4];
    iA[2*n]  =(a>>4)&0xf;
    iA[2*n+1]= a    &0xf;
    iB[2*n]  =(b>>4)&0xf;
    iB[2*n+1]= b    &0xf;
  }
}
4208+
4209+//----- stream main function
4210+
4211+#define STREAM_INIT
4212+#include "tmp_autogenerated_stuff_stream.c"
4213+#undef STREAM_INIT
4214+
4215+#define STREAM_NORMAL
4216+#include "tmp_autogenerated_stuff_stream.c"
4217+#undef STREAM_NORMAL
4218+
4219+
4220+//-----block decypher
4221+
4222+//-----key schedule for block decypher
4223+
/*
 * Expand the 8-byte key into the 56-byte block-cipher key schedule.
 *
 * ck  [In]  8 key bytes
 * kk  [Out] 56 schedule bytes
 *
 * Stage 6 is the key itself; each lower stage is the 64-bit permutation
 * (key_perm, 1-based target positions, bit 0 = MSB of byte 0) of the stage
 * above it.  Byte j of stage i, XORed with i, becomes kk[i*8+j].
 */
static void key_schedule_block(
  unsigned char *ck,
  unsigned char *kk)
{
  static const unsigned char key_perm[0x40] = {
    0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40,
    0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29,
    0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11,
    0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37,
  };

  int stage[7][8];
  int round,pos;

  /* stage 6 = the raw key */
  for(pos=0;pos<8;pos++){
    stage[6][pos]=ck[pos];
  }

  /* stages 5..0: permute the 64 bits of the stage above */
  for(round=5;round>=0;round--){
    int permuted[64];
    for(pos=0;pos<64;pos++){
      const int src_bit=(stage[round+1][pos/8]>>(7-pos%8))&1;
      permuted[key_perm[pos]-1]=src_bit;
    }
    for(pos=0;pos<8;pos++){
      int value=0;
      int b;
      for(b=0;b<8;b++){
        value|=permuted[pos*8+b]<<(7-b);
      }
      stage[round][pos]=value;
    }
  }

  /* flatten, mixing the stage number into every byte */
  for(pos=0;pos<56;pos++){
    kk[pos]=stage[pos/8][pos%8]^(pos/8);
  }
}
4278+
4279+//-----block utils
4280+
// Load `count` 8-byte blocks and rearrange them in place.
//
// NOTE: the parameter names are the reverse of the data flow.  The blocks
// are READ from `out` (two ints per block) and WRITTEN to `in`, which is
// then shuffled in place; `out` is never written.
//
// in     destination/work area, accessed as ints; indices up to
//        GROUP_PARALLELISM+count-1 are written by the copy step and
//        8*INTS_PER_ROW ints are touched by the shuffle steps
// out    source, read as 2*count ints
// count  number of 8-byte blocks to load
//
// Both buffers are accessed through int pointers, so they must be suitably
// aligned for int access.
static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){
  int *ri=(int *)in;
  int *ibi=(int *)out;
  int j,i,k,g;
  // copy and first step
  // first int of block g goes to ri[g], second int to ri[GROUP_PARALLELISM+g]
  for(g=0;g<count;g++){
    ri[g]=ibi[2*g];
    ri[GROUP_PARALLELISM+g]=ibi[2*g+1];
  }
//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
// now 01230123
#define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
  // exchange 16-bit halves between rows (j+i) and (j+i+2)
  for(j=0;j<8;j+=4){
    for(i=0;i<2;i++){
      for(k=0;k<INTS_PER_ROW;k++){
        unsigned int t,b;
        t=ri[INTS_PER_ROW*(j+i)+k];
        b=ri[INTS_PER_ROW*(j+i+2)+k];
        ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
        ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
      }
    }
  }
//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
// now 01010101
  // exchange alternate bytes between adjacent rows (j) and (j+1)
  for(j=0;j<8;j+=2){
    for(i=0;i<1;i++){
      for(k=0;k<INTS_PER_ROW;k++){
        unsigned int t,b;
        t=ri[INTS_PER_ROW*(j+i)+k];
        b=ri[INTS_PER_ROW*(j+i+1)+k];
        ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
        ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
      }
    }
  }
//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
// now 00000000
}
4320+
// Rearrange the work area `in` in place, then store `count` 8-byte blocks
// to `out`.  It applies the same two exchange passes as trasp_N_8 but in
// the opposite order (bytes first, then 16-bit halves), followed by the
// mirror-image copy.
//
// in     work area, accessed as ints; MODIFIED in place by the two passes
// out    destination, written as 2*count ints
// count  number of 8-byte blocks to store
//
// Both buffers are accessed through int pointers, so they must be suitably
// aligned for int access.
static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){
  int *ri=(int *)in;
  int *bdi=(int *)out;
  int j,i,k,g;
#define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
// now 00000000
  // exchange alternate bytes between adjacent rows (j) and (j+1)
  for(j=0;j<8;j+=2){
    for(i=0;i<1;i++){
      for(k=0;k<INTS_PER_ROW;k++){
        unsigned int t,b;
        t=ri[INTS_PER_ROW*(j+i)+k];
        b=ri[INTS_PER_ROW*(j+i+1)+k];
        ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
        ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
      }
    }
  }
//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
// now 01010101
  // exchange 16-bit halves between rows (j+i) and (j+i+2)
  for(j=0;j<8;j+=4){
    for(i=0;i<2;i++){
      for(k=0;k<INTS_PER_ROW;k++){
        unsigned int t,b;
        t=ri[INTS_PER_ROW*(j+i)+k];
        b=ri[INTS_PER_ROW*(j+i+2)+k];
        ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
        ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
      }
    }
  }
//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
// now 01230123
  // block g is rebuilt from ri[g] (first int) and ri[GROUP_PARALLELISM+g] (second int)
  for(g=0;g<count;g++){
    bdi[2*g]=ri[g];
    bdi[2*g+1]=ri[GROUP_PARALLELISM+g];
  }
}
4359+
4360+//-----block main function
4361+
4362+// block group
4363+static void block_decypher_group(
4364+ batch *kkmulti, // [In] kkmulti[0]-kkmulti[55] 56 batches | Key schedule (each batch has repeated equal bytes).
4365+ unsigned char *ib, // [In] (ib0,ib1,...ib7)...x32 32*8 bytes | Initialization vector.
4366+ unsigned char *bd, // [Out] (bd0,bd1,...bd7)...x32 32*8 bytes | Block decipher.
4367+ int count)
4368+{
4369+ // int is faster than unsigned char. apparently not
4370+ static const unsigned char block_sbox[0x100] = {
4371+ 0x3A,0xEA,0x68,0xFE,0x33,0xE9,0x88,0x1A, 0x83,0xCF,0xE1,0x7F,0xBA,0xE2,0x38,0x12,
4372+ 0xE8,0x27,0x61,0x95,0x0C,0x36,0xE5,0x70, 0xA2,0x06,0x82,0x7C,0x17,0xA3,0x26,0x49,
4373+ 0xBE,0x7A,0x6D,0x47,0xC1,0x51,0x8F,0xF3, 0xCC,0x5B,0x67,0xBD,0xCD,0x18,0x08,0xC9,
4374+ 0xFF,0x69,0xEF,0x03,0x4E,0x48,0x4A,0x84, 0x3F,0xB4,0x10,0x04,0xDC,0xF5,0x5C,0xC6,
4375+ 0x16,0xAB,0xAC,0x4C,0xF1,0x6A,0x2F,0x3C, 0x3B,0xD4,0xD5,0x94,0xD0,0xC4,0x63,0x62,
4376+ 0x71,0xA1,0xF9,0x4F,0x2E,0xAA,0xC5,0x56, 0xE3,0x39,0x93,0xCE,0x65,0x64,0xE4,0x58,
4377+ 0x6C,0x19,0x42,0x79,0xDD,0xEE,0x96,0xF6, 0x8A,0xEC,0x1E,0x85,0x53,0x45,0xDE,0xBB,
4378+ 0x7E,0x0A,0x9A,0x13,0x2A,0x9D,0xC2,0x5E, 0x5A,0x1F,0x32,0x35,0x9C,0xA8,0x73,0x30,
4379+
4380+ 0x29,0x3D,0xE7,0x92,0x87,0x1B,0x2B,0x4B, 0xA5,0x57,0x97,0x40,0x15,0xE6,0xBC,0x0E,
4381+ 0xEB,0xC3,0x34,0x2D,0xB8,0x44,0x25,0xA4, 0x1C,0xC7,0x23,0xED,0x90,0x6E,0x50,0x00,
4382+ 0x99,0x9E,0x4D,0xD9,0xDA,0x8D,0x6F,0x5F, 0x3E,0xD7,0x21,0x74,0x86,0xDF,0x6B,0x05,
4383+ 0x8E,0x5D,0x37,0x11,0xD2,0x28,0x75,0xD6, 0xA7,0x77,0x24,0xBF,0xF0,0xB0,0x02,0xB7,
4384+ 0xF8,0xFC,0x81,0x09,0xB1,0x01,0x76,0x91, 0x7D,0x0F,0xC8,0xA0,0xF2,0xCB,0x78,0x60,
4385+ 0xD1,0xF7,0xE0,0xB5,0x98,0x22,0xB3,0x20, 0x1D,0xA6,0xDB,0x7B,0x59,0x9F,0xAE,0x31,
4386+ 0xFB,0xD3,0xB6,0xCA,0x43,0x72,0x07,0xF4, 0xD8,0x41,0x14,0x55,0x0D,0x54,0x8B,0xB9,
4387+ 0xAD,0x46,0x0B,0xAF,0x80,0x52,0x2C,0xFA, 0x8C,0x89,0x66,0xFD,0xB2,0xA9,0x9B,0xC0,
4388+ };
4389+ unsigned char r[GROUP_PARALLELISM*(8+56)]; /* 56 because we will move back in memory while looping */
4390+ unsigned char sbox_in[GROUP_PARALLELISM],sbox_out[GROUP_PARALLELISM],perm_out[GROUP_PARALLELISM];
4391+ int roff;
4392+ int i,g,count_all=GROUP_PARALLELISM;
4393+
4394+ roff=GROUP_PARALLELISM*56;
4395+
4396+#define FASTTRASP1
4397+#ifndef FASTTRASP1
4398+ for(g=0;g<count;g++){
4399+ // Init registers
4400+ int j;
4401+ for(j=0;j<8;j++){
4402+ r[roff+GROUP_PARALLELISM*j+g]=ib[8*g+j];
4403+ }
4404+ }
4405+#else
4406+ trasp_N_8((unsigned char *)&r[roff],(unsigned char *)ib,count);
4407+#endif
4408+//dump_mem("OLD r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
4409+
4410+ // loop over kk[55]..kk[0]
4411+ for(i=55;i>=0;i--){
4412+ {
4413+ batch tkkmulti=kkmulti[i];
4414+ batch *si=(batch *)sbox_in;
4415+ batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6);
4416+ for(g=0;g<count_all/BYTES_PER_BATCH;g++){
4417+ si[g]=B_FFXOR(tkkmulti,r6_N[g]); //FIXME: introduce FASTBATCH?
4418+ }
4419+ }
4420+
4421+ // table lookup, this works on only one byte at a time
4422+ // most difficult part of all
4423+ // - can't be parallelized
4424+ // - can't be synthetized through boolean terms (8 input bits are too many)
4425+ for(g=0;g<count_all;g++){
4426+ sbox_out[g]=block_sbox[sbox_in[g]];
4427+ }
4428+
4429+ // bit permutation
4430+ {
4431+ unsigned char *po=(unsigned char *)perm_out;
4432+ unsigned char *so=(unsigned char *)sbox_out;
4433+//dump_mem("pre perm ",(unsigned char *)so,GROUP_PARALLELISM,GROUP_PARALLELISM);
4434+ for(g=0;g<count_all;g+=BYTES_PER_BATCH){
4435+ batch in,out;
4436+ in=*(batch *)&so[g];
4437+
4438+ out=B_FFOR(
4439+ B_FFOR(
4440+ B_FFOR(
4441+ B_FFOR(
4442+ B_FFOR(
4443+ B_FFSH8L(B_FFAND(in,B_FFN_ALL_29()),1),
4444+ B_FFSH8L(B_FFAND(in,B_FFN_ALL_02()),6)),
4445+ B_FFSH8L(B_FFAND(in,B_FFN_ALL_04()),3)),
4446+ B_FFSH8R(B_FFAND(in,B_FFN_ALL_10()),2)),
4447+ B_FFSH8R(B_FFAND(in,B_FFN_ALL_40()),6)),
4448+ B_FFSH8R(B_FFAND(in,B_FFN_ALL_80()),4));
4449+
4450+ *(batch *)&po[g]=out;
4451+ }
4452+//dump_mem("post perm",(unsigned char *)po,GROUP_PARALLELISM,GROUP_PARALLELISM);
4453+ }
4454+
4455+ roff-=GROUP_PARALLELISM; /* virtual shift of registers */
4456+
4457+#if 0
4458+/* one by one */
4459+ for(g=0;g<count_all;g++){
4460+ r[roff+GROUP_PARALLELISM*0+g]=r[roff+GROUP_PARALLELISM*8+g]^sbox_out[g];
4461+ r[roff+GROUP_PARALLELISM*6+g]^=perm_out[g];
4462+ r[roff+GROUP_PARALLELISM*4+g]^=r[roff+GROUP_PARALLELISM*0+g];
4463+ r[roff+GROUP_PARALLELISM*3+g]^=r[roff+GROUP_PARALLELISM*0+g];
4464+ r[roff+GROUP_PARALLELISM*2+g]^=r[roff+GROUP_PARALLELISM*0+g];
4465+ }
4466+#else
4467+ for(g=0;g<count_all;g+=BEST_SPAN){
4468+ XOR_BEST_BY(&r[roff+GROUP_PARALLELISM*0+g],&r[roff+GROUP_PARALLELISM*8+g],&sbox_out[g]);
4469+ XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*6+g],&perm_out[g]);
4470+ XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*4+g],&r[roff+GROUP_PARALLELISM*0+g]);
4471+ XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*3+g],&r[roff+GROUP_PARALLELISM*0+g]);
4472+ XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*2+g],&r[roff+GROUP_PARALLELISM*0+g]);
4473+ }
4474+#endif
4475+ }
4476+
4477+#define FASTTRASP2
4478+#ifndef FASTTRASP2
4479+ for(g=0;g<count;g++){
4480+ // Copy results
4481+ int j;
4482+ for(j=0;j<8;j++){
4483+ bd[8*g+j]=r[roff+GROUP_PARALLELISM*j+g];
4484+ }
4485+ }
4486+#else
4487+ trasp_8_N((unsigned char *)&r[roff],(unsigned char *)bd,count);
4488+#endif
4489+}
4490+
4491+//-----------------------------------EXTERNAL INTERFACE
4492+
4493+//-----get internal parallelism
4494+
4495+int get_internal_parallelism(void){
4496+ return GROUP_PARALLELISM;
4497+}
4498+
4499+//-----get suggested cluster size
4500+
4501+int get_suggested_cluster_size(void){
4502+ int r;
4503+ r=GROUP_PARALLELISM+GROUP_PARALLELISM/10;
4504+ if(r<GROUP_PARALLELISM+5) r=GROUP_PARALLELISM+5;
4505+ return r;
4506+}
4507+
4508+//-----set control words: store the even (ev) and odd (od) 8-byte keys in 'keys' (declared outside this function) and precompute their schedules
4509+
4510+void set_control_words(unsigned char *ev, unsigned char *od){
4511+ // could be made faster, but is not run often
4512+ int bi,by;
4513+ int i,j;
4514+// key: keep a copy of the raw 8-byte control words
4515+ memcpy(keys.even.ck,ev,8);
4516+ memcpy(keys.odd.ck,od,8);
4517+// precalculations for stream
4518+ key_schedule_stream(keys.even.ck,keys.even.iA,keys.even.iB);
4519+ key_schedule_stream(keys.odd.ck,keys.odd.iA,keys.odd.iB);
4520+ for(by=0;by<8;by++){
4521+ for(bi=0;bi<8;bi++){ // expand every key bit into FF1() (bit set) or FF0() (bit clear)
4522+ keys.even.ck_g[by][bi]=(keys.even.ck[by]&(1<<bi))?FF1():FF0();
4523+ keys.odd.ck_g[by][bi]=(keys.odd.ck[by]&(1<<bi))?FF1():FF0();
4524+ }
4525+ }
4526+ for(by=0;by<8;by++){
4527+ for(bi=0;bi<4;bi++){ // same expansion for the 4-bit iA/iB values
4528+ keys.even.iA_g[by][bi]=(keys.even.iA[by]&(1<<bi))?FF1():FF0();
4529+ keys.odd.iA_g[by][bi]=(keys.odd.iA[by]&(1<<bi))?FF1():FF0();
4530+ keys.even.iB_g[by][bi]=(keys.even.iB[by]&(1<<bi))?FF1():FF0();
4531+ keys.odd.iB_g[by][bi]=(keys.odd.iB[by]&(1<<bi))?FF1():FF0();
4532+ }
4533+ }
4534+// precalculations for block
4535+ key_schedule_block(keys.even.ck,keys.even.kk);
4536+ key_schedule_block(keys.odd.ck,keys.odd.kk);
4537+ for(i=0;i<56;i++){
4538+ for(j=0;j<BYTES_PER_BATCH;j++){ // replicate schedule byte kk[i] into every byte of kkmulti[i]
4539+ *(((unsigned char *)&keys.even.kkmulti[i])+j)=keys.even.kk[i];
4540+ *(((unsigned char *)&keys.odd.kkmulti[i])+j)=keys.odd.kk[i];
4541+ }
4542+ }
4543+}
4544+
4545+//-----get control words: copy the stored 8-byte even and odd keys out of 'keys' into the caller's buffers
4546+
4547+void get_control_words(unsigned char *even, unsigned char *odd){
4548+ memcpy(even,keys.even.ck,8); // 'even' must have room for 8 bytes
4549+ memcpy(odd,keys.odd.ck,8); // 'odd' must have room for 8 bytes
4550+}
4551+
4552+//----- decrypt: collect up to GROUP_PARALLELISM scrambled 188-byte packets of one parity from 'cluster', decrypt them in place, return how many packets were consumed from the front
4553+
4554+int decrypt_packets(unsigned char **cluster){
4555+ // statistics, currently unused
4556+ int stat_no_scramble=0;
4557+ int stat_reserved=0;
4558+ int stat_decrypted[2]={0,0};
4559+ int stat_decrypted_mini=0;
4560+ unsigned char **clst; // cursor over cluster: (start,end) pointer pairs, ended by a NULL start
4561+ unsigned char **clst2; // write cursor used when compacting the pair list
4562+ int grouped; // packets collected for this pass
4563+ int group_ev_od; // parity of the group: 0 even key, 1 odd key
4564+ int advanced; // packets consumed from the front while can_advance is still set
4565+ int can_advance; // cleared once a packet of the other parity has to be left behind
4566+ unsigned char *g_pkt[GROUP_PARALLELISM];
4567+ int g_len[GROUP_PARALLELISM];
4568+ int g_offset[GROUP_PARALLELISM];
4569+ int g_n[GROUP_PARALLELISM];
4570+ int g_residue[GROUP_PARALLELISM];
4571+ unsigned char *pkt;
4572+ int xc0,ev_od,len,offset,n,residue;
4573+ struct csa_key_t* k;
4574+ int i,j,iter,g;
4575+ int t23,tsmall;
4576+ int alive[24]; // alive[i]: packets whose block count n is greater than i
4577+//icc craziness int pad1=0; //////////align! FIXME
4578+ unsigned char *encp[GROUP_PARALLELISM];
4579+ unsigned char stream_in[GROUP_PARALLELISM*8];
4580+ unsigned char stream_out[GROUP_PARALLELISM*8];
4581+ unsigned char ib[GROUP_PARALLELISM*8];
4582+ unsigned char block_out[GROUP_PARALLELISM*8];
4583+
4584+//icc craziness i=(int)&pad1;//////////align!!! FIXME
4585+
4586+ // build a list of packets to be processed
4587+ clst=cluster;
4588+ grouped=0;
4589+ advanced=0;
4590+ can_advance=1;
4591+ group_ev_od=-1; // silence incorrect compiler warning
4592+ pkt=*clst;
4593+ do{ // find a new packet
4594+ if(grouped==GROUP_PARALLELISM){
4595+ // full
4596+ break;
4597+ }
4598+ if(pkt==NULL){
4599+ // no more ranges
4600+ break;
4601+ }
4602+ if(pkt>=*(clst+1)){
4603+ // out of this range, try next
4604+ clst++;clst++;
4605+ pkt=*clst;
4606+ continue;
4607+ }
4608+
4609+ do{ // handle this packet
4610+ xc0=pkt[3]&0xc0; // top two bits of header byte 3 select clear/reserved/even/odd
4611+ if(xc0==0x00){
4612+ advanced+=can_advance;
4613+ stat_no_scramble++;
4614+ break;
4615+ }
4616+ if(xc0==0x40){
4617+ advanced+=can_advance;
4618+ stat_reserved++;
4619+ break;
4620+ }
4621+ if(xc0==0x80||xc0==0xc0){ // encrypted
4622+ ev_od=(xc0&0x40)>>6; // 0 even, 1 odd
4623+ if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd)
4624+ if(group_ev_od==ev_od){ // could be added to group
4625+ pkt[3]&=0x3f; // consider it decrypted now
4626+ if(pkt[3]&0x20){ // incomplete packet
4627+ offset=4+pkt[4]+1;
4628+ len=188-offset;
4629+ n=(len>0)?(len>>3):0; // FIX: pkt[4]>183 makes len negative; clamp so n never goes below 0 and alive[] is not indexed out of range
4630+ residue=len-(n<<3);
4631+ if(n==0){ // decrypted==encrypted!
4632+ advanced+=can_advance;
4633+ stat_decrypted_mini++;
4634+ break; // this doesn't need more processing
4635+ }
4636+ }else{
4637+ len=184;
4638+ offset=4;
4639+ n=23;
4640+ residue=0;
4641+ }
4642+ g_pkt[grouped]=pkt;
4643+ g_len[grouped]=len;
4644+ g_offset[grouped]=offset;
4645+ g_n[grouped]=n;
4646+ g_residue[grouped]=residue;
4647+ grouped++;
4648+ advanced+=can_advance;
4649+ stat_decrypted[ev_od]++;
4650+ }
4651+ else{
4652+ can_advance=0;
4653+ break; // skip and go on
4654+ }
4655+ }
4656+ } while(0);
4657+
4658+ if(can_advance){
4659+ // move range start forward
4660+ *clst+=188;
4661+ }
4662+ // next packet, if there is one
4663+ pkt+=188;
4664+ } while(1);
4665+
4666+ // delete empty ranges and compact list
4667+ clst2=cluster;
4668+ for(clst=cluster;*clst!=NULL;clst+=2){
4669+ // if not empty
4670+ if(*clst<*(clst+1)){
4671+ // it will remain
4672+ *clst2=*clst;
4673+ *(clst2+1)=*(clst+1);
4674+ clst2+=2;
4675+ }
4676+ }
4677+ *clst2=NULL;
4678+
4679+ if(grouped==0){
4680+ // no processing needed
4681+ return advanced;
4682+ }
4683+
4684+ // sort them, longest payload first
4685+ // we expect many n=23 packets and a few n<23
4686+ // grouped is always <= GROUP_PARALLELISM
4687+
4688+#define g_swap(a,b) \
4689+ pkt=g_pkt[a]; \
4690+ g_pkt[a]=g_pkt[b]; \
4691+ g_pkt[b]=pkt; \
4692+\
4693+ len=g_len[a]; \
4694+ g_len[a]=g_len[b]; \
4695+ g_len[b]=len; \
4696+\
4697+ offset=g_offset[a]; \
4698+ g_offset[a]=g_offset[b]; \
4699+ g_offset[b]=offset; \
4700+\
4701+ n=g_n[a]; \
4702+ g_n[a]=g_n[b]; \
4703+ g_n[b]=n; \
4704+\
4705+ residue=g_residue[a]; \
4706+ g_residue[a]=g_residue[b]; \
4707+ g_residue[b]=residue;
4708+
4709+ // step 1: move n=23 packets before small packets
4710+ t23=0;
4711+ tsmall=grouped-1;
4712+ for(;;){
4713+ for(;t23<grouped;t23++){
4714+ if(g_n[t23]!=23) break;
4715+ }
4716+
4717+ for(;tsmall>=0;tsmall--){
4718+ if(g_n[tsmall]==23) break;
4719+ }
4720+
4721+ if(tsmall-t23<1) break;
4722+
4723+
4724+ g_swap(t23,tsmall);
4725+
4726+ t23++;
4727+ tsmall--;
4728+ }
4729+
4730+ // step 2: sort small packets in decreasing order of n (bubble sort is enough)
4731+ for(i=t23;i<grouped;i++){
4732+ for(j=i+1;j<grouped;j++){
4733+ if(g_n[j]>g_n[i]){
4734+ g_swap(i,j);
4735+ }
4736+ }
4737+ }
4738+
4739+ // we need to know how many packets need 23 iterations, how many 22...
4740+ for(i=0;i<=23;i++){
4741+ alive[i]=0;
4742+ }
4743+ // count
4744+ alive[23-1]=t23;
4745+ for(i=t23;i<grouped;i++){
4746+ alive[g_n[i]-1]++; // g_n[i] is in 1..22 here, so the index is in 0..21
4747+ }
4748+ // integrate
4749+ for(i=22;i>=0;i--){
4750+ alive[i]+=alive[i+1];
4751+ }
4752+
4753+ // choose key
4754+ if(group_ev_od==0){
4755+ k=&keys.even;
4756+ }
4757+ else{
4758+ k=&keys.odd;
4759+ }
4760+
4761+ //INIT
4762+#define INITIALIZE_UNUSED_INPUT
4763+#ifdef INITIALIZE_UNUSED_INPUT
4764+// unnecessary zeroing.
4765+// without this, we operate on uninitialized memory
4766+// when grouped<GROUP_PARALLELISM, but it's not a problem,
4767+// as final results will be discarded.
4768+// random data makes debugging sessions difficult.
4769+ for(j=0;j<GROUP_PARALLELISM*8;j++) stream_in[j]=0;
4770+#else
4771+#endif
4772+
4773+ for(g=0;g<grouped;g++){
4774+ encp[g]=g_pkt[g];
4775+ encp[g]+=g_offset[g]; // skip header
4776+ FFTABLEIN(stream_in,g,encp[g]);
4777+ }
4778+//dump_mem("stream_in",stream_in,GROUP_PARALLELISM*8,BYPG);
4779+
4780+
4781+ // ITER 0
4782+ iter=0;
4783+ stream_cypher_group_init(k->iA_g,k->iB_g,stream_in);
4784+ // fill first ib
4785+ for(g=0;g<alive[iter];g++){
4786+ COPY_8_BY(ib+8*g,encp[g]);
4787+ }
4788+ // ITER 1..N-1
4789+ for (iter=1;iter<23&&alive[iter-1]>0;iter++){
4790+ // alive and just dead packets: calc block
4791+ block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
4792+ // all packets (dead too): calc stream
4793+ stream_cypher_group_normal(stream_out);
4794+//dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG);
4795+
4796+ // alive packets: calc ib
4797+ for(g=0;g<alive[iter];g++){
4798+ FFTABLEOUT(ib+8*g,stream_out,g);
4799+// XOREQ8BY gcc bug? 2x4 ok, 8 ko UPDATE: result ok but speed 1-2% slower (!!!???)
4800+#if 1
4801+ XOREQ_4_BY(ib+8*g,encp[g]+8);
4802+ XOREQ_4_BY(ib+8*g+4,encp[g]+8+4);
4803+#else
4804+ XOREQ_8_BY(ib+8*g,encp[g]+8);
4805+#endif
4806+ }
4807+ // alive packets: decrypt data
4808+ for(g=0;g<alive[iter];g++){
4809+ XOR_8_BY(encp[g],ib+8*g,block_out+8*g);
4810+ }
4811+ // just dead packets: write decrypted data
4812+ for(g=alive[iter];g<alive[iter-1];g++){
4813+ COPY_8_BY(encp[g],block_out+8*g);
4814+ }
4815+ // just dead packets: decrypt residue
4816+ for(g=alive[iter];g<alive[iter-1];g++){
4817+ FFTABLEOUTXORNBY(g_residue[g],encp[g]+8,stream_out,g);
4818+ }
4819+ // alive packets: pointers++
4820+ for(g=0;g<alive[iter];g++) encp[g]+=8;
4821+ };
4822+ // ITER N
4823+ iter=23;
4824+ // calc block
4825+ block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
4826+ // just dead packets: write decrypted data
4827+ for(g=alive[iter];g<alive[iter-1];g++){
4828+ COPY_8_BY(encp[g],block_out+8*g);
4829+ }
4830+ // no residue possible
4831+ // so do nothing
4832+
4833+
4834+ M_EMPTY(); // restore CPU multimedia state
4835+
4836+ return advanced; // packets consumed from the front of the cluster
4837+}
4838Index: libs/libmythtv/FFdecsa/FFdecsa.c
4839===================================================================
4840--- /dev/null 1970-01-01 00:00:00.000000000 +0000
4841+++ libs/libmythtv/FFdecsa/FFdecsa.c 2006-06-20 17:36:06.000000000 -0400
4842@@ -0,0 +1,866 @@
4843+/* FFdecsa -- fast decsa algorithm
4844+ *
4845+ * Copyright (C) 2003-2004 fatih89r
4846+ *
4847+ * This program is free software; you can redistribute it and/or modify
4848+ * it under the terms of the GNU General Public License as published by
4849+ * the Free Software Foundation; either version 2 of the License, or
4850+ * (at your option) any later version.
4851+ *
4852+ * This program is distributed in the hope that it will be useful,
4853+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4854+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4855+ * GNU General Public License for more details.
4856+ *
4857+ * You should have received a copy of the GNU General Public License
4858+ * along with this program; if not, write to the Free Software
4859+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
4860+ */
4861+
4862+
4863+#include <sys/types.h>
4864+#include <string.h>
4865+#include <stdio.h>
4866+#include <stdlib.h>
4867+
4868+#include "FFdecsa.h"
4869+
4870+#ifndef NULL
4871+#define NULL 0
4872+#endif
4873+
4874+//#define DEBUG
4875+#ifdef DEBUG
4876+#define DBG(a) a
4877+#else
4878+#define DBG(a)
4879+#endif
4880+
4881+//// parallelization stuff, large speed differences are possible
4882+// possible choices
4883+#define PARALLEL_32_4CHAR 320
4884+#define PARALLEL_32_4CHARA 321
4885+#define PARALLEL_32_INT 322
4886+#define PARALLEL_64_8CHAR 640
4887+#define PARALLEL_64_8CHARA 641
4888+#define PARALLEL_64_2INT 642
4889+#define PARALLEL_64_LONG 643
4890+#define PARALLEL_64_MMX 644
4891+#define PARALLEL_128_16CHAR 1280
4892+#define PARALLEL_128_16CHARA 1281
4893+#define PARALLEL_128_4INT 1282
4894+#define PARALLEL_128_2LONG 1283
4895+#define PARALLEL_128_2MMX 1284
4896+#define PARALLEL_128_SSE 1285
4897+
4898+//////// our choice //////////////// our choice //////////////// our choice //////////////// our choice ////////
4899+#ifndef PARALLEL_MODE
4900+#define PARALLEL_MODE PARALLEL_32_INT
4901+#endif
4902+//////// our choice //////////////// our choice //////////////// our choice //////////////// our choice ////////
4903+
4904+#include "parallel_generic.h"
4905+//// conditionals
4906+#if PARALLEL_MODE==PARALLEL_32_4CHAR
4907+#include "parallel_032_4char.h"
4908+#elif PARALLEL_MODE==PARALLEL_32_4CHARA
4909+#include "parallel_032_4charA.h"
4910+#elif PARALLEL_MODE==PARALLEL_32_INT
4911+#include "parallel_032_int.h"
4912+#elif PARALLEL_MODE==PARALLEL_64_8CHAR
4913+#include "parallel_064_8char.h"
4914+#elif PARALLEL_MODE==PARALLEL_64_8CHARA
4915+#include "parallel_064_8charA.h"
4916+#elif PARALLEL_MODE==PARALLEL_64_2INT
4917+#include "parallel_064_2int.h"
4918+#elif PARALLEL_MODE==PARALLEL_64_LONG
4919+#include "parallel_064_long.h"
4920+#elif PARALLEL_MODE==PARALLEL_64_MMX
4921+#include "parallel_064_mmx.h"
4922+#elif PARALLEL_MODE==PARALLEL_128_16CHAR
4923+#include "parallel_128_16char.h"
4924+#elif PARALLEL_MODE==PARALLEL_128_16CHARA
4925+#include "parallel_128_16charA.h"
4926+#elif PARALLEL_MODE==PARALLEL_128_4INT
4927+#include "parallel_128_4int.h"
4928+#elif PARALLEL_MODE==PARALLEL_128_2LONG
4929+#include "parallel_128_2long.h"
4930+#elif PARALLEL_MODE==PARALLEL_128_2MMX
4931+#include "parallel_128_2mmx.h"
4932+#elif PARALLEL_MODE==PARALLEL_128_SSE
4933+#include "parallel_128_sse.h"
4934+#else
4935+#error "unknown/undefined parallel mode"
4936+#endif
4937+
4938+// stuff depending on conditionals
4939+
4940+#define BYTES_PER_GROUP (GROUP_PARALLELISM/8)
4941+#define BYPG BYTES_PER_GROUP
4942+#define BITS_PER_GROUP GROUP_PARALLELISM
4943+#define BIPG BITS_PER_GROUP
4944+
4945+
4946+//// debug tool: hex-dump len bytes of p to stderr, linelen bytes per line, each line prefixed with 'string' and the byte offset
4947+
4948+static void dump_mem(const char *string, const unsigned char *p, int len, int linelen){
4949+ int i;
4950+ for(i=0;i<len;i++){
4951+ if(i%linelen==0&&i) fprintf(stderr,"\n"); // close the previous line before starting the next
4952+ if(i%linelen==0) fprintf(stderr,"%s %08x:",string,i);
4953+ else{
4954+ if(i%8==0) fprintf(stderr," "); // extra gap every 8 bytes
4955+ if(i%4==0) fprintf(stderr," "); // and every 4 bytes
4956+ }
4957+ fprintf(stderr," %02x",p[i]);
4958+ }
4959+ fprintf(stderr,"\n"); // FIX: always end the last line; it used to stay open when len was not a multiple of linelen
4960+}
4961+
4962+//////////////////////////////////////////////////////////////////////////////////
4963+
4964+struct csa_key_t{ // one control word plus everything precomputed from it by schedule_key()
4965+ unsigned char ck[8]; // raw 8-byte control word
4966+// used by stream
4967+ int iA[8]; // iA[0] is for A1, iA[7] is for A8
4968+ int iB[8]; // iB[0] is for B1, iB[7] is for B8
4969+// used by stream (group)
4970+ group ck_g[8][8]; // [byte][bit:0=LSB,7=MSB]
4971+ group iA_g[8][4]; // [0 for A1][0 for LSB]
4972+ group iB_g[8][4]; // [0 for B1][0 for LSB]
4973+// used by block
4974+ unsigned char kk[56]; // 56-byte block key schedule filled by key_schedule_block()
4975+// used by block (group)
4976+ __attribute__((aligned(16))) batch kkmulti[56]; // many times the same byte in every batch
4977+};
4978+
4979+struct csa_keys_t{ // the key pair handed around as an opaque void* by the external interface
4980+ struct csa_key_t even; // used by decrypt_packets() when the packet parity is 0
4981+ struct csa_key_t odd; // used by decrypt_packets() when the packet parity is 1
4982+};
4983+
4984+//-----stream cypher
4985+
4986+//-----key schedule for stream decypher: split the 8 key bytes into 16 nibbles, high nibble first (ck[0..3] -> iA, ck[4..7] -> iB)
4987+static void key_schedule_stream(
4988+ unsigned char *ck, // [In] ck[0]-ck[7] 8 bytes | Key.
4989+ int *iA, // [Out] iA[0]-iA[7] 8 nibbles | Key schedule.
4990+ int *iB) // [Out] iB[0]-iB[7] 8 nibbles | Key schedule.
4991+{
4992+ iA[0]=(ck[0]>>4)&0xf;
4993+ iA[1]=(ck[0] )&0xf;
4994+ iA[2]=(ck[1]>>4)&0xf;
4995+ iA[3]=(ck[1] )&0xf;
4996+ iA[4]=(ck[2]>>4)&0xf;
4997+ iA[5]=(ck[2] )&0xf;
4998+ iA[6]=(ck[3]>>4)&0xf;
4999+ iA[7]=(ck[3] )&0xf;
5000+ iB[0]=(ck[4]>>4)&0xf;
5001+ iB[1]=(ck[4] )&0xf;
5002+ iB[2]=(ck[5]>>4)&0xf;
5003+ iB[3]=(ck[5] )&0xf;
5004+ iB[4]=(ck[6]>>4)&0xf;
5005+ iB[5]=(ck[6] )&0xf;
5006+ iB[6]=(ck[7]>>4)&0xf;
5007+ iB[7]=(ck[7] )&0xf;
5008+}
5009+
5010+//----- stream main function
5011+
5012+#define STREAM_INIT
5013+#include "stream.c"
5014+#undef STREAM_INIT
5015+
5016+#define STREAM_NORMAL
5017+#include "stream.c"
5018+#undef STREAM_NORMAL
5019+
5020+
5021+//-----block decypher
5022+
5023+//-----key schedule for block decypher: expand the 8-byte key into 56 schedule bytes through 6 successive 64-bit permutations
5024+
5025+static void key_schedule_block(
5026+ unsigned char *ck, // [In] ck[0]-ck[7] 8 bytes | Key.
5027+ unsigned char *kk) // [Out] kk[0]-kk[55] 56 bytes | Key schedule.
5028+{
5029+ static const unsigned char key_perm[0x40] = { // 1-based destination bit positions (entries run 0x01..0x40)
5030+ 0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40,
5031+ 0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29,
5032+ 0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11,
5033+ 0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37,
5034+ };
5035+
5036+ int i,j,k;
5037+ int bit[64];
5038+ int newbit[64];
5039+ int kb[7][8]; // kb[6] is the key itself, kb[5]..kb[0] are its successive permutations
5040+
5041+ // 56 steps
5042+ // 56 key bytes kk(55)..kk(0) by key schedule from ck
5043+
5044+ // kb(6,0) .. kb(6,7) = ck(0) .. ck(7)
5045+ kb[6][0] = ck[0];
5046+ kb[6][1] = ck[1];
5047+ kb[6][2] = ck[2];
5048+ kb[6][3] = ck[3];
5049+ kb[6][4] = ck[4];
5050+ kb[6][5] = ck[5];
5051+ kb[6][6] = ck[6];
5052+ kb[6][7] = ck[7];
5053+
5054+ // calculate kb[5] .. kb[0]
5055+ for(i=5; i>=0; i--){
5056+ // 64 bit perm on kb
5057+ for(j=0; j<8; j++){
5058+ for(k=0; k<8; k++){
5059+ bit[j*8+k] = (kb[i+1][j] >> (7-k)) & 1; // unpack MSB first
5060+ newbit[key_perm[j*8+k]-1] = bit[j*8+k]; // -1 converts the table's 1-based position to an index
5061+ }
5062+ }
5063+ for(j=0; j<8; j++){
5064+ kb[i][j] = 0;
5065+ for(k=0; k<8; k++){
5066+ kb[i][j] |= newbit[j*8+k] << (7-k); // repack MSB first
5067+ }
5068+ }
5069+ }
5070+
5071+ // xor to give kk
5072+ for(i=0; i<7; i++){
5073+ for(j=0; j<8; j++){
5074+ kk[i*8+j] = kb[i][j] ^ i; // every byte of row i is XORed with the row index
5075+ }
5076+ }
5077+
5078+}
5079+
5080+//-----block utils
5081+
5082+static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){ // NOTE: despite the names, 'out' is read and 'in' is written; the caller passes (&r[roff], ib, count)
5083+ int *ri=(int *)in; // destination, accessed as ints
5084+ int *ibi=(int *)out; // source: two ints (8 bytes) per packet
5085+ int j,i,k,g;
5086+ // copy and first step
5087+ for(g=0;g<count;g++){ // only the first 'count' packets are copied
5088+ ri[g]=ibi[2*g];
5089+ ri[GROUP_PARALLELISM+g]=ibi[2*g+1];
5090+ }
5091+//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
5092+// now 01230123
5093+#define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
5094+ for(j=0;j<8;j+=4){
5095+ for(i=0;i<2;i++){
5096+ for(k=0;k<INTS_PER_ROW;k++){ // swap 16-bit halves between rows j+i and j+i+2
5097+ unsigned int t,b;
5098+ t=ri[INTS_PER_ROW*(j+i)+k];
5099+ b=ri[INTS_PER_ROW*(j+i+2)+k];
5100+ ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
5101+ ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
5102+ }
5103+ }
5104+ }
5105+//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
5106+// now 01010101
5107+ for(j=0;j<8;j+=2){
5108+ for(i=0;i<1;i++){
5109+ for(k=0;k<INTS_PER_ROW;k++){ // swap alternating bytes between rows j and j+1
5110+ unsigned int t,b;
5111+ t=ri[INTS_PER_ROW*(j+i)+k];
5112+ b=ri[INTS_PER_ROW*(j+i+1)+k];
5113+ ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
5114+ ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
5115+ }
5116+ }
5117+ }
5118+//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
5119+// now 00000000
5120+}
5121+
5122+static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){ // applies the steps of trasp_N_8 in reverse order; 'in' is modified in place, then copied to 'out'
5123+ int *ri=(int *)in; // source, rearranged in place
5124+ int *bdi=(int *)out; // destination: two ints (8 bytes) per packet
5125+ int j,i,k,g;
5126+#define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
5127+//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
5128+// now 00000000
5129+ for(j=0;j<8;j+=2){
5130+ for(i=0;i<1;i++){
5131+ for(k=0;k<INTS_PER_ROW;k++){ // swap alternating bytes between rows j and j+1
5132+ unsigned int t,b;
5133+ t=ri[INTS_PER_ROW*(j+i)+k];
5134+ b=ri[INTS_PER_ROW*(j+i+1)+k];
5135+ ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
5136+ ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
5137+ }
5138+ }
5139+ }
5140+//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
5141+// now 01010101
5142+ for(j=0;j<8;j+=4){
5143+ for(i=0;i<2;i++){
5144+ for(k=0;k<INTS_PER_ROW;k++){ // swap 16-bit halves between rows j+i and j+i+2
5145+ unsigned int t,b;
5146+ t=ri[INTS_PER_ROW*(j+i)+k];
5147+ b=ri[INTS_PER_ROW*(j+i+2)+k];
5148+ ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
5149+ ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
5150+ }
5151+ }
5152+ }
5153+//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
5154+// now 01230123
5155+ for(g=0;g<count;g++){ // only the first 'count' packets are written out
5156+ bdi[2*g]=ri[g];
5157+ bdi[2*g+1]=ri[GROUP_PARALLELISM+g];
5158+ }
5159+}
5160+
5161+//-----block main function
5162+
5163+// block group: run the 56-round block decipher on up to GROUP_PARALLELISM 8-byte blocks at once
5164+static void block_decypher_group(
5165+ batch *kkmulti, // [In] kkmulti[0]-kkmulti[55] 56 batches | Key schedule (each batch has repeated equal bytes).
5166+ unsigned char *ib, // [In] (ib0,ib1,...ib7)...x32 32*8 bytes | Initialization vector.
5167+ unsigned char *bd, // [Out] (bd0,bd1,...bd7)...x32 32*8 bytes | Block decipher.
5168+ int count) // number of leading blocks copied in from ib and out to bd; the rounds themselves always cover all GROUP_PARALLELISM lanes
5169+{
5170+ // int is faster than unsigned char. apparently not
5171+ static const unsigned char block_sbox[0x100] = {
5172+ 0x3A,0xEA,0x68,0xFE,0x33,0xE9,0x88,0x1A, 0x83,0xCF,0xE1,0x7F,0xBA,0xE2,0x38,0x12,
5173+ 0xE8,0x27,0x61,0x95,0x0C,0x36,0xE5,0x70, 0xA2,0x06,0x82,0x7C,0x17,0xA3,0x26,0x49,
5174+ 0xBE,0x7A,0x6D,0x47,0xC1,0x51,0x8F,0xF3, 0xCC,0x5B,0x67,0xBD,0xCD,0x18,0x08,0xC9,
5175+ 0xFF,0x69,0xEF,0x03,0x4E,0x48,0x4A,0x84, 0x3F,0xB4,0x10,0x04,0xDC,0xF5,0x5C,0xC6,
5176+ 0x16,0xAB,0xAC,0x4C,0xF1,0x6A,0x2F,0x3C, 0x3B,0xD4,0xD5,0x94,0xD0,0xC4,0x63,0x62,
5177+ 0x71,0xA1,0xF9,0x4F,0x2E,0xAA,0xC5,0x56, 0xE3,0x39,0x93,0xCE,0x65,0x64,0xE4,0x58,
5178+ 0x6C,0x19,0x42,0x79,0xDD,0xEE,0x96,0xF6, 0x8A,0xEC,0x1E,0x85,0x53,0x45,0xDE,0xBB,
5179+ 0x7E,0x0A,0x9A,0x13,0x2A,0x9D,0xC2,0x5E, 0x5A,0x1F,0x32,0x35,0x9C,0xA8,0x73,0x30,
5180+
5181+ 0x29,0x3D,0xE7,0x92,0x87,0x1B,0x2B,0x4B, 0xA5,0x57,0x97,0x40,0x15,0xE6,0xBC,0x0E,
5182+ 0xEB,0xC3,0x34,0x2D,0xB8,0x44,0x25,0xA4, 0x1C,0xC7,0x23,0xED,0x90,0x6E,0x50,0x00,
5183+ 0x99,0x9E,0x4D,0xD9,0xDA,0x8D,0x6F,0x5F, 0x3E,0xD7,0x21,0x74,0x86,0xDF,0x6B,0x05,
5184+ 0x8E,0x5D,0x37,0x11,0xD2,0x28,0x75,0xD6, 0xA7,0x77,0x24,0xBF,0xF0,0xB0,0x02,0xB7,
5185+ 0xF8,0xFC,0x81,0x09,0xB1,0x01,0x76,0x91, 0x7D,0x0F,0xC8,0xA0,0xF2,0xCB,0x78,0x60,
5186+ 0xD1,0xF7,0xE0,0xB5,0x98,0x22,0xB3,0x20, 0x1D,0xA6,0xDB,0x7B,0x59,0x9F,0xAE,0x31,
5187+ 0xFB,0xD3,0xB6,0xCA,0x43,0x72,0x07,0xF4, 0xD8,0x41,0x14,0x55,0x0D,0x54,0x8B,0xB9,
5188+ 0xAD,0x46,0x0B,0xAF,0x80,0x52,0x2C,0xFA, 0x8C,0x89,0x66,0xFD,0xB2,0xA9,0x9B,0xC0,
5189+ };
5190+ unsigned char r[GROUP_PARALLELISM*(8+56)]; /* 56 because we will move back in memory while looping */
5191+ unsigned char sbox_in[GROUP_PARALLELISM],sbox_out[GROUP_PARALLELISM],perm_out[GROUP_PARALLELISM];
5192+ int roff; // byte offset in r[] of the current register 0 row; decreases by one row per round
5193+ int i,g,count_all=GROUP_PARALLELISM;
5194+
5195+ roff=GROUP_PARALLELISM*56;
5196+
5197+#define FASTTRASP1
5198+#ifndef FASTTRASP1
5199+ for(g=0;g<count;g++){
5200+ // Init registers
5201+ int j;
5202+ for(j=0;j<8;j++){
5203+ r[roff+GROUP_PARALLELISM*j+g]=ib[8*g+j];
5204+ }
5205+ }
5206+#else
5207+ trasp_N_8((unsigned char *)&r[roff],(unsigned char *)ib,count); // fills r from ib (first argument is the destination)
5208+#endif
5209+//dump_mem("OLD r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
5210+
5211+ // loop over kk[55]..kk[0]
5212+ for(i=55;i>=0;i--){
5213+ {
5214+ batch tkkmulti=kkmulti[i];
5215+ batch *si=(batch *)sbox_in;
5216+ batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6);
5217+ for(g=0;g<count_all/BYTES_PER_BATCH;g++){
5218+ si[g]=B_FFXOR(tkkmulti,r6_N[g]); //FIXME: introduce FASTBATCH?
5219+ }
5220+ }
5221+
5222+ // table lookup, this works on only one byte at a time
5223+ // most difficult part of all
5224+ // - can't be parallelized
5225+ // - can't be synthesized through boolean terms (8 input bits are too many)
5226+ for(g=0;g<count_all;g++){
5227+ sbox_out[g]=block_sbox[sbox_in[g]];
5228+ }
5229+
5230+ // bit permutation
5231+ {
5232+ unsigned char *po=(unsigned char *)perm_out;
5233+ unsigned char *so=(unsigned char *)sbox_out;
5234+//dump_mem("pre perm ",(unsigned char *)so,GROUP_PARALLELISM,GROUP_PARALLELISM);
5235+ for(g=0;g<count_all;g+=BYTES_PER_BATCH){
5236+ batch in,out;
5237+ in=*(batch *)&so[g];
5238+
5239+ out=B_FFOR(
5240+ B_FFOR(
5241+ B_FFOR(
5242+ B_FFOR(
5243+ B_FFOR(
5244+ B_FFSH8L(B_FFAND(in,B_FFN_ALL_29()),1),
5245+ B_FFSH8L(B_FFAND(in,B_FFN_ALL_02()),6)),
5246+ B_FFSH8L(B_FFAND(in,B_FFN_ALL_04()),3)),
5247+ B_FFSH8R(B_FFAND(in,B_FFN_ALL_10()),2)),
5248+ B_FFSH8R(B_FFAND(in,B_FFN_ALL_40()),6)),
5249+ B_FFSH8R(B_FFAND(in,B_FFN_ALL_80()),4));
5250+
5251+ *(batch *)&po[g]=out;
5252+ }
5253+//dump_mem("post perm",(unsigned char *)po,GROUP_PARALLELISM,GROUP_PARALLELISM);
5254+ }
5255+
5256+ roff-=GROUP_PARALLELISM; /* virtual shift of registers */
5257+
5258+#if 0
5259+/* one by one */
5260+ for(g=0;g<count_all;g++){
5261+ r[roff+GROUP_PARALLELISM*0+g]=r[roff+GROUP_PARALLELISM*8+g]^sbox_out[g];
5262+ r[roff+GROUP_PARALLELISM*6+g]^=perm_out[g];
5263+ r[roff+GROUP_PARALLELISM*4+g]^=r[roff+GROUP_PARALLELISM*0+g];
5264+ r[roff+GROUP_PARALLELISM*3+g]^=r[roff+GROUP_PARALLELISM*0+g];
5265+ r[roff+GROUP_PARALLELISM*2+g]^=r[roff+GROUP_PARALLELISM*0+g];
5266+ }
5267+#else
5268+ for(g=0;g<count_all;g+=BEST_SPAN){ // same updates as the disabled byte-wise loop above, BEST_SPAN bytes at a time
5269+ XOR_BEST_BY(&r[roff+GROUP_PARALLELISM*0+g],&r[roff+GROUP_PARALLELISM*8+g],&sbox_out[g]);
5270+ XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*6+g],&perm_out[g]);
5271+ XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*4+g],&r[roff+GROUP_PARALLELISM*0+g]);
5272+ XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*3+g],&r[roff+GROUP_PARALLELISM*0+g]);
5273+ XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*2+g],&r[roff+GROUP_PARALLELISM*0+g]);
5274+ }
5275+#endif
5276+ }
5277+
5278+#define FASTTRASP2
5279+#ifndef FASTTRASP2
5280+ for(g=0;g<count;g++){
5281+ // Copy results
5282+ int j;
5283+ for(j=0;j<8;j++){
5284+ bd[8*g+j]=r[roff+GROUP_PARALLELISM*j+g];
5285+ }
5286+ }
5287+#else
5288+ trasp_8_N((unsigned char *)&r[roff],(unsigned char *)bd,count); // writes the first 'count' results to bd
5289+#endif
5290+}
5291+
5292+//-----------------------------------EXTERNAL INTERFACE
5293+
5294+//-----get internal parallelism: returns GROUP_PARALLELISM, the most packets decrypt_packets() groups in one pass
5295+
5296+int get_internal_parallelism(void){
5297+ return GROUP_PARALLELISM;
5298+}
5299+
5300+//-----get suggested cluster size: GROUP_PARALLELISM plus 10% slack, but never less than GROUP_PARALLELISM+5
5301+
5302+int get_suggested_cluster_size(void){
5303+ int r;
5304+ r=GROUP_PARALLELISM+GROUP_PARALLELISM/10;
5305+ if(r<GROUP_PARALLELISM+5) r=GROUP_PARALLELISM+5; // enforce a minimum slack of 5 packets
5306+ return r;
5307+}
5308+
5309+//-----key structure: allocate a csa_keys_t with both keys set to all zeros; returns NULL if malloc fails
5310+
5311+void *get_key_struct(void){
5312+ struct csa_keys_t *keys=(struct csa_keys_t *)malloc(sizeof(struct csa_keys_t));
5313+ if(keys) {
5314+ static const unsigned char pk[8] = { 0,0,0,0,0,0,0,0 }; // all-zero placeholder control word
5315+ set_control_words(keys,pk,pk); // fills every precomputed field, so the struct is fully initialized
5316+ }
5317+ return keys; // release with free_key_struct()
5318+}
5319+
5320+void free_key_struct(void *keys){ // releases a key structure obtained from get_key_struct(); NULL is accepted, as with free()
5321+ free(keys); // FIX: was "return free(keys);" - ISO C forbids returning an expression from a void function
5322+}
5323+
5324+//-----set control words: schedule_key() stores one 8-byte control word in *key and fills in all of its precomputed tables
5325+
5326+static void schedule_key(struct csa_key_t *key, const unsigned char *pk){
5327+ // could be made faster, but is not run often
5328+ int bi,by;
5329+ int i,j;
5330+// key: keep a copy of the raw 8-byte control word
5331+ memcpy(key->ck,pk,8);
5332+// precalculations for stream
5333+ key_schedule_stream(key->ck,key->iA,key->iB);
5334+ for(by=0;by<8;by++){
5335+ for(bi=0;bi<8;bi++){ // expand every key bit into FF1() (bit set) or FF0() (bit clear)
5336+ key->ck_g[by][bi]=(key->ck[by]&(1<<bi))?FF1():FF0();
5337+ }
5338+ }
5339+ for(by=0;by<8;by++){
5340+ for(bi=0;bi<4;bi++){ // same expansion for the 4-bit iA/iB values
5341+ key->iA_g[by][bi]=(key->iA[by]&(1<<bi))?FF1():FF0();
5342+ key->iB_g[by][bi]=(key->iB[by]&(1<<bi))?FF1():FF0();
5343+ }
5344+ }
5345+// precalculations for block
5346+ key_schedule_block(key->ck,key->kk);
5347+ for(i=0;i<56;i++){
5348+ for(j=0;j<BYTES_PER_BATCH;j++){ // replicate schedule byte kk[i] into every byte of kkmulti[i]
5349+ *(((unsigned char *)&key->kkmulti[i])+j)=key->kk[i];
5350+ }
5351+ }
5352+}
5353+
5354+void set_control_words(void *keys, const unsigned char *ev, const unsigned char *od){ // keys is a struct csa_keys_t*; ev and od are 8-byte control words
5355+ schedule_key(&((struct csa_keys_t *)keys)->even,ev); // install and schedule the even key
5356+ schedule_key(&((struct csa_keys_t *)keys)->odd,od); // install and schedule the odd key
5357+}
5358+
5359+void set_even_control_word(void *keys, const unsigned char *pk){ // replaces only the even key; keys is a struct csa_keys_t*
5360+ schedule_key(&((struct csa_keys_t *)keys)->even,pk); // pk is read as 8 bytes
5361+}
5362+
5363+void set_odd_control_word(void *keys, const unsigned char *pk){ // replaces only the odd key; keys is a struct csa_keys_t*
5364+ schedule_key(&((struct csa_keys_t *)keys)->odd,pk); // pk is read as 8 bytes
5365+}
5366+
5367+//-----get control words: copy the stored 8-byte even and odd keys out of 'keys' (a struct csa_keys_t*) into the caller's buffers
5368+
5369+void get_control_words(void *keys, unsigned char *even, unsigned char *odd){
5370+ memcpy(even,&((struct csa_keys_t *)keys)->even.ck,8); // 'even' must have room for 8 bytes
5371+ memcpy(odd,&((struct csa_keys_t *)keys)->odd.ck,8); // 'odd' must have room for 8 bytes
5372+}
5373+
5374+//----- decrypt
5375+
5376+int decrypt_packets(void *keys, unsigned char **cluster){
5377+ // statistics, currently unused
5378+ int stat_no_scramble=0;
5379+ int stat_reserved=0;
5380+ int stat_decrypted[2]={0,0};
5381+ int stat_decrypted_mini=0;
5382+ unsigned char **clst;
5383+ unsigned char **clst2;
5384+ int grouped;
5385+ int group_ev_od;
5386+ int advanced;
5387+ int can_advance;
5388+ unsigned char *g_pkt[GROUP_PARALLELISM];
5389+ int g_len[GROUP_PARALLELISM];
5390+ int g_offset[GROUP_PARALLELISM];
5391+ int g_n[GROUP_PARALLELISM];
5392+ int g_residue[GROUP_PARALLELISM];
5393+ unsigned char *pkt;
5394+ int xc0,ev_od,len,offset,n,residue;
5395+ struct csa_key_t* k;
5396+ int i,j,iter,g;
5397+ int t23,tsmall;
5398+ int alive[24];
5399+//icc craziness int pad1=0; //////////align! FIXME
5400+ unsigned char *encp[GROUP_PARALLELISM];
5401+ unsigned char stream_in[GROUP_PARALLELISM*8];
5402+ unsigned char stream_out[GROUP_PARALLELISM*8];
5403+ unsigned char ib[GROUP_PARALLELISM*8];
5404+ unsigned char block_out[GROUP_PARALLELISM*8];
5405+ struct stream_regs regs;
5406+
5407+//icc craziness i=(int)&pad1;//////////align!!! FIXME
5408+
5409+ // build a list of packets to be processed
5410+ clst=cluster;
5411+ grouped=0;
5412+ advanced=0;
5413+ can_advance=1;
5414+ group_ev_od=-1; // silence incorrect compiler warning
5415+ pkt=*clst;
5416+ do{ // find a new packet
5417+ if(grouped==GROUP_PARALLELISM){
5418+ // full
5419+ break;
5420+ }
5421+ if(pkt==NULL){
5422+ // no more ranges
5423+ break;
5424+ }
5425+ if(pkt>=*(clst+1)){
5426+ // out of this range, try next
5427+ clst++;clst++;
5428+ pkt=*clst;
5429+ continue;
5430+ }
5431+
5432+ do{ // handle this packet
5433+ xc0=pkt[3]&0xc0;
5434+ DBG(fprintf(stderr," exam pkt=%p, xc0=%02x, can_adv=%i\n",pkt,xc0,can_advance));
5435+ if(xc0==0x00){
5436+ DBG(fprintf(stderr,"skip clear pkt %p (can_advance is %i)\n",pkt,can_advance));
5437+ advanced+=can_advance;
5438+ stat_no_scramble++;
5439+ break;
5440+ }
5441+ if(xc0==0x40){
5442+ DBG(fprintf(stderr,"skip reserved pkt %p (can_advance is %i)\n",pkt,can_advance));
5443+ advanced+=can_advance;
5444+ stat_reserved++;
5445+ break;
5446+ }
5447+ if(xc0==0x80||xc0==0xc0){ // encrypted
5448+ ev_od=(xc0&0x40)>>6; // 0 even, 1 odd
5449+ if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd)
5450+ if(group_ev_od==ev_od){ // could be added to group
5451+ pkt[3]&=0x3f; // consider it decrypted now
5452+ if(pkt[3]&0x20){ // incomplete packet
5453+ offset=4+pkt[4]+1;
5454+ len=188-offset;
5455+ n=len>>3;
5456+ residue=len-(n<<3);
5457+ if(n==0){ // decrypted==encrypted!
5458+ DBG(fprintf(stderr,"DECRYPTED MINI! (can_advance is %i)\n",can_advance));
5459+ advanced+=can_advance;
5460+ stat_decrypted_mini++;
5461+ break; // this doesn't need more processing
5462+ }
5463+ }else{
5464+ len=184;
5465+ offset=4;
5466+ n=23;
5467+ residue=0;
5468+ }
5469+ g_pkt[grouped]=pkt;
5470+ g_len[grouped]=len;
5471+ g_offset[grouped]=offset;
5472+ g_n[grouped]=n;
5473+ g_residue[grouped]=residue;
5474+ DBG(fprintf(stderr,"%2i: eo=%i pkt=%p len=%03i n=%2i residue=%i\n",grouped,ev_od,pkt,len,n,residue));
5475+ grouped++;
5476+ advanced+=can_advance;
5477+ stat_decrypted[ev_od]++;
5478+ }
5479+ else{
5480+ can_advance=0;
5481+ DBG(fprintf(stderr,"skip pkt %p and can_advance set to 0\n",pkt));
5482+ break; // skip and go on
5483+ }
5484+ }
5485+ } while(0);
5486+
5487+ if(can_advance){
5488+ // move range start forward
5489+ *clst+=188;
5490+ }
5491+ // next packet, if there is one
5492+ pkt+=188;
5493+ } while(1);
5494+ DBG(fprintf(stderr,"-- result: grouped %i pkts, advanced %i pkts\n",grouped,advanced));
5495+
5496+ // delete empty ranges and compact list
5497+ clst2=cluster;
5498+ for(clst=cluster;*clst!=NULL;clst+=2){
5499+ // if not empty
5500+ if(*clst<*(clst+1)){
5501+ // it will remain
5502+ *clst2=*clst;
5503+ *(clst2+1)=*(clst+1);
5504+ clst2+=2;
5505+ }
5506+ }
5507+ *clst2=NULL;
5508+
5509+ if(grouped==0){
5510+ // no processing needed
5511+ return advanced;
5512+ }
5513+
5514+ // sort them, longest payload first
5515+ // we expect many n=23 packets and a few n<23
5516+ DBG(fprintf(stderr,"PRESORTING\n"));
5517+ for(i=0;i<grouped;i++){
5518+ DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
5519+ }
5520+ // grouped is always <= GROUP_PARALLELISM
5521+
5522+#define g_swap(a,b) \
5523+ pkt=g_pkt[a]; \
5524+ g_pkt[a]=g_pkt[b]; \
5525+ g_pkt[b]=pkt; \
5526+\
5527+ len=g_len[a]; \
5528+ g_len[a]=g_len[b]; \
5529+ g_len[b]=len; \
5530+\
5531+ offset=g_offset[a]; \
5532+ g_offset[a]=g_offset[b]; \
5533+ g_offset[b]=offset; \
5534+\
5535+ n=g_n[a]; \
5536+ g_n[a]=g_n[b]; \
5537+ g_n[b]=n; \
5538+\
5539+ residue=g_residue[a]; \
5540+ g_residue[a]=g_residue[b]; \
5541+ g_residue[b]=residue;
5542+
5543+ // step 1: move n=23 packets before small packets
5544+ t23=0;
5545+ tsmall=grouped-1;
5546+ for(;;){
5547+ for(;t23<grouped;t23++){
5548+ if(g_n[t23]!=23) break;
5549+ }
5550+DBG(fprintf(stderr,"t23 after for =%i\n",t23));
5551+
5552+ for(;tsmall>=0;tsmall--){
5553+ if(g_n[tsmall]==23) break;
5554+ }
5555+DBG(fprintf(stderr,"tsmall after for =%i\n",tsmall));
5556+
5557+ if(tsmall-t23<1) break;
5558+
5559+DBG(fprintf(stderr,"swap t23=%i,tsmall=%i\n",t23,tsmall));
5560+
5561+ g_swap(t23,tsmall);
5562+
5563+ t23++;
5564+ tsmall--;
5565+DBG(fprintf(stderr,"new t23=%i,tsmall=%i\n\n",t23,tsmall));
5566+ }
5567+ DBG(fprintf(stderr,"packets with n=23, t23=%i grouped=%i\n",t23,grouped));
5568+ DBG(fprintf(stderr,"MIDSORTING\n"));
5569+ for(i=0;i<grouped;i++){
5570+ DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
5571+ }
5572+
5573+ // step 2: sort small packets in decreasing order of n (bubble sort is enough)
5574+ for(i=t23;i<grouped;i++){
5575+ for(j=i+1;j<grouped;j++){
5576+ if(g_n[j]>g_n[i]){
5577+ g_swap(i,j);
5578+ }
5579+ }
5580+ }
5581+ DBG(fprintf(stderr,"POSTSORTING\n"));
5582+ for(i=0;i<grouped;i++){
5583+ DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
5584+ }
5585+
5586+ // we need to know how many packets need 23 iterations, how many 22...
5587+ for(i=0;i<=23;i++){
5588+ alive[i]=0;
5589+ }
5590+ // count
5591+ alive[23-1]=t23;
5592+ for(i=t23;i<grouped;i++){
5593+ alive[g_n[i]-1]++;
5594+ }
5595+ // integrate
5596+ for(i=22;i>=0;i--){
5597+ alive[i]+=alive[i+1];
5598+ }
5599+ DBG(fprintf(stderr,"ALIVE\n"));
5600+ for(i=0;i<=23;i++){
5601+ DBG(fprintf(stderr,"alive%2i=%i\n",i,alive[i]));
5602+ }
5603+
5604+ // choose key
5605+ if(group_ev_od==0){
5606+ k=&((struct csa_keys_t *)keys)->even;
5607+ }
5608+ else{
5609+ k=&((struct csa_keys_t *)keys)->odd;
5610+ }
5611+
5612+ //INIT
5613+//#define INITIALIZE_UNUSED_INPUT
5614+#ifdef INITIALIZE_UNUSED_INPUT
5615+// unnecessary zeroing.
5616+// without this, we operate on uninitialized memory
5617+// when grouped<GROUP_PARALLELISM, but it's not a problem,
5618+// as final results will be discarded.
5619+// random data makes debugging sessions difficult.
5620+ for(j=0;j<GROUP_PARALLELISM*8;j++) stream_in[j]=0;
5621+DBG(fprintf(stderr,"--- WARNING: you could gain speed by not initializing unused memory ---\n"));
5622+#else
5623+DBG(fprintf(stderr,"--- WARNING: DEBUGGING IS MORE DIFFICULT WHEN PROCESSING RANDOM DATA CHANGING AT EVERY RUN! ---\n"));
5624+#endif
5625+
5626+ for(g=0;g<grouped;g++){
5627+ encp[g]=g_pkt[g];
5628+ DBG(fprintf(stderr,"header[%i]=%p (%02x)\n",g,encp[g],*(encp[g])));
5629+ encp[g]+=g_offset[g]; // skip header
5630+ FFTABLEIN(stream_in,g,encp[g]);
5631+ }
5632+//dump_mem("stream_in",stream_in,GROUP_PARALLELISM*8,BYPG);
5633+
5634+
5635+ // ITER 0
5636+DBG(fprintf(stderr,">>>>>ITER 0\n"));
5637+ iter=0;
5638+ stream_cypher_group_init(&regs,k->iA_g,k->iB_g,stream_in);
5639+ // fill first ib
5640+ for(g=0;g<alive[iter];g++){
5641+ COPY_8_BY(ib+8*g,encp[g]);
5642+ }
5643+DBG(dump_mem("IB ",ib,8*alive[iter],8));
5644+ // ITER 1..N-1
5645+ for (iter=1;iter<23&&alive[iter-1]>0;iter++){
5646+DBG(fprintf(stderr,">>>>>ITER %i\n",iter));
5647+ // alive and just dead packets: calc block
5648+ block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
5649+DBG(dump_mem("BLO_ib ",block_out,8*alive[iter-1],8));
5650+ // all packets (dead too): calc stream
5651+ stream_cypher_group_normal(&regs,stream_out);
5652+//dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG);
5653+
5654+ // alive packets: calc ib
5655+ for(g=0;g<alive[iter];g++){
5656+ FFTABLEOUT(ib+8*g,stream_out,g);
5657+DBG(dump_mem("stream_out_ib ",ib+8*g,8,8));
5658+// XOREQ8BY gcc bug? 2x4 ok, 8 ko UPDATE: result ok but speed 1-2% slower (!!!???)
5659+#if 1
5660+ XOREQ_4_BY(ib+8*g,encp[g]+8);
5661+ XOREQ_4_BY(ib+8*g+4,encp[g]+8+4);
5662+#else
5663+ XOREQ_8_BY(ib+8*g,encp[g]+8);
5664+#endif
5665+DBG(dump_mem("after_stream_xor_ib ",ib+8*g,8,8));
5666+ }
5667+ // alive packets: decrypt data
5668+ for(g=0;g<alive[iter];g++){
5669+DBG(dump_mem("before_ib_decrypt_data ",encp[g],8,8));
5670+ XOR_8_BY(encp[g],ib+8*g,block_out+8*g);
5671+DBG(dump_mem("after_ib_decrypt_data ",encp[g],8,8));
5672+ }
5673+ // just dead packets: write decrypted data
5674+ for(g=alive[iter];g<alive[iter-1];g++){
5675+DBG(dump_mem("jd_before_ib_decrypt_data ",encp[g],8,8));
5676+ COPY_8_BY(encp[g],block_out+8*g);
5677+DBG(dump_mem("jd_after_ib_decrypt_data ",encp[g],8,8));
5678+ }
5679+ // just dead packets: decrypt residue
5680+ for(g=alive[iter];g<alive[iter-1];g++){
5681+DBG(dump_mem("jd_before_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
5682+ FFTABLEOUTXORNBY(g_residue[g],encp[g]+8,stream_out,g);
5683+DBG(dump_mem("jd_after_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
5684+ }
5685+ // alive packets: pointers++
5686+ for(g=0;g<alive[iter];g++) encp[g]+=8;
5687+ };
5688+ // ITER N
5689+DBG(fprintf(stderr,">>>>>ITER 23\n"));
5690+ iter=23;
5691+ // calc block
5692+ block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
5693+DBG(dump_mem("23BLO_ib ",block_out,8*alive[iter-1],8));
5694+ // just dead packets: write decrypted data
5695+ for(g=alive[iter];g<alive[iter-1];g++){
5696+DBG(dump_mem("23jd_before_ib_decrypt_data ",encp[g],8,8));
5697+ COPY_8_BY(encp[g],block_out+8*g);
5698+DBG(dump_mem("23jd_after_ib_decrypt_data ",encp[g],8,8));
5699+ }
5700+ // no residue possible
5701+ // so do nothing
5702+
5703+ DBG(fprintf(stderr,"returning advanced=%i\n",advanced));
5704+
5705+ M_EMPTY(); // restore CPU multimedia state
5706+
5707+ return advanced;
5708+}
5709Index: libs/libmythtv/FFdecsa/README
5710===================================================================
5711--- /dev/null 1970-01-01 00:00:00.000000000 +0000
5712+++ libs/libmythtv/FFdecsa/README 2006-06-20 17:36:06.000000000 -0400
5713@@ -0,0 +1,50 @@
5714+-------
5715+FFdecsa
5716+-------
5717+version 1.0
5718+Copyright 2003-2004 fatih89r
5719+released under GPL
5720+
5721+
5722+FFdecsa is a fast implementation of a CSA decryption algorithm for MPEG
5723+TS packets. It is shockingly fast, more than 800% the speed of the
5724+fastest implementation I can find around. (read the docs to know what FF
5725+stands for)
5726+
5727+On an AthlonXP 2400 (2000MHz) it achieves 165Mbit/s; the previous record
5728+was around 20Mbit/s.
5729+
5730+This means that:
5731+- decrypting an 8Mbit/s stream takes 5% of CPU instead of 40%
5732+- decrypting a full transponder (with all its channels or with a big
5733+ HDTV stream) carrying 38Mbit/s takes 23% of CPU instead of 190%
5734+ (>100%, so undecryptable in real time)
5735+- a very slow processor can decrypt one channel with no problems
5736+- offline decoding of one hour of a 5Mbit/s channel takes less than
5737+ two minutes (30x faster than realtime)
5738+- offline decoding will work at more than 20MB/s (megabytes/s),
5739+ nearly as fast as a file copy
5740+
5741+The docs directory contains useful stuff:
5742+
5743+ FAQ.txt
5744+ to know something more about this software
5745+
5746+ how_to_compile.txt
5747+ if you want to compile this code (and get optimal speed)
5748+
5749+ how_to_use.txt
5750+ if you want to use this code
5751+
5752+ technical_background.txt
5753+ if you want to understand how this code works or you want to
5754+ modify/improve it
5755+
5756+ how_to_understand.txt
5757+ if you want to understand the code to make modifications
5758+
5759+ how_to_release.txt
5760+ if you want to release modified versions of the code
5761+
5762+
5763+fatih89r
5764Index: libs/libmythtv/FFdecsa/FFdecsa.h
5765===================================================================
5766--- /dev/null 1970-01-01 00:00:00.000000000 +0000
5767+++ libs/libmythtv/FFdecsa/FFdecsa.h 2006-06-20 17:36:06.000000000 -0400
5768@@ -0,0 +1,62 @@
5769+/* FFdecsa -- fast decsa algorithm
5770+ *
5771+ * Copyright (C) 2003-2004 fatih89r
5772+ *
5773+ * This program is free software; you can redistribute it and/or modify
5774+ * it under the terms of the GNU General Public License as published by
5775+ * the Free Software Foundation; either version 2 of the License, or
5776+ * (at your option) any later version.
5777+ *
5778+ * This program is distributed in the hope that it will be useful,
5779+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5780+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5781+ * GNU General Public License for more details.
5782+ *
5783+ * You should have received a copy of the GNU General Public License
5784+ * along with this program; if not, write to the Free Software
5785+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
5786+ */
5787+
5788+
5789+#ifndef FFDECSA_H
5790+#define FFDECSA_H
5791+
5792+//----- public interface
5793+
5794+// -- how many packets can be decrypted at the same time
5795+// This reports the internal decryption parallelism.
5796+// You should try to call decrypt_packets with more packets than the number
5797+// returned here for performance reasons (use get_suggested_cluster_size to know
5798+// how many).
5799+int get_internal_parallelism(void);
5800+
5801+// -- how many packets you should have in a cluster when calling decrypt_packets
5802+// This is a suggestion to achieve optimal performance; typically a little
5803+// higher than what get_internal_parallelism returns.
5804+// Passing fewer packets could slow down the decryption.
5805+// Passing more packets is never bad (if you don't spend a lot of time building
5806+// the list).
5807+int get_suggested_cluster_size(void);
5808+
5809+// -- alloc & free the key structure
5810+void *get_key_struct(void);
5811+void free_key_struct(void *keys);
5812+
5813+// -- set control words, 8 bytes each
5814+void set_control_words(void *keys, const unsigned char *even, const unsigned char *odd);
5815+
5816+// -- set even control word, 8 bytes
5817+void set_even_control_word(void *keys, const unsigned char *even);
5818+
5819+// -- set odd control word, 8 bytes
5820+void set_odd_control_word(void *keys, const unsigned char *odd);
5821+
5822+// -- get control words, 8 bytes each
5823+//void get_control_words(void *keys, unsigned char *even, unsigned char *odd);
5824+
5825+// -- decrypt many TS packets
5826+// This interface is a bit complicated because it is designed for maximum speed.
5827+// Please read docs/how_to_use.txt.
5828+int decrypt_packets(void *keys, unsigned char **cluster);
5829+
5830+#endif
5831Index: libs/libmythtv/FFdecsa/docs/how_to_understand.txt
5832===================================================================
5833--- /dev/null 1970-01-01 00:00:00.000000000 +0000
5834+++ libs/libmythtv/FFdecsa/docs/how_to_understand.txt 2006-06-20 17:36:06.000000000 -0400
5835@@ -0,0 +1,15 @@
5836+-------
5837+FFdecsa
5838+-------
5839+
5840+First, you need to know how decsa works: study the source of a classical
5841+implementation. Then you have to understand how things are done in
5842+slicing mode. Read all the documentation and have a working classical
5843+implementation to compare partial results. There are comments spread
5844+around the code. Some things are difficult to understand without paper
5845+notes; for example the matrix transpositions and meaning of array
5846+indices.
5847+
5848+Sorry, it is hard to understand and modify ...
5849+
5850+... but it was harder to design and implement!!!
5851Index: libs/libmythtv/FFdecsa/parallel_064_mmx.h
5852===================================================================
5853--- /dev/null 1970-01-01 00:00:00.000000000 +0000
5854+++ libs/libmythtv/FFdecsa/parallel_064_mmx.h 2006-06-20 17:36:06.000000000 -0400
5855@@ -0,0 +1,106 @@
5856+/* FFdecsa -- fast decsa algorithm
5857+ *
5858+ * Copyright (C) 2003-2004 fatih89r
5859+ *
5860+ * This program is free software; you can redistribute it and/or modify
5861+ * it under the terms of the GNU General Public License as published by
5862+ * the Free Software Foundation; either version 2 of the License, or
5863+ * (at your option) any later version.
5864+ *
5865+ * This program is distributed in the hope that it will be useful,
5866+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5867+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5868+ * GNU General Public License for more details.
5869+ *
5870+ * You should have received a copy of the GNU General Public License
5871+ * along with this program; if not, write to the Free Software
5872+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
5873+ */
5874+
5875+#include <mmintrin.h>
5876+
5877+#if 0 // some older gcc version need this
5878+#define _m_pand _mm_and_si64
5879+#define _m_por _mm_or_si64
5880+#define _m_pxor _mm_xor_si64
5881+#define _m_psllqi _mm_slli_si64
5882+#define _m_psrlqi _mm_srli_si64
5883+#define _m_empty _mm_empty
5884+#endif
5885+
5886+typedef __m64 group; // a group is one 64-bit MMX value
5887+#define GROUP_PARALLELISM 64 // matches the 64-bit width of __m64
5888+#define FF0() ((__m64)0x0ULL) // group with every bit clear
5889+#define FF1() ((__m64)0xffffffffffffffffULL) // group with every bit set
5890+#define FFAND(a,b) _m_pand((a),(b)) // bitwise AND of two groups
5891+#define FFOR(a,b) _m_por((a),(b)) // bitwise OR of two groups
5892+#define FFXOR(a,b) _m_pxor((a),(b)) // bitwise XOR of two groups
5893+#define FFNOT(a) _m_pxor((a),FF1()) // bitwise NOT, expressed as XOR with all-ones
5894+
5895+/* 64 rows of 64 bits */
5896+
5897+void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ // copy two ints from data into entry g of tab (int slots 2*g and 2*g+1)
5898+#if 1 // active variant: two int-sized copies
5899+ *(((int *)tab)+2*g)=*((int *)data); // NOTE(review): int* access to byte buffers - confirm alignment/aliasing is safe for the build
5900+ *(((int *)tab)+2*g+1)=*(((int *)data)+1);
5901+#else // disabled variant: a single long long copy
5902+ *(((long long int *)tab)+g)=*((long long int *)data);
5903+#endif
5904+}
5905+
5906+void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ // copy entry g of tab (int slots 2*g and 2*g+1) out to data
5907+#if 1 // active variant: two int-sized copies
5908+ *((int *)data)=*(((int *)tab)+2*g); // NOTE(review): int* access to byte buffers - confirm alignment/aliasing is safe for the build
5909+ *(((int *)data)+1)=*(((int *)tab)+2*g+1);
5910+#else // disabled variant: a single long long copy
5911+ *((long long int *)data)=*(((long long int *)tab)+g);
5912+#endif
5913+}
5914+
5915+void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ // XOR n bytes starting at tab+8*g into data[0..n-1]
5916+ int j;
5917+ for(j=0;j<n;j++){ // byte-wise, so n need not be a multiple of the word size
5918+ *(data+j)^=*(tab+8*g+j);
5919+ }
5920+}
5921+
5922+typedef __m64 batch; // a batch is one 64-bit MMX value
5923+#define BYTES_PER_BATCH 8 // 64 bits = 8 bytes per batch
5924+#define B_FFAND(a,b) _m_pand((a),(b)) // bitwise AND of two batches
5925+#define B_FFOR(a,b) _m_por((a),(b)) // bitwise OR of two batches
5926+#define B_FFXOR(a,b) _m_pxor((a),(b)) // bitwise XOR of two batches
5927+#define B_FFN_ALL_29() ((__m64)0x2929292929292929ULL) // 0x29 repeated in all 8 bytes; the constants below repeat 0x02..0x80 likewise
5928+#define B_FFN_ALL_02() ((__m64)0x0202020202020202ULL)
5929+#define B_FFN_ALL_04() ((__m64)0x0404040404040404ULL)
5930+#define B_FFN_ALL_10() ((__m64)0x1010101010101010ULL)
5931+#define B_FFN_ALL_40() ((__m64)0x4040404040404040ULL)
5932+#define B_FFN_ALL_80() ((__m64)0x8080808080808080ULL)
5933+#define B_FFSH8L(a,n) _m_psllqi((a),(n)) // 64-bit logical shift left by n; bits can cross byte boundaries
5934+#define B_FFSH8R(a,n) _m_psrlqi((a),(n)) // 64-bit logical shift right by n; bits can cross byte boundaries
5935+
5936+#define M_EMPTY() _m_empty() // calls _m_empty(); needed here because this backend uses MMX
5937+
5938+
5939+#undef XOR_8_BY // drop any earlier definition; below: *(d) = *(s1) ^ *(s2) as one __m64 (8 bytes)
5940+#define XOR_8_BY(d,s1,s2) do{ __m64 *pd=(__m64 *)(d), *ps1=(__m64 *)(s1), *ps2=(__m64 *)(s2); \
5941+ *pd = _m_pxor( *ps1 , *ps2 ); }while(0)
5942+
5943+#undef XOREQ_8_BY // drop any earlier definition; below: *(d) ^= *(s) as one __m64 (8 bytes)
5944+#define XOREQ_8_BY(d,s) do{ __m64 *pd=(__m64 *)(d), *ps=(__m64 *)(s); \
5945+ *pd = _m_pxor( *ps, *pd ); }while(0)
5946+
5947+#undef COPY_8_BY // drop any earlier definition; below: *(d) = *(s) as one __m64 (8 bytes)
5948+#define COPY_8_BY(d,s) do{ __m64 *pd=(__m64 *)(d), *ps=(__m64 *)(s); \
5949+ *pd = *ps; }while(0)
5950+
5951+#undef BEST_SPAN // the *_BEST_BY helpers below forward to the 8-byte (*_8_BY) helpers
5952+#define BEST_SPAN 8
5953+
5954+#undef XOR_BEST_BY // d = s1 ^ s2; no trailing ';' in the macro, so "if(c) XOR_BEST_BY(..); else .." parses
5955+#define XOR_BEST_BY(d,s1,s2) do{ XOR_8_BY(d,s1,s2); }while(0)
5956+
5957+#undef XOREQ_BEST_BY // d ^= s; caller supplies the terminating ';'
5958+#define XOREQ_BEST_BY(d,s) do{ XOREQ_8_BY(d,s); }while(0)
5959+
5960+#undef COPY_BEST_BY // d = s; caller supplies the terminating ';'
5961+#define COPY_BEST_BY(d,s) do{ COPY_8_BY(d,s); }while(0)
5962Index: libs/libmythtv/dvbcam.h
5963===================================================================
5964--- libs/libmythtv/dvbcam.h.orig 2006-06-20 17:36:05.000000000 -0400
5965+++ libs/libmythtv/dvbcam.h 2006-06-20 17:36:06.000000000 -0400
5966@@ -50,6 +50,8 @@
5967 void sendExternalCam(const PMTObject *pmt);
5968 bool useExternalCam();
5969 void stopExternalCam();
5970+ void ExternalCamSoftkeyLoop();
5971+ static void *ExternalCamSoftkeyHelper(void*self);
5972
5973 };
5974
5975Index: libs/libmythtv/FFdecsa/logic/Makefile
5976===================================================================
5977--- /dev/null 1970-01-01 00:00:00.000000000 +0000
5978+++ libs/libmythtv/FFdecsa/logic/Makefile 2006-06-20 17:36:06.000000000 -0400
5979@@ -0,0 +1,10 @@
5980+all: logic # default target: build ./logic
5981+.PHONY: all clean
5982+logic: logic.o
5983+ gcc -o logic logic.o
5984+
5985+logic.o: logic.c
5986+ gcc -O3 -march=athlon-xp -c logic.c
5987+
5988+clean: # -f so that cleaning an already-clean tree is not an error
5989+ rm -f logic *.o
5990Index: libs/libmythtv/FFdecsa/ChangeLog
5991===================================================================
5992--- /dev/null 1970-01-01 00:00:00.000000000 +0000
5993+++ libs/libmythtv/FFdecsa/ChangeLog 2006-06-20 17:36:06.000000000 -0400
5994@@ -0,0 +1,206 @@
5995+- created
5996+
5997+- released 0.0.1
5998+
5999+- simplified s, A, B
6000+
6001+- released 0.0.2
6002+
6003+- simplified nxt=
6004+
6005+- released 0.0.3
6006+
6007+- removed commented code
6008+- code formatting
6009+
6010+- released 0.0.4
6011+
6012+- kk now unsigned char
6013+- removed 64 bit ints
6014+
6015+- released 0.0.5
6016+
6017+- created decrypt_2ts
6018+
6019+- released 0.0.6
6020+
6021+- renamed files
6022+- created decrypt_many_ts, removed others
6023+- external interface has 2 functions only: set_cws() and decrypt_many_ts()
6024+- reformatted code
6025+- reimplemented s12,s34,s56,s7
6026+- unsigned char become int for table optimization
6027+
6028+- released 0.0.7
6029+
6030+- optional icc compiler
6031+- kk now 0..55
6032+- decrypt_many_ts really works (no parallelism yet)
6033+- added get_cws() to interface
6034+- created stream.c
6035+- created key_schedule_stream, using iA[] and iB[]
6036+
6037+- released 0.0.8
6038+
6039+- decrypt_many_ts() makes a group, sorts the packets, processes them
6040+- preliminary stream_cypher_group() created
6041+- parallel computing activated
6042+- huge speed increase (+500%) thanks to stream_cypher_group()
6043+
6044+- released 0.0.9
6045+
6046+- block_cypher_group() created (no parallelism yet)
6047+
6048+- released 0.0.10
6049+
6050+- block_cypher_group() has 56 simple iterations
6051+- block_cypher_group() doesn't shift registers anymore
6052+
6053+- released 0.0.11
6054+
6055+- some parallelization on block_cypher_group()
6056+
6057+- released 0.0.12
6058+
6059+- better parallelization of block_cypher_group()
6060+
6061+- released 0.0.13
6062+
6063+- block_cypher() was still called by error when N=23
6064+- speed is now 109Mbit/s on AMD XP2000+ CPU
6065+
6066+- released 0.0.14
6067+
6068+- stream_cypher_group() has a init and normal variant
6069+- A[0]-A[9] instead of A[1]-A[10], same for B
6070+- implemented virtual shift of A and B
6071+- speed is now 117Mbit/s on AMD XP2000+ CPU
6072+
6073+- released 0.0.15
6074+
6075+- better optimization of E and F in the stream cypher
6076+- speed is now 119Mbit/s on AMD XP2000+ CPU
6077+
6078+- released 0.0.16
6079+
6080+- removed some debug overhead
6081+- speed is now 120Mbit/s on AMD XP2000+ CPU
6082+
6083+- released 0.0.17
6084+
6085+- don't move packets with residue anymore
6086+- speed is now 123Mbit/s on AMD XP2000+ CPU
6087+
6088+- released 0.0.18
6089+
6090+- solved alignment problems
6091+- search groupable packets even beyond ungroupable ones
6092+ (more speed in some real world cases)
6093+- created decrypt_many_ts2(), useful with circular buffers
6094+
6095+- released 0.0.19
6096+
6097+- removed old code
6098+
6099+- released 0.0.20
6100+
6101+- partially converted code to size-independent group
6102+- icc doesn't work with optimizations on
6103+
6104+- released 0.1.1
6105+
6106+- merge loops on block_decypher (speed++ gcc, speed-- icc)
6107+- transpositions are now functions (speed-- icc)
6108+- icc works again (compiler bug work around?)
6109+
6110+- released 0.1.2
6111+
6112+- better use of COPY8 &co
6113+- better flags for gcc
6114+- removed old code
6115+
6116+- released 0.1.3
6117+
6118+- int and not char in block cypher (speed++++++ gcc, speed-- icc)
6119+
6120+- released 0.1.4
6121+
6122+- group abstraction finally implemented
6123+- support for group width 64
6124+
6125+- released 0.1.5
6126+
6127+- group 64 mmx implemented (speed++ gcc)
6128+
6129+- released 0.1.6
6130+
6131+- more parallelism in block cypher (speed++ gcc)
6132+- transposition before and after block (disabled because of no speed gain yet)
6133+
6134+- released 0.1.7
6135+
6136+- more parallelism in block cypher (speed++ gcc)
6137+- transposition before and after block enabled (speed++ gcc)
6138+- gcc options (unrolled 500) speed gcc++
6139+
6140+- released 0.1.8
6141+
6142+- reworked FFN_ALL_* constants (speed++++ gcc)
6143+
6144+- released 0.1.9
6145+
6146+- transposition in block as inlined functions
6147+- group abstraction working well
6148+
6149+- released 0.1.10
6150+
6151+- group 128 sse implemented, but batch is 64 mmx (not faster than group 64 mmx)
6152+
6153+- released 0.1.11
6154+
6155+- lot of code polishing and dead code elimination
6156+- better and more debug output
6157+
6158+- released 0.1.12
6159+
6160+- name change: FFdecsa
6161+
6162+- released 0.2.0
6163+
6164+- separated test cases
6165+- corrected all group_modes (now called parallel_modes)
6166+- parallel 128 8 char implemented
6167+- parallel 64 long implemented
6168+- parallel 128 2 long implemented
6169+- parallel 128 2 mmx implemented (incredibly slow, the compiler is very confused)
6170+- parallel 128 16 charA implemented (very slow compilation)
6171+- parallel 128 16 char implemented
6172+- renamed softcsa* to FFdecsa*
6173+
6174+- released 0.2.1
6175+
6176+- new external interface (based on ranges)
6177+
6178+- released 0.2.2
6179+
6180+- can be compiled with g++ too
6181+- using g++ the code is 3% faster!
6182+- external interface: function name changing and new functions
6183+- a group of ranges is now called a cluster
6184+- renamed autogenerated files
6185+
6186+- released 0.2.3
6187+
6188+- written docs
6189+- removed unneeded files
6190+- added Copyright and license notes
6191+- reworked "logic"
6192+
6193+- released 0.3.0
6194+
6195+- Makefile reworked
6196+- misc fixes
6197+- added vdr patch
6198+
6199+- released 1.0.0 (public release)
6200+
6201Index: libs/libmythtv/FFdecsa/parallel_064_8char.h
6202===================================================================
6203--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6204+++ libs/libmythtv/FFdecsa/parallel_064_8char.h 2006-06-20 17:36:06.000000000 -0400
6205@@ -0,0 +1,274 @@
6206+/* FFdecsa -- fast decsa algorithm
6207+ *
6208+ * Copyright (C) 2003-2004 fatih89r
6209+ *
6210+ * This program is free software; you can redistribute it and/or modify
6211+ * it under the terms of the GNU General Public License as published by
6212+ * the Free Software Foundation; either version 2 of the License, or
6213+ * (at your option) any later version.
6214+ *
6215+ * This program is distributed in the hope that it will be useful,
6216+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6217+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6218+ * GNU General Public License for more details.
6219+ *
6220+ * You should have received a copy of the GNU General Public License
6221+ * along with this program; if not, write to the Free Software
6222+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
6223+ */
6224+
6225+
6226+struct group_t{ // a group built from eight separate bytes
6227+ unsigned char s1,s2,s3,s4,s5,s6,s7,s8;
6228+};
6229+typedef struct group_t group;
6230+
6231+#define GROUP_PARALLELISM 64 // 8 bytes x 8 bits, assuming 8-bit char
6232+
6233+group static inline FF0(){ // returns a group with every byte set to 0x00
6234+ group res;
6235+ res.s1=0x0;
6236+ res.s2=0x0;
6237+ res.s3=0x0;
6238+ res.s4=0x0;
6239+ res.s5=0x0;
6240+ res.s6=0x0;
6241+ res.s7=0x0;
6242+ res.s8=0x0;
6243+ return res;
6244+}
6245+
6246+group static inline FF1(){ // returns a group with every byte set to 0xff
6247+ group res;
6248+ res.s1=0xff;
6249+ res.s2=0xff;
6250+ res.s3=0xff;
6251+ res.s4=0xff;
6252+ res.s5=0xff;
6253+ res.s6=0xff;
6254+ res.s7=0xff;
6255+ res.s8=0xff;
6256+ return res;
6257+}
6258+
6259+group static inline FFAND(group a,group b){ // returns a AND b, computed byte by byte
6260+ group res;
6261+ res.s1=a.s1&b.s1;
6262+ res.s2=a.s2&b.s2;
6263+ res.s3=a.s3&b.s3;
6264+ res.s4=a.s4&b.s4;
6265+ res.s5=a.s5&b.s5;
6266+ res.s6=a.s6&b.s6;
6267+ res.s7=a.s7&b.s7;
6268+ res.s8=a.s8&b.s8;
6269+ return res;
6270+}
6271+
6272+group static inline FFOR(group a,group b){ // returns a OR b, computed byte by byte
6273+ group res;
6274+ res.s1=a.s1|b.s1;
6275+ res.s2=a.s2|b.s2;
6276+ res.s3=a.s3|b.s3;
6277+ res.s4=a.s4|b.s4;
6278+ res.s5=a.s5|b.s5;
6279+ res.s6=a.s6|b.s6;
6280+ res.s7=a.s7|b.s7;
6281+ res.s8=a.s8|b.s8;
6282+ return res;
6283+}
6284+
6285+group static inline FFXOR(group a,group b){ // returns a XOR b, computed byte by byte
6286+ group res;
6287+ res.s1=a.s1^b.s1;
6288+ res.s2=a.s2^b.s2;
6289+ res.s3=a.s3^b.s3;
6290+ res.s4=a.s4^b.s4;
6291+ res.s5=a.s5^b.s5;
6292+ res.s6=a.s6^b.s6;
6293+ res.s7=a.s7^b.s7;
6294+ res.s8=a.s8^b.s8;
6295+ return res;
6296+}
6297+
6298+group static inline FFNOT(group a){ // returns the bitwise complement of a, byte by byte
6299+ group res;
6300+ res.s1=~a.s1; // ~ yields an int; storing into unsigned char keeps only the low byte
6301+ res.s2=~a.s2;
6302+ res.s3=~a.s3;
6303+ res.s4=~a.s4;
6304+ res.s5=~a.s5;
6305+ res.s6=~a.s6;
6306+ res.s7=~a.s7;
6307+ res.s8=~a.s8;
6308+ return res;
6309+}
6310+
6311+
6312+/* 64 rows of 64 bits */
6313+
6314+void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ // copy two ints from data into entry g of tab (int slots 2*g and 2*g+1)
6315+ *(((int *)tab)+2*g)=*((int *)data); // NOTE(review): int* access to byte buffers - confirm alignment/aliasing is safe for the build
6316+ *(((int *)tab)+2*g+1)=*(((int *)data)+1);
6317+}
6318+
6319+void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ // copy entry g of tab (int slots 2*g and 2*g+1) out to data
6320+ *((int *)data)=*(((int *)tab)+2*g); // NOTE(review): int* access to byte buffers - confirm alignment/aliasing is safe for the build
6321+ *(((int *)data)+1)=*(((int *)tab)+2*g+1);
6322+}
6323+
6324+void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ // XOR n bytes starting at tab+8*g into data[0..n-1]
6325+ int j;
6326+ for(j=0;j<n;j++){ // byte-wise, so n need not be a multiple of the word size
6327+ *(data+j)^=*(tab+8*g+j);
6328+ }
6329+}
6330+
6331+struct batch_t{ // a batch built from eight separate bytes
6332+ unsigned char s1,s2,s3,s4,s5,s6,s7,s8;
6333+};
6334+typedef struct batch_t batch;
6335+
6336+#define BYTES_PER_BATCH 8 // one byte per member s1..s8
6337+
6338+batch static inline B_FFAND(batch a,batch b){ // returns a AND b, computed byte by byte
6339+ batch res;
6340+ res.s1=a.s1&b.s1;
6341+ res.s2=a.s2&b.s2;
6342+ res.s3=a.s3&b.s3;
6343+ res.s4=a.s4&b.s4;
6344+ res.s5=a.s5&b.s5;
6345+ res.s6=a.s6&b.s6;
6346+ res.s7=a.s7&b.s7;
6347+ res.s8=a.s8&b.s8;
6348+ return res;
6349+}
6350+
6351+batch static inline B_FFOR(batch a,batch b){ // returns a OR b, computed byte by byte
6352+ batch res;
6353+ res.s1=a.s1|b.s1;
6354+ res.s2=a.s2|b.s2;
6355+ res.s3=a.s3|b.s3;
6356+ res.s4=a.s4|b.s4;
6357+ res.s5=a.s5|b.s5;
6358+ res.s6=a.s6|b.s6;
6359+ res.s7=a.s7|b.s7;
6360+ res.s8=a.s8|b.s8;
6361+ return res;
6362+}
6363+
6364+batch static inline B_FFXOR(batch a,batch b){ // returns a XOR b, computed byte by byte
6365+ batch res;
6366+ res.s1=a.s1^b.s1;
6367+ res.s2=a.s2^b.s2;
6368+ res.s3=a.s3^b.s3;
6369+ res.s4=a.s4^b.s4;
6370+ res.s5=a.s5^b.s5;
6371+ res.s6=a.s6^b.s6;
6372+ res.s7=a.s7^b.s7;
6373+ res.s8=a.s8^b.s8;
6374+ return res;
6375+}
6376+
6377+
6378+batch static inline B_FFN_ALL_29(){ // returns a batch with every byte set to 0x29
6379+ batch res;
6380+ res.s1=0x29;
6381+ res.s2=0x29;
6382+ res.s3=0x29;
6383+ res.s4=0x29;
6384+ res.s5=0x29;
6385+ res.s6=0x29;
6386+ res.s7=0x29;
6387+ res.s8=0x29;
6388+ return res;
6389+}
6390+batch static inline B_FFN_ALL_02(){ // returns a batch with every byte set to 0x02
6391+ batch res;
6392+ res.s1=0x02;
6393+ res.s2=0x02;
6394+ res.s3=0x02;
6395+ res.s4=0x02;
6396+ res.s5=0x02;
6397+ res.s6=0x02;
6398+ res.s7=0x02;
6399+ res.s8=0x02;
6400+ return res;
6401+}
6402+batch static inline B_FFN_ALL_04(){ // returns a batch with every byte set to 0x04
6403+ batch res;
6404+ res.s1=0x04;
6405+ res.s2=0x04;
6406+ res.s3=0x04;
6407+ res.s4=0x04;
6408+ res.s5=0x04;
6409+ res.s6=0x04;
6410+ res.s7=0x04;
6411+ res.s8=0x04;
6412+ return res;
6413+}
6414+batch static inline B_FFN_ALL_10(){ // returns a batch with every byte set to 0x10
6415+ batch res;
6416+ res.s1=0x10;
6417+ res.s2=0x10;
6418+ res.s3=0x10;
6419+ res.s4=0x10;
6420+ res.s5=0x10;
6421+ res.s6=0x10;
6422+ res.s7=0x10;
6423+ res.s8=0x10;
6424+ return res;
6425+}
6426+batch static inline B_FFN_ALL_40(){ // returns a batch with every byte set to 0x40
6427+ batch res;
6428+ res.s1=0x40;
6429+ res.s2=0x40;
6430+ res.s3=0x40;
6431+ res.s4=0x40;
6432+ res.s5=0x40;
6433+ res.s6=0x40;
6434+ res.s7=0x40;
6435+ res.s8=0x40;
6436+ return res;
6437+}
6438+batch static inline B_FFN_ALL_80(){ // returns a batch with every byte set to 0x80
6439+ batch res;
6440+ res.s1=0x80;
6441+ res.s2=0x80;
6442+ res.s3=0x80;
6443+ res.s4=0x80;
6444+ res.s5=0x80;
6445+ res.s6=0x80;
6446+ res.s7=0x80;
6447+ res.s8=0x80;
6448+ return res;
6449+}
6450+
6451+batch static inline B_FFSH8L(batch a,int n){ // shifts each byte of a left by n independently
6452+ batch res;
6453+ res.s1=a.s1<<n; // bits moved past bit 7 are dropped when stored back into unsigned char
6454+ res.s2=a.s2<<n;
6455+ res.s3=a.s3<<n;
6456+ res.s4=a.s4<<n;
6457+ res.s5=a.s5<<n;
6458+ res.s6=a.s6<<n;
6459+ res.s7=a.s7<<n;
6460+ res.s8=a.s8<<n;
6461+ return res;
6462+}
6463+
6464+batch static inline B_FFSH8R(batch a,int n){ // shifts each byte of a right by n independently
6465+ batch res;
6466+ res.s1=a.s1>>n; // operands are unsigned char, so zeros are shifted in from the top
6467+ res.s2=a.s2>>n;
6468+ res.s3=a.s3>>n;
6469+ res.s4=a.s4>>n;
6470+ res.s5=a.s5>>n;
6471+ res.s6=a.s6>>n;
6472+ res.s7=a.s7>>n;
6473+ res.s8=a.s8>>n;
6474+ return res;
6475+}
6476+
6477+
6478+void static inline M_EMPTY(void){ // intentionally empty: this backend has nothing to do here
6479+}
6480Index: libs/libmythtv/FFdecsa/docs/how_to_release.txt
6481===================================================================
6482--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6483+++ libs/libmythtv/FFdecsa/docs/how_to_release.txt 2006-06-20 17:36:06.000000000 -0400
6484@@ -0,0 +1,21 @@
6485+-------
6486+FFdecsa
6487+-------
6488+
6489+Please use the name of the release you're basing on as a base name and
6490+add your suffix.
6491+
6492+For example if john modifies
6493+ FFdecsa-1.0.0
6494+he should release
6495+ FFdecsa-1.0.0-john_0.3
6496+or
6497+ FFdecsa-1.0.0-john_0.4
6498+
6499+If paul modifies john's version the correct name would be like
6500+ FFdecsa-1.0.0-john_0.4-paul_0.1
6501+
6502+This is to avoid many different versions with random version numbers, as
6503+development is not centralized.
6504+
6505+Thank you.
6506Index: libs/libmythtv/FFdecsa/parallel_064_long.h
6507===================================================================
6508--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6509+++ libs/libmythtv/FFdecsa/parallel_064_long.h 2006-06-20 17:36:06.000000000 -0400
6510@@ -0,0 +1,56 @@
6511+/* FFdecsa -- fast decsa algorithm
6512+ *
6513+ * Copyright (C) 2003-2004 fatih89r
6514+ *
6515+ * This program is free software; you can redistribute it and/or modify
6516+ * it under the terms of the GNU General Public License as published by
6517+ * the Free Software Foundation; either version 2 of the License, or
6518+ * (at your option) any later version.
6519+ *
6520+ * This program is distributed in the hope that it will be useful,
6521+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6522+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6523+ * GNU General Public License for more details.
6524+ *
6525+ * You should have received a copy of the GNU General Public License
6526+ * along with this program; if not, write to the Free Software
6527+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
6528+ */
6529+
6530+#include "parallel_std_def.h"
6531+
6532+typedef unsigned long long group; // a group is one unsigned long long
6533+#define GROUP_PARALLELISM 64 // assumes unsigned long long is 64 bits wide
6534+#define FF0() 0x0ULL // group with every bit clear
6535+#define FF1() 0xffffffffffffffffULL // group with all 64 bits set
6536+
6537+/* 64 rows of 64 bits */
6538+
6539+void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ // copy two ints from data into entry g of tab (int slots 2*g and 2*g+1)
6540+ *(((int *)tab)+2*g)=*((int *)data); // NOTE(review): int* access to byte buffers - confirm alignment/aliasing is safe for the build
6541+ *(((int *)tab)+2*g+1)=*(((int *)data)+1);
6542+}
6543+
6544+void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ // copy entry g of tab (int slots 2*g and 2*g+1) out to data
6545+ *((int *)data)=*(((int *)tab)+2*g); // NOTE(review): int* access to byte buffers - confirm alignment/aliasing is safe for the build
6546+ *(((int *)data)+1)=*(((int *)tab)+2*g+1);
6547+}
6548+
6549+void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ // XOR n bytes starting at tab+8*g into data[0..n-1]
6550+ int j;
6551+ for(j=0;j<n;j++){ // byte-wise, so n need not be a multiple of the word size
6552+ *(data+j)^=*(tab+8*g+j);
6553+ }
6554+}
6555+
6556+typedef unsigned long long int batch; // a batch is one unsigned long long
6557+#define BYTES_PER_BATCH 8 // assumes unsigned long long is 8 bytes
6558+#define B_FFN_ALL_29() 0x2929292929292929ULL // 0x29 repeated in all 8 bytes; the constants below repeat 0x02..0x80 likewise
6559+#define B_FFN_ALL_02() 0x0202020202020202ULL
6560+#define B_FFN_ALL_04() 0x0404040404040404ULL
6561+#define B_FFN_ALL_10() 0x1010101010101010ULL
6562+#define B_FFN_ALL_40() 0x4040404040404040ULL
6563+#define B_FFN_ALL_80() 0x8080808080808080ULL
6564+
6565+#define M_EMPTY() // expands to nothing in this backend
6566+
6567Index: libs/libmythtv/FFdecsa/parallel_128_2long.h
6568===================================================================
6569--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6570+++ libs/libmythtv/FFdecsa/parallel_128_2long.h 2006-06-20 17:36:06.000000000 -0400
6571@@ -0,0 +1,175 @@
6572+/* FFdecsa -- fast decsa algorithm
6573+ *
6574+ * Copyright (C) 2003-2004 fatih89r
6575+ *
6576+ * This program is free software; you can redistribute it and/or modify
6577+ * it under the terms of the GNU General Public License as published by
6578+ * the Free Software Foundation; either version 2 of the License, or
6579+ * (at your option) any later version.
6580+ *
6581+ * This program is distributed in the hope that it will be useful,
6582+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6583+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6584+ * GNU General Public License for more details.
6585+ *
6586+ * You should have received a copy of the GNU General Public License
6587+ * along with this program; if not, write to the Free Software
6588+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
6589+ */
6590+
6591+
6592+struct group_t{ // a group built from two unsigned long long halves
6593+ unsigned long long int s1;
6594+ unsigned long long int s2;
6595+};
6596+typedef struct group_t group;
6597+
6598+#define GROUP_PARALLELISM 128 // 2 x 64 bits, assuming 64-bit unsigned long long
6599+
6600+group static inline FF0(){ // returns a group with both halves zero
6601+ group res;
6602+ res.s1=0x0ULL;
6603+ res.s2=0x0ULL;
6604+ return res;
6605+}
6606+
6607+group static inline FF1(){ /* returns a group with both words set to 0xffffffffffffffff (all ones in 64 bits) */
6608+ group res;
6609+ res.s1=0xffffffffffffffffULL;
6610+ res.s2=0xffffffffffffffffULL;
6611+ return res;
6612+}
6613+
6614+group static inline FFAND(group a,group b){ /* bitwise AND of two groups, word by word */
6615+ group res;
6616+ res.s1=a.s1&b.s1;
6617+ res.s2=a.s2&b.s2;
6618+ return res;
6619+}
6620+
6621+group static inline FFOR(group a,group b){ /* bitwise OR of two groups, word by word */
6622+ group res;
6623+ res.s1=a.s1|b.s1;
6624+ res.s2=a.s2|b.s2;
6625+ return res;
6626+}
6627+
6628+group static inline FFXOR(group a,group b){ /* bitwise XOR of two groups, word by word */
6629+ group res;
6630+ res.s1=a.s1^b.s1;
6631+ res.s2=a.s2^b.s2;
6632+ return res;
6633+}
6634+
6635+group static inline FFNOT(group a){ /* bitwise complement of a group, word by word */
6636+ group res;
6637+ res.s1=~a.s1;
6638+ res.s2=~a.s2;
6639+ return res;
6640+}
6641+
6642+
6643+/* 64 rows of 128 bits */
6644+
6645+void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ /* copy two ints from data into tab at int slots 2*g and 2*g+1 */
6646+ *(((int *)tab)+2*g)=*((int *)data); /* NOTE(review): int-pointer access presumably relies on 4-byte int (8 bytes per row) and int-aligned buffers — confirm */
6647+ *(((int *)tab)+2*g+1)=*(((int *)data)+1);
6648+}
6649+
6650+void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ /* inverse of FFTABLEIN: copy int slots 2*g and 2*g+1 of tab into data */
6651+ *((int *)data)=*(((int *)tab)+2*g); /* NOTE(review): same int-size and alignment assumptions as the int casts imply — confirm */
6652+ *(((int *)data)+1)=*(((int *)tab)+2*g+1);
6653+}
6654+
6655+void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ /* XOR n bytes starting at tab+8*g into data[0..n-1] */
6656+ int j;
6657+ for(j=0;j<n;j++){
6658+ *(data+j)^=*(tab+8*g+j); /* byte-wise access; rows are addressed with an 8-byte stride */
6659+ }
6660+}
6661+
6662+
6663+struct batch_t{ /* "batch" data type of this variant: two unsigned long long words */
6664+ unsigned long long int s1;
6665+ unsigned long long int s2;
6666+};
6667+typedef struct batch_t batch;
6668+
6669+#define BYTES_PER_BATCH 16 /* presumably sizeof(batch); holds when unsigned long long is 8 bytes — confirm on target */
6670+
6671+batch static inline B_FFAND(batch a,batch b){ /* bitwise AND of two batches, word by word */
6672+ batch res;
6673+ res.s1=a.s1&b.s1;
6674+ res.s2=a.s2&b.s2;
6675+ return res;
6676+}
6677+
6678+batch static inline B_FFOR(batch a,batch b){ /* bitwise OR of two batches, word by word */
6679+ batch res;
6680+ res.s1=a.s1|b.s1;
6681+ res.s2=a.s2|b.s2;
6682+ return res;
6683+}
6684+
6685+batch static inline B_FFXOR(batch a,batch b){ /* bitwise XOR of two batches, word by word */
6686+ batch res;
6687+ res.s1=a.s1^b.s1;
6688+ res.s2=a.s2^b.s2;
6689+ return res;
6690+}
6691+
6692+
6693+batch static inline B_FFN_ALL_29(){ /* returns a batch whose two words are byte 0x29 repeated eight times */
6694+ batch res;
6695+ res.s1=0x2929292929292929ULL;
6696+ res.s2=0x2929292929292929ULL;
6697+ return res;
6698+}
6699+
6700+batch static inline B_FFN_ALL_02(){ /* returns a batch whose two words are byte 0x02 repeated eight times */
6701+ batch res;
6702+ res.s1=0x0202020202020202ULL;
6703+ res.s2=0x0202020202020202ULL;
6704+ return res;
6705+}
6706+batch static inline B_FFN_ALL_04(){ /* returns a batch whose two words are byte 0x04 repeated eight times */
6707+ batch res;
6708+ res.s1=0x0404040404040404ULL;
6709+ res.s2=0x0404040404040404ULL;
6710+ return res;
6711+}
6712+batch static inline B_FFN_ALL_10(){ /* returns a batch whose two words are byte 0x10 repeated eight times */
6713+ batch res;
6714+ res.s1=0x1010101010101010ULL;
6715+ res.s2=0x1010101010101010ULL;
6716+ return res;
6717+}
6718+batch static inline B_FFN_ALL_40(){ /* returns a batch whose two words are byte 0x40 repeated eight times */
6719+ batch res;
6720+ res.s1=0x4040404040404040ULL;
6721+ res.s2=0x4040404040404040ULL;
6722+ return res;
6723+}
6724+batch static inline B_FFN_ALL_80(){ /* returns a batch whose two words are byte 0x80 repeated eight times */
6725+ batch res;
6726+ res.s1=0x8080808080808080ULL;
6727+ res.s2=0x8080808080808080ULL;
6728+ return res;
6729+}
6730+batch static inline B_FFSH8L(batch a,int n){ /* left-shift each word of the batch by n bits */
6731+ batch res;
6732+ res.s1=a.s1<<n; /* whole-word shift: bits carry across byte boundaries inside the word. NOTE(review): callers presumably mask the result per byte — confirm */
6733+ res.s2=a.s2<<n;
6734+ return res;
6735+}
6736+
6737+batch static inline B_FFSH8R(batch a,int n){ /* right-shift each word of the batch by n bits */
6738+ batch res;
6739+ res.s1=a.s1>>n; /* whole-word logical shift (unsigned): bits carry across byte boundaries inside the word. NOTE(review): callers presumably mask the result per byte — confirm */
6740+ res.s2=a.s2>>n;
6741+ return res;
6742+}
6743+
6744+
6745+void static inline M_EMPTY(void){ /* no-op: the body is empty in this variant */
6746+}
6747Index: libs/libmythtv/FFdecsa/parallel_std_def.h
6748===================================================================
6749--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6750+++ libs/libmythtv/FFdecsa/parallel_std_def.h 2006-06-20 17:36:06.000000000 -0400
6751@@ -0,0 +1,29 @@
6752+/* FFdecsa -- fast decsa algorithm
6753+ *
6754+ * Copyright (C) 2003-2004 fatih89r
6755+ *
6756+ * This program is free software; you can redistribute it and/or modify
6757+ * it under the terms of the GNU General Public License as published by
6758+ * the Free Software Foundation; either version 2 of the License, or
6759+ * (at your option) any later version.
6760+ *
6761+ * This program is distributed in the hope that it will be useful,
6762+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6763+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6764+ * GNU General Public License for more details.
6765+ *
6766+ * You should have received a copy of the GNU General Public License
6767+ * along with this program; if not, write to the Free Software
6768+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
6769+ */
6770+
6771+#define FFXOR(a,b) ((a)^(b)) /* group ops mapped straight onto the C bitwise operators; arguments are parenthesised */
6772+#define FFAND(a,b) ((a)&(b))
6773+#define FFOR(a,b) ((a)|(b))
6774+#define FFNOT(a) (~(a))
6775+
6776+#define B_FFAND(a,b) ((a)&(b)) /* batch ops, same direct mapping onto C operators */
6777+#define B_FFOR(a,b) ((a)|(b))
6778+#define B_FFXOR(a,b) ((a)^(b))
6779+#define B_FFSH8L(a,n) ((a)<<(n)) /* shifts the whole value of a, not individual bytes */
6780+#define B_FFSH8R(a,n) ((a)>>(n)) /* NOTE(review): logical vs arithmetic shift depends on the signedness of a — confirm batch is unsigned where this is used */
6781Index: libs/libmythtv/FFdecsa/docs/technical_background.txt
6782===================================================================
6783--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6784+++ libs/libmythtv/FFdecsa/docs/technical_background.txt 2006-06-20 17:36:06.000000000 -0400
6785@@ -0,0 +1,341 @@
6786+-------
6787+FFdecsa
6788+-------
6789+
6790+This doc is for people who looked into the source code and found it
6791+difficult to believe that this is a decsa algorithm, as it appears
6792+completely different from other decsa implementations.
6793+
6794+It appears different because it is different. Being different is what
6795+enables it to be a lot faster than all the others (currently it has more
6796+than 800% the speed of the best version I was able to find)
6797+
6798+The csa algo was designed to be run in hardware, but people are now
6799+running it in software.
6800+
6801+Hardware has data lines carrying bits and functional blocks doing
6802+calculations (logic operations, adders, shifters, table lookup, ...),
6803+software instead uses memory to contain data values and executes a
6804+sequence of instructions to transform the values. As a consequence,
6805+writing a software implementation of a hardware algorithm can be
6806+inefficient.
6807+
6808+For example, if you have 32 data lines, you can permute the bits with
6809+zero cost in hardware (you just permute the physical traces), but if you
6810+have the bits in a 32 bit variable you have to use 32 "and" operations
6811+with 32 different masks, 32 shifts and 31 "or" operations (if you
6812+suggest using "if"s testing the bits one by one you know nothing about
6813+how jump prediction works in modern processors).
6814+
6815+So the approach is *emulating the hardware*.
6816+
6817+Then there are some additional cool tricks.
6818+
6819+TRICK NUMBER 0: emulate the hardware
6820+------------------------------------
6821+We will work on bits one by one, that is a 4 bit word is now four
6822+variables. In this way we revert complex software operations into
6823+hardware emulation:
6824+
6825+ software hardware
6826+ -------------------------------------------
6827+ copy values copy values
6828+ logic op logic op
6829+ (bit permut.) ands+shifts+ors copy values
6830+ additions logic op emulating adders
6831+ (comparisons) if logic op selecting one of the two results
6832+ lookup tables logic op synthesizing a ROM (*)
6833+
6834+(*) sometimes lookup tables can be converted to logic expressions
6835+
6836+The sboxes in the stream cypher have been converted to efficient logic
6837+operations using custom-written software (look into the logic directory);
6838+this is responsible for much of the speed increase. Maybe there exists a
6839+slightly better way to express the sboxes as logical expressions, but it
6840+would be a minuscule improvement. The sbox in the block cypher can't be
6841+converted to efficient logic operations (8 bits of inputs are just too
6842+much) and is implemented with a traditional lookup in an array.
6843+
6844+But there is a problem: we want to process bits, but our external
6845+input and output want bytes. We need conversion routines. Conversion
6846+routines are similar to the awful permutations we described before, so
6847+this has to be done efficiently someway.
6848+
6849+
6850+TRICK NUMBER 1: virtual shift registers
6851+---------------------------------------
6852+Shift registers are normally implemented by moving all data around.
6853+Better leave the data in the same memory locations and redefine where
6854+the start of the register is (updating a pointer). That is called
6855+virtual shift register.
6856+
6857+
6858+TRICK NUMBER 2: parallel bitslice
6859+---------------------------------
6860+Implementing the algorithm as described in tricks 0 and 1 gives us about
6861+15% of the speed of a traditional implementation. This happens because
6862+we work on only one bit, even if our CPU is 32 bit wide. But *we can
6863+process 32 different packets at the same time*. This is called
6864+"bitslice" method. It can be done only if the program flow is not
6865+dependent of the data (if, while,...). Luckily this is true.
6866+Things like
6867+ if(a){
6868+ b=c&d;
6869+ }
6870+ else{
6871+ b=e&f;
6872+ }
6873+can be coded as (think of how hardware would implement this)
6874+ b1=c&d;
6875+ b2=e&f;
6876+ b=b2^(a&(b1^b2));
6877+and things like
6878+ if(a){
6879+ b=c&d
6880+ }
6881+can be transformed in the same way, as they may be written as
6882+ if(a){
6883+ b=c&d
6884+ }
6885+ else{
6886+ b=b;
6887+ }
6888+It could look wasteful, but it is not; and destroys data dependency.
6889+
6890+Our code takes the same time as before, but produces 32 results, so
6891+speed is now 480% the speed of a traditional implementation.
6892+
6893+
6894+TRICK NUMBER 3: multimedia instructions
6895+---------------------------------------
6896+If our CPU is 32 bit but it can also process larger blocks of data
6897+efficiently (multimedia instructions), we can use them. We only need
6898+logic ops and these are typically available.
6899+
6900+We can use MMX and work on 64 packets, or SSE and work on 128 packets.
6901+The speed doesn't automatically double going from 32 to 64 because the
6902+integer registers of the processor are normally faster. However, some
6903+speed is gained in this way.
6904+
6905+Multimedia instructions are often used by writing assembler by hand, but
6906+compilers are very good in doing register allocation, loop unrolling and
6907+instruction scheduling, so it is better to write the code in C and use
6908+native multimedia data types (intrinsics).
6909+
6910+Depending on number of available registers, execution latency, number of
6911+execution units in the CPU, it may be good to process more than one data
6912+block at the same time, for example 2 64bit MMX values. In this case we
6913+work on 128 bits by simulating a 128 bit op with two consecutive 64 bit
6914+op. This may or may not help (apparently not because x86 architecture
6915+has a small number of registers).
6916+
6917+We can also try working on 96 bit, pairing a MMX and an int op, or 192
6918+bit by using MMX and SSE. While this is doable in theory and could
6919+exploit different execution units in the CPU, speed doesn't improve
6920+(because of cache line handling problems inside the CPU, maybe).
6921+
6922+Besides int, MMX, SSE, we can use long long int (64 bit) and, why not,
6923+unsigned char.
6924+
6925+Using groups of unsigned chars (8 or 16) could give the compiler an
6926+opportunity to insert multimedia instructions automatically. For
6927+example, icc can use one MMX instruction to do
6928+ unsigned char a[8],b[8],c[8];
6929+ for(i=0;i<8;i++){
6930+ a[i]=b[i]&c[i];
6931+ }
6932+Some compilers (like icc) are efficient in this case, but using
6933+intrinsics manually is generally faster.
6934+
6935+All these experiments can be easily done if the code is written in a way
6936+which abstracts the data type used. This is not easy but doable, all the
6937+operations on data become (inlined) function calls or preprocessor
6938+macros. Good compilers are able to simplify all the abstraction at
6939+compile time and generate perfect code (gcc is great).
6940+
6941+The data abstraction used in the code is called "group".
6942+
6943+
6944+TRICK NUMBER 4: parallel byteslice
6945+----------------------------------
6946+The bitslice method works wonderfully on the stream cypher, but can't be
6947+applied to the block cypher because of the evil big look up table.
6948+
6949+As we have to convert input data from normal to bitslice before starting
6950+processing and from bitslice to normal before output, we convert the
6951+stream cypher output to normal before the block calculations and do the
6952+block stage in a traditional way.
6953+
6954+There are some xors in the block cypher; so we arrange bytes from
6955+different packets side by side and use multimedia instructions to work
6956+on many bytes at the same time. This is not exactly bitslice, maybe it
6957+is called byteslice. The conversion routines are similar (just a bit
6958+simpler).
6959+
6960+The data type we use to do this in the code is called "batch".
6961+
6962+The virtual shift register described in trick number 1 is useful too.
6963+
6964+The look up table is the only thing which is done serially one byte at a
6965+time. Luckily if we do it on 32 or 64 bytes the loop is heavily
6966+unrolled, and the compiler and the CPU manage to get a good speed
6967+because there is little dependency between instructions.
6968+
6969+
6970+TRICK NUMBER 5: efficient bit permutation
6971+-----------------------------------------
6972+The block cypher has a bit permutation part. As we are not in a bit
6973+sliced form at that point, permuting bits in a byte takes 8 masks, 8
6974+and, 7 or; but three bits move in the same direction, so we make it with
6975+6 masks, 6 and, 5 or. Batch processing through multimedia instructions
6976+is applicable too.
6977+
6978+
6979+TRICK NUMBER 6: efficient normal<->slice conversion
6980+---------------------------------------------------
6981+The bitslice<->normal conversion routines are a sort of transposition
6982+operation, that is you have bits in rows and want them in columns. This
6983+can be done efficiently. For example, transposition of 8 bytes (matrix
6984+of 8x8=64 bits) can be done this way (we want to exchange bit[i][j] with
6985+bit[j][i] and we assume bit 0 is the MSB in the byte):
6986+
6987+ // untested code, may be bugged
6988+ unsigned char a[8];
6989+ unsigned char b[8];
6990+ for(i=0;i<8;i++) b[i]=0;
6991+ for(i=0;i<8;i++){
6992+ for(j=0;j<8;j++){
6993+ b[i]|=((a[j]>>(7-i)&1))<<(7-j);
6994+ }
6995+ }
6996+
6997+but it is slow (128 shifts, 64 and, 64 or), or
6998+
6999+ // untested code, may be bugged
7000+ unsigned char a[8];
7001+ unsigned char b[8];
7002+ for(i=0;i<8;i++) b[i]=0;
7003+ for(i=0;i<8;i++){
7004+ for(j=0;j<8;j++){
7005+ if(a[j]&(1<<(7-i))) b[i]|=1<<(7-j);
7006+ }
7007+ }
7008+
7009+but is very very slow (128 shifts, 64 and, 64 or, 128 unpredictable
7010+if!), or using a>>=1 and b<<=1, which gains you nothing, or
7011+
7012+ // untested code, may be bugged
7013+ unsigned char a[8];
7014+ unsigned char b[8];
7015+ unsigned char top,bottom;
7016+ for(j=0;j<1;j++){
7017+ for(i=0;i<4;i++){
7018+ top= a[8*j+i];
7019+ bottom=a[8*j+4+i];
7020+ a[8*j+i]= (top&0xf0) |((bottom&0xf0)>>4);
7021+ a[8*j+4+i]=((top&0x0f)<<4)| (bottom&0x0f);
7022+ }
7023+ }
7024+ for(j=0;j<2;j++){
7025+ for(i=0;i<2;i++){
7026+ top= a[4*j+i];
7027+ bottom=a[4*j+2+i];
7028+ a[4*j+i] = (top&0xcc) |((bottom&0xcc)>>2);
7029+ a[4*j+2+i]=((top&0x33)<<2)| (bottom&0x33);
7030+ }
7031+ }
7032+ for(j=0;j<4;j++){
7033+ for(i=0;i<1;i++){
7034+ top= a[2*j+i];
7035+ bottom=a[2*j+1+i];
7036+ a[2*j+i] = (top&0xaa) |((bottom&0xaa)>>1);
7037+ a[2*j+1+i]=((top&0x55)<<1)| (bottom&0x55);
7038+ }
7039+ }
7040+ for(i=0;i<8;i++) b[i]=a[i]; //easy to integrate into one of the stages above
7041+
7042+which is very fast (24 shifts, 48 and, 24 or) and has redundant loops
7043+and address calculations which will be optimized away by the compiler.
7044+It can be written as 3 nested loops but it becomes less readable and
7045+makes it difficult to have results in b without an extra copy. The
7046+compiler always unrolls heavily.
7047+
7048+The gain is much bigger when operating with 32 bit or 64 bit values (we
7049+are going from N^2 to Nlog(N)). This method is used for rectangular
7050+matrixes too (they have to be seen as square matrixes side by side).
7051+Warning: this code is not *endian independent* if you use ints to work
7052+on 4 bytes. Running it on a big endian processor will give you a
7053+different and strange kind of bit rotation if you don't modify masks and
7054+shifts.
7055+
7056+This is done in the code using int or long long int. It should be
7057+possible to use MMX instead of long long int and it could be faster, but
7058+this code doesn't cost a great fraction of the total time. There are
7059+problems with the shifts, as multimedia instructions do not have all
7060+possible kind of shift we need (SSE has none!).
7061+
7062+
7063+TRICK NUMBER 7: try hard to process packets together
7064+----------------------------------------------------
7065+As we are able to process many packets together, we have to avoid
7066+running with many slots empty. Processing one packet or 64 packets takes
7067+the same time if the internal parallelism is 64! So we try hard to
7068+aggregate packets that can be processed together; for simplicity reasons
7069+we don't mix packets with even and odd parity (different keys), even if
7070+it should be doable with a little effort. Sometimes the transition from
7071+even to odd parity and vice versa is not sharp, but there are sequences
7072+like EEEEEOEEOEEOOOO. We try to group all the E together even if there
7073+are O between them. This out-of-order processing complicates the
7074+interface to the applications a bit but saves us three or four runs with
7075+many empty slots.
7076+
7077+We have also logic to process together packets with a different size of
7078+the payload, which is not always 184 bytes. This involves sorting the
7079+packets by size before processing and careful operation of the 23
7080+iteration loop to exclude some packets from the calculations. It is not
7081+CPU heavy.
7082+
7083+Packets with payload <8 bytes are identical before and after decryption
7084+(!), so we skip them without using a slot. (According to DVB specs these
7085+kinds of packets shouldn't happen, but they are used in the real world.)
7086+
7087+
7088+TRICK NUMBER 8: try to avoid doing the same thing many times
7089+------------------------------------------------------------
7090+Some calculations related to keys are only done when the keys are set,
7091+then all the values depending on keys are stored in a convenient form
7092+and used everytime we convert a group of packets.
7093+
7094+
7095+TRICK NUMBER 9: compiler
7096+------------------------
7097+
7098+Compilers have a lot of optimization options. I used -march to target my
7099+CPU and played with unusual options. In particular
7100+ "--param max-unrolled-insns=500"
7101+does a good job on the tricky table lookup in the block cypher. Bigger
7102+values unroll too much somewhere and lose speed. All the testing has
7103+been done on an AthlonXP CPU with a specific version of gcc
7104+ gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
7105+Other combinations of CPU and compiler can give different speeds. If the
7106+compiler is not able to simplify the group and batch structures and
7107+stores everything in memory instead of registers, performance will be
7108+low.
7109+
7110+Absolutely use a good compiler!
7111+
7112+Note: the same code can be compiled in C or C++ mode. g++ gives a 3%
7113+speed increase compared to gcc (I suppose some stricter constraint on
7114+array and pointers in C++ mode gives the optimizer more freedom).
7115+
7116+
7117+TRICK NUMBER a: a lot of brain work
7118+-----------------------------------
7119+The code started as a very slow but correct implementation and was then
7120+tweaked for months with a lot of experimentation and by adding all the
7121+good ideas one after another to achieve little steps toward the best
7122+speed possible, while continuously testing that nothing had been broken.
7123+
7124+Many hours were spent on this code.
7125+
7126+Enjoy the result.
7127Index: libs/libmythtv/dvbchannel.cpp
7128===================================================================
7129--- libs/libmythtv/dvbchannel.cpp.orig 2006-06-11 12:41:25.000000000 -0400
7130+++ libs/libmythtv/dvbchannel.cpp 2006-06-20 17:36:06.000000000 -0400
7131@@ -658,6 +658,9 @@
7132
7133 retune_tuning = channel.tuning;
7134
7135+ GENERAL(QString("invalidating DeCSA for card %1").arg(cardnum));
7136+ DVBRecorder::UpdateDeCSAKeys(cardnum, 'I', 0, NULL, 0);
7137+
7138 if (fd_frontend < 0)
7139 {
7140 ERROR("DVBChannel::Tune: Card not open!");
7141Index: libs/libmythtv/FFdecsa/parallel_128_16charA.h
7142===================================================================
7143--- /dev/null 1970-01-01 00:00:00.000000000 +0000
7144+++ libs/libmythtv/FFdecsa/parallel_128_16charA.h 2006-06-20 17:36:06.000000000 -0400
7145@@ -0,0 +1,172 @@
7146+/* FFdecsa -- fast decsa algorithm
7147+ *
7148+ * Copyright (C) 2003-2004 fatih89r
7149+ *
7150+ * This program is free software; you can redistribute it and/or modify
7151+ * it under the terms of the GNU General Public License as published by
7152+ * the Free Software Foundation; either version 2 of the License, or
7153+ * (at your option) any later version.
7154+ *
7155+ * This program is distributed in the hope that it will be useful,
7156+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7157+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7158+ * GNU General Public License for more details.
7159+ *
7160+ * You should have received a copy of the GNU General Public License
7161+ * along with this program; if not, write to the Free Software
7162+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
7163+ */
7164+
7165+
7166+struct group_t{ /* "group" data type of this variant: an array of 16 unsigned chars */
7167+ unsigned char s1[16];
7168+};
7169+typedef struct group_t group;
7170+
7171+#define GROUP_PARALLELISM 128 /* bits per group; equals 16 x 8 with 8-bit chars */
7172+
7173+group static inline FF0(){ /* returns a group with all 16 bytes set to 0x00 */
7174+ group res;
7175+ int i;
7176+ for(i=0;i<16;i++) res.s1[i]=0x0;
7177+ return res;
7178+}
7179+
7180+group static inline FF1(){ /* returns a group with all 16 bytes set to 0xff */
7181+ group res;
7182+ int i;
7183+ for(i=0;i<16;i++) res.s1[i]=0xff;
7184+ return res;
7185+}
7186+
7187+group static inline FFAND(group a,group b){ /* bitwise AND of two groups, byte by byte */
7188+ group res;
7189+ int i;
7190+ for(i=0;i<16;i++) res.s1[i]=a.s1[i]&b.s1[i];
7191+ return res;
7192+}
7193+
7194+group static inline FFOR(group a,group b){ /* bitwise OR of two groups, byte by byte */
7195+ group res;
7196+ int i;
7197+ for(i=0;i<16;i++) res.s1[i]=a.s1[i]|b.s1[i];
7198+ return res;
7199+}
7200+
7201+group static inline FFXOR(group a,group b){ /* bitwise XOR of two groups, byte by byte */
7202+ group res;
7203+ int i;
7204+ for(i=0;i<16;i++) res.s1[i]=a.s1[i]^b.s1[i];
7205+ return res;
7206+}
7207+
7208+group static inline FFNOT(group a){ /* bitwise complement of a group, byte by byte */
7209+ group res;
7210+ int i;
7211+ for(i=0;i<16;i++) res.s1[i]=~a.s1[i]; /* ~ yields an int; storing into unsigned char keeps only the low byte */
7212+ return res;
7213+}
7214+
7215+
7216+/* 64 rows of 128 bits */
7217+
7218+void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ /* copy two ints from data into tab at int slots 2*g and 2*g+1 */
7219+ *(((int *)tab)+2*g)=*((int *)data); /* NOTE(review): int-pointer access presumably relies on 4-byte int (8 bytes per row) and int-aligned buffers — confirm */
7220+ *(((int *)tab)+2*g+1)=*(((int *)data)+1);
7221+}
7222+
7223+void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ /* inverse of FFTABLEIN: copy int slots 2*g and 2*g+1 of tab into data */
7224+ *((int *)data)=*(((int *)tab)+2*g); /* NOTE(review): same int-size and alignment assumptions as the int casts imply — confirm */
7225+ *(((int *)data)+1)=*(((int *)tab)+2*g+1);
7226+}
7227+
7228+void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ /* XOR n bytes starting at tab+8*g into data[0..n-1] */
7229+ int j;
7230+ for(j=0;j<n;j++){
7231+ *(data+j)^=*(tab+8*g+j); /* byte-wise access; rows are addressed with an 8-byte stride */
7232+ }
7233+}
7234+
7235+
7236+struct batch_t{ /* "batch" data type of this variant: an array of 16 unsigned chars */
7237+ unsigned char s1[16];
7238+};
7239+typedef struct batch_t batch;
7240+
7241+#define BYTES_PER_BATCH 16 /* matches the 16-element s1 array */
7242+
7243+batch static inline B_FFAND(batch a,batch b){ /* bitwise AND of two batches, byte by byte */
7244+ batch res;
7245+ int i;
7246+ for(i=0;i<16;i++) res.s1[i]=a.s1[i]&b.s1[i];
7247+ return res;
7248+}
7249+
7250+batch static inline B_FFOR(batch a,batch b){ /* bitwise OR of two batches, byte by byte */
7251+ batch res;
7252+ int i;
7253+ for(i=0;i<16;i++) res.s1[i]=a.s1[i]|b.s1[i];
7254+ return res;
7255+}
7256+
7257+batch static inline B_FFXOR(batch a,batch b){ /* bitwise XOR of two batches, byte by byte */
7258+ batch res;
7259+ int i;
7260+ for(i=0;i<16;i++) res.s1[i]=a.s1[i]^b.s1[i];
7261+ return res;
7262+}
7263+
7264+
7265+batch static inline B_FFN_ALL_29(){ /* returns a batch with all 16 bytes set to 0x29 */
7266+ batch res;
7267+ int i;
7268+ for(i=0;i<16;i++) res.s1[i]=0x29;
7269+ return res;
7270+}
7271+batch static inline B_FFN_ALL_02(){ /* returns a batch with all 16 bytes set to 0x02 */
7272+ batch res;
7273+ int i;
7274+ for(i=0;i<16;i++) res.s1[i]=0x02;
7275+ return res;
7276+}
7277+batch static inline B_FFN_ALL_04(){ /* returns a batch with all 16 bytes set to 0x04 */
7278+ batch res;
7279+ int i;
7280+ for(i=0;i<16;i++) res.s1[i]=0x04;
7281+ return res;
7282+}
7283+batch static inline B_FFN_ALL_10(){ /* returns a batch with all 16 bytes set to 0x10 */
7284+ batch res;
7285+ int i;
7286+ for(i=0;i<16;i++) res.s1[i]=0x10;
7287+ return res;
7288+}
7289+batch static inline B_FFN_ALL_40(){ /* returns a batch with all 16 bytes set to 0x40 */
7290+ batch res;
7291+ int i;
7292+ for(i=0;i<16;i++) res.s1[i]=0x40;
7293+ return res;
7294+}
7295+batch static inline B_FFN_ALL_80(){ /* returns a batch with all 16 bytes set to 0x80 */
7296+ batch res;
7297+ int i;
7298+ for(i=0;i<16;i++) res.s1[i]=0x80;
7299+ return res;
7300+}
7301+
7302+batch static inline B_FFSH8L(batch a,int n){ /* left-shift every byte of the batch by n bits, independently */
7303+ batch res;
7304+ int i;
7305+ for(i=0;i<16;i++) res.s1[i]=a.s1[i]<<n; /* result is truncated to unsigned char on store, so bits shifted past bit 7 are dropped */
7306+ return res;
7307+}
7308+
7309+batch static inline B_FFSH8R(batch a,int n){ /* right-shift every byte of the batch by n bits, independently */
7310+ batch res;
7311+ int i;
7312+ for(i=0;i<16;i++) res.s1[i]=a.s1[i]>>n; /* each byte is unsigned, so zeros are shifted in from the top */
7313+ return res;
7314+}
7315+
7316+void static inline M_EMPTY(void){ /* no-op: the body is empty in this variant */
7317+}
7318Index: libs/libmythtv/FFdecsa/parallel_generic.h
7319===================================================================
7320--- /dev/null 1970-01-01 00:00:00.000000000 +0000
7321+++ libs/libmythtv/FFdecsa/parallel_generic.h 2006-06-20 17:36:06.000000000 -0400
7322@@ -0,0 +1,102 @@
7323+/* FFdecsa -- fast decsa algorithm
7324+ *
7325+ * Copyright (C) 2003-2004 fatih89r
7326+ *
7327+ * This program is free software; you can redistribute it and/or modify
7328+ * it under the terms of the GNU General Public License as published by
7329+ * the Free Software Foundation; either version 2 of the License, or
7330+ * (at your option) any later version.
7331+ *
7332+ * This program is distributed in the hope that it will be useful,
7333+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7334+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7335+ * GNU General Public License for more details.
7336+ *
7337+ * You should have received a copy of the GNU General Public License
7338+ * along with this program; if not, write to the Free Software
7339+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
7340+ */
7341+
7342+
7343+
7344+#if 0
7345+//// generics
7346+#define COPY4BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \
7347+ *pd = *ps; }while(0) /* COPY4BY: copy one int from s to d */
7348+#define COPY8BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
7349+ *pd = *ps; }while(0) /* COPY8BY: copy one long long from s to d */
7350+#define COPY16BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
7351+ *pd = *ps; \
7352+ *(pd+1) = *(ps+1); }while(0) /* COPY16BY: copy two consecutive long longs */
7353+#define COPY32BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
7354+ *pd = *ps; \
7355+ *(pd+1) = *(ps+1) \
7356+ *(pd+2) = *(ps+2) \
7357+ *(pd+3) = *(ps+3); }while(0) /* COPY32BY: meant to copy four long longs. NOTE(review): the two lines above lack ';', so this would not compile if the disabled branch were enabled */
7358+#define XOR4BY(d,s1,s2) do{ int *pd=(int *)(d), *ps1=(int *)(s1), *ps2=(int *)(s2); \
7359+ *pd = *ps1 ^ *ps2; }while(0) /* XOR4BY: d = s1 ^ s2 on one int */
7360+#define XOR8BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
7361+ *pd = *ps1 ^ *ps2; }while(0) /* XOR8BY: d = s1 ^ s2 on one long long */
7362+#define XOR16BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
7363+ *pd = *ps1 ^ *ps2; \
7364+ *(pd+8) = *(ps1+8) ^ *(ps2+8); }while(0) /* NOTE(review): pointers are long long*, so +8 is the ninth element, not the second word (COPY16BY and XOREQ16BY use +1) — looks like a bug; inert while this branch is disabled */
7365+#define XOR32BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
7366+ *pd = *ps1 ^ *ps2; \
7367+ *(pd+1) = *(ps1+1) ^ *(ps2+1); \
7368+ *(pd+2) = *(ps1+2) ^ *(ps2+2); \
7369+ *(pd+3) = *(ps1+3) ^ *(ps2+3); }while(0) /* XOR32BY: d = s1 ^ s2 on four consecutive long longs */
7370+#define XOR32BV(d,s1,s2) do{ int *const pd=(int *const)(d), *ps1=(const int *const)(s1), *ps2=(const int *const)(s2); \
7371+ int z; \
7372+ for(z=0;z<8;z++){ \
7373+ pd[z]=ps1[z]^ps2[z]; \
7374+ } \
7375+ }while(0) /* XOR32BV: loop form, d[z] = s1[z] ^ s2[z] for 8 ints. NOTE(review): ps1/ps2 are declared as plain int* yet initialised from const int* casts — a C++ compiler would appear to reject this */
7376+#define XOREQ4BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \
7377+ *pd ^= *ps; }while(0) /* XOREQ4BY: d ^= s on one int */
7378+#define XOREQ8BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
7379+ *pd ^= *ps; }while(0) /* XOREQ8BY: d ^= s on one long long */
7380+#define XOREQ16BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
7381+ *pd ^= *ps; \
7382+ *(pd+1) ^=*(ps+1); }while(0) /* XOREQ16BY: d ^= s on two consecutive long longs */
7383+#define XOREQ32BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
7384+ *pd ^= *ps; \
7385+ *(pd+1) ^=*(ps+1); \
7386+ *(pd+2) ^=*(ps+2); \
7387+ *(pd+3) ^=*(ps+3); }while(0) /* XOREQ32BY: d ^= s on four consecutive long longs */
7388+#define XOREQ32BY4(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \
7389+ *pd ^= *ps; \
7390+ *(pd+1) ^=*(ps+1); \
7391+ *(pd+2) ^=*(ps+2); \
7392+ *(pd+3) ^=*(ps+3); \
7393+ *(pd+4) ^=*(ps+4); \
7394+ *(pd+5) ^=*(ps+5); \
7395+ *(pd+6) ^=*(ps+6); \
7396+ *(pd+7) ^=*(ps+7); }while(0) /* XOREQ32BY4: d ^= s on eight consecutive ints */
7397+#define XOREQ32BV(d,s) do{ unsigned char *pd=(unsigned char *)(d), *ps=(unsigned char *)(s); \
7398+ int z; \
7399+ for(z=0;z<32;z++){ \
7400+ pd[z]^=ps[z]; \
7401+ } \
7402+ }while(0) /* XOREQ32BV: byte-wise loop form, d[z] ^= s[z] for 32 bytes */
7403+
7404+#else
7405+#define XOR_4_BY(d,s1,s2) do{ int *pd=(int *)(d), *ps1=(int *)(s1), *ps2=(int *)(s2); \
7406+ *pd = *ps1 ^ *ps2; }while(0) /* XOR_4_BY: d = s1 ^ s2 on one int */
7407+#define XOR_8_BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
7408+ *pd = *ps1 ^ *ps2; }while(0) /* XOR_8_BY: d = s1 ^ s2 on one long long */
7409+#define XOREQ_4_BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \
7410+ *pd ^= *ps; }while(0) /* XOREQ_4_BY: d ^= s on one int */
7411+#define XOREQ_8_BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
7412+ *pd ^= *ps; }while(0) /* XOREQ_8_BY: d ^= s on one long long */
7413+#define COPY_4_BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \
7414+ *pd = *ps; }while(0) /* COPY_4_BY: copy one int from s to d */
7415+#define COPY_8_BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
7416+ *pd = *ps; }while(0) /* COPY_8_BY: copy one long long from s to d */
7417+
7418+#define BEST_SPAN 8 /* presumably the byte width handled by the *_BEST_BY macros, which map to the long long (_8_) forms — assumes 8-byte long long */
7419+#define XOR_BEST_BY(d,s1,s2) do{ XOR_8_BY(d,s1,s2); }while(0); /* NOTE(review): the trailing ';' is part of the expansion, so "M(...);" leaves an extra empty statement and breaks an unbraced if/else */
7420+#define XOREQ_BEST_BY(d,s) do{ XOREQ_8_BY(d,s); }while(0); /* forwards to XOREQ_8_BY; same trailing-';' caveat */
7421+#define COPY_BEST_BY(d,s) do{ COPY_8_BY(d,s); }while(0); /* forwards to COPY_8_BY; same trailing-';' caveat */
7422+
7423+#define END_MM do{ }while(0); /* empty statement in this generic variant; same trailing-';' caveat */
7424+#endif
7425Index: libs/libmythtv/FFdecsa/docs/how_to_use.txt
7426===================================================================
7427--- /dev/null 1970-01-01 00:00:00.000000000 +0000
7428+++ libs/libmythtv/FFdecsa/docs/how_to_use.txt 2006-06-20 17:36:06.000000000 -0400
7429@@ -0,0 +1,239 @@
7430+-------
7431+FFdecsa
7432+-------
7433+
7434+This code is able to decrypt MPEG TS packets with the CSA algorithm. To
7435+achieve high speed, the decryption core works on many packets at the
7436+same time, so the interface is more complicated than usual decsa
7437+implementations.
7438+
7439+The FFdecsa.h file defines the external interface of this code.
7440+
7441+Basically:
7442+
7443+1) you use get_suggested_cluster_size to know the optimal number of
7444+packets you have to pass for decryption
7445+
7446+2) you use set_control_words to set the decryption keys
7447+
7448+3) you use decrypt_packets to do the actual decryption
7449+
7450+You don't need to always use set_control_words before decrypt_packets,
7451+if keys aren't changed.
7452+
7453+
7454+The decrypt_packets function call decrypts many packets at the same
7455+time. The interface is complicated because the only design goal was
7456+speed, so it implements zero-copying of packets, out-of-order decryption
7457+and optimal packet aggregation for better parallelism. This part is the
7458+most difficult to understand.
7459+
7460+--- HOW TO USE int decrypt_packets(unsigned char **cluster); ---
7461+
7462+PARAMETERS
7463+ cluster points to an array of pointers, representing zero or more
7464+ ranges. Every range has a start and end pointer; a start pointer==NULL
7465+ terminates the array.
7466+ So, an array of pointers has this content:
7467+ start_of_buffer_1, end_of_buffer_1, ... start_of_buffer_N,
7468+ end_of_buffer_N, NULL
7469+ example:
7470+ 0x12340000, 0x123400bc, 0x56780a00, 0x56780b78, NULL
7471+ has two ranges (0x12340000 - 0x123400bc and 0x56780a00 - 0x56780b78),
7472+ for a total of three packets (starting at 0x12340000, 0x56780a00,
7473+ 0x56780abc)
7474+RETURNS
7475+ How many packets can now be consumed by the caller, this is always >=
7476+ 1, unless the cluster contained zero packets (in that case it's
7477+ obviously zero).
7478+MODIFIES
7479+ The cluster is modified to try to exclude packets which shouldn't be
7480+ submitted again for decryption (because just decrypted or originally
7481+ not crypted). "Try to exclude" because the returned array will never
7482+ be bigger than what was passed, so if you passed only a range and some
7483+ packets in the middle were decrypted making "holes" into the range,
7484+ the range would have to be split into several ranges, and that will
7485+ not be done. If you want a strict description of what has to be passed
7486+ again to decrypt_packets, you have to use ranges with only one packet
7487+ inside. Note that the first packet will certainly be eliminated from
7488+ the returned cluster (see also RETURNS).
7489+
7490+You can now read the detailed description of operation or just skip to
7491+the API examples.
7492+
7493+
7494+---------------------------------
7495+DETAILED DESCRIPTION OF OPERATION
7496+---------------------------------
7497+ consider a sequence of packets like this:
7498+ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ...
7499+ E E E E E E E E E E E O E O E O O O O O O O O O O c O O O O O O O O O O O ...
7500+ where
7501+ E = encrypted_even,
7502+ O = encrypted_odd,
7503+ e = clear_was_encrypted_even,
7504+ o = clear_was_encrypted_odd,
7505+ c = clear
7506+ and suppose the suggested cluster size is 10 (this could be for a function with internal parallelism 8)
7507+
7508+ 1) we define the cluster to include packets 0-9 and
7509+ call decrypt_packets
7510+ a possible result is that the function call
7511+ - returns 8 (8 packets available)
7512+ - the buffer contains now this
7513+ -----------------------------
7514+ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ...
7515+ e e e e e e e e E E E O E O E O O O O O O O O O O c O O O O O O O O O O O ...
7516+ -----
7517+ - the modified cluster covers 8-9 [continue reading, but then see note 1 below]
7518+ so, we can use the first 8 packets of the original cluster (0-7)
7519+
7520+ 2) now, we define cluster over 8-17 and call decrypt_packets
7521+ a possible result is:
7522+ - returns 3 (3 packets available)
7523+ - the buffer contains now this (!!!)
7524+ -----------------------------
7525+ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ...
7526+ e e e e e e e e e e e O e O e O O O O O O O O O O c O O O O O O O O O O O ...
7527+ -- -- --------
7528+ - the modified cluster covers 11-11,13-13,15-17 [continue reading, but then see note 1 below]
7529+ so, we can use the first 3 packets of the original cluster (8-10)
7530+
7531+ 3) now, we define cluster over 11-20 and call decrypt packets (defining a cluster 11-11,13-13,15-22 would be better)
7532+ a possible result is:
7533+ - returns 10 (10 packets available)
7534+ - the buffer contains now this
7535+ -----------------------------
7536+ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ...
7537+ e e e e e e e e e e e o e o e o o o o o o O O O O c O O O O O O O O O O O ...
7538+
7539+ - the modified cluster is empty
7540+ so, we can use the first 10 packets of the original cluster (11-20)
7541+ What happened is that the second call decrypted packets 12 and 14, but they were
7542+ not made available because packet 11 was still encrypted;
7543+ the third call decrypted 11,13,15-20 and included 12 and 14 as available too.
7544+
7545+ 4) now, we define cluster over 21-30 and call decrypt packets
7546+ a possible result is:
7547+ - returns 9 (9 packets available)
7548+ - the buffer contains now this
7549+ -----------------------------
7550+ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ...
7551+ e e e e e e e e e e e o e o e o o o o o o o o o o c o o o o O O O O O O O ...
7552+ --
7553+ - the modified cluster covers 30-30
7554+ so, we can use the first 9 packets of the original cluster (21-29)
7555+ What happened is that packet 25 could be skipped because it is in clear.
7556+
7557+ Note that the suggested cluster size (10) is higher than the maximum number
7558+ of packets that can be really decrypted (8), but we are able to skip 12 and 14
7559+ in step 3) and run the decryption on a full 8 packets group.
7560+ In the same way, we were able to skip 25 in step 4).
7561+ There are three kinds of "free" packets we can skip:
7562+ - packets decrypted in a previous call (as 12 and 14)
7563+ - packets already in clear (as 25)
7564+ - packets with a payload of less than 8 bytes (clear==encrypted!)
7565+
7566+ Note also that we could have defined a better cluster in step 3
7567+ (11-11,13-13,15-22), using what step 2 had returned. The risk of not
7568+ having 8 packets to decrypt would have been smaller (consider the case
7569+ where 19 and 20 were "c").
7570+
7571+ Final considerations:
7572+ - you can use a bigger or smaller cluster than the suggested number of packets
7573+ - every call to decrypt_packets has a *fixed* CPU cost, so you should try to
7574+ not run it with a few packets, when possible
7575+ - decrypt_packets can't decrypt even and odd at the same time; it guarantees
7576+ that the first packet will be decrypted and tries to decrypt as many packets
7577+ as possible
7578+ - clear packets in the middle of encrypted packets don't happen in real world,
7579+ but E,E,E,O,E,O,O,O sequences do happen (audio/video muxing problems?) and
7580+ small packets (<8 bytes) happen frequently; the ability to skip is useful.
7581+
7582+ note 1:
7583+ As the returned cluster will not have more ranges than the passed one, what is
7584+ described above is not exactly what happens.
7585+ In the step 1) the returned cluster will cover 8-9, but in step 2) it will
7586+ cover 11-17 (some extra packets had to remain in); this lack of information
7587+ prevents us from using an optimal 11-11,13-13,15-22 in step 3). Note that
7588+ in any case step 3) will decrypt 11,13,15,16,17,18,19,20 thanks to the
7589+ extra margin we use (we put ten packets (including 19 and 20) even if the
7590+ parallelism was just 8, and it was a good idea; but if 19 and 20 were of
7591+ type c, we would have run the decryption with only 6/8 efficiency).
7592+ This problem can be prevented by using ranges with only one packet: in
7593+ step 2) we would have passed
7594+ 8-8,9-9,10-10,11-11,12-12,13-13,14-14,15-15,16-16,17-17
7595+ and got back
7596+ 11-11,13-13,15-17.
7597+
7598+
7599+------------
7600+API EXAMPLES
7601+------------
7602+
7603+Some examples of how the API can be used (this is not real code, so it
7604+may have typos or other bugs).
7605+
7606+
7607+Example 1: (big linear buffer, simple use of cluster)
7608+
7609+ unsigned char *p;
7610+ unsigned char *cluster[3];
7611+ for(p=start;p<end;){
7612+ cluster[0]=p;cluster[1]=end;
7613+ cluster[2]=NULL;
7614+ p+=188*decrypt_packets(cluster);
7615+ }
7616+ //consume(start,end);
7617+
7618+
7619+Example 2: (circular buffer, simple use of cluster)
7620+
7621+ unsigned char *p;
7622+ unsigned char *cluster[5];
7623+
7624+ while(1){
7625+ if(read==write){
7626+ //buffer is empty
7627+ //write=refill_buffer(write,start,end);
7628+ continue;
7629+ }
7630+ else if(read<write){
7631+ cluster[0]=read;cluster[1]=write;
7632+ cluster[2]=NULL;
7633+ }
7634+ else{
7635+ cluster[0]=read;cluster[1]=end;
7636+ cluster[2]=start;cluster[3]=write;
7637+ cluster[4]=NULL;
7638+ }
7639+ new_read=read+188*decrypt_packets(cluster);
7640+ if(new_read<=end){
7641+ //consume(read,new_read);
7642+ }
7643+ else{
7644+ new_read=start+(new_read-end);
7645+ //consume(read,end);
7646+ //consume(start,new_read);
7647+ }
7648+ read=new_read;
7649+ if(read==end) read=start;
7650+ }
7651+
7652+
7653+Example 3: (undefined buffer structure, advanced use of cluster)
7654+
7655+ unsigned char *packets[1000000];
7656+ unsigned char *cluster[142]; //if suggested packets is 70
7657+
7658+ cluster[0]=NULL;
7659+ for(n=0;n<1000000;){
7660+ i=0;
7661+ while(cluster[2*i]!=NULL) i++; //preserve returned ranges
7662+ for(k=i;k<70&&n<1000000;k++,n++){
7663+ cluster[2*k]=packets[n];cluster[2*k+1]=packets[n]+188;
7664+ }
7665+ cluster[2*k]=NULL;
7666+ decrypt_packets(cluster);
7667+ }
7668+ //consume_all_packets();
7669Index: libs/libmythtv/FFdecsa/parallel_032_4char.h
7670===================================================================
7671--- /dev/null 1970-01-01 00:00:00.000000000 +0000
7672+++ libs/libmythtv/FFdecsa/parallel_032_4char.h 2006-06-20 17:36:06.000000000 -0400
7673@@ -0,0 +1,206 @@
7674+/* FFdecsa -- fast decsa algorithm
7675+ *
7676+ * Copyright (C) 2003-2004 fatih89r
7677+ *
7678+ * This program is free software; you can redistribute it and/or modify
7679+ * it under the terms of the GNU General Public License as published by
7680+ * the Free Software Foundation; either version 2 of the License, or
7681+ * (at your option) any later version.
7682+ *
7683+ * This program is distributed in the hope that it will be useful,
7684+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7685+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7686+ * GNU General Public License for more details.
7687+ *
7688+ * You should have received a copy of the GNU General Public License
7689+ * along with this program; if not, write to the Free Software
7690+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
7691+ */
7692+
7693+
7694+struct group_t{ /* one "group": four bytes, i.e. 32 bits (cf. GROUP_PARALLELISM below) */
7695+ unsigned char s1,s2,s3,s4; /* the FF* helpers in this file operate on each byte independently */
7696+};
7697+typedef struct group_t group; /* short name used by the FF* helpers */
7698+
7699+#define GROUP_PARALLELISM 32 /* equals the number of bits held by one group (4 x 8) */
7700+
7701+group static inline FF0(){ /* returns a group with every byte 0x00 (all bits clear) */
7702+ group res;
7703+ res.s1=0x0;
7704+ res.s2=0x0;
7705+ res.s3=0x0;
7706+ res.s4=0x0;
7707+ return res;
7708+}
7709+
7710+group static inline FF1(){ /* returns a group with every byte 0xff (all bits set) */
7711+ group res;
7712+ res.s1=0xff;
7713+ res.s2=0xff;
7714+ res.s3=0xff;
7715+ res.s4=0xff;
7716+ return res;
7717+}
7718+
7719+group static inline FFAND(group a,group b){ /* returns the bitwise AND of a and b, computed byte by byte */
7720+ group res;
7721+ res.s1=a.s1&b.s1;
7722+ res.s2=a.s2&b.s2;
7723+ res.s3=a.s3&b.s3;
7724+ res.s4=a.s4&b.s4;
7725+ return res;
7726+}
7727+
7728+group static inline FFOR(group a,group b){ /* returns the bitwise OR of a and b, computed byte by byte */
7729+ group res;
7730+ res.s1=a.s1|b.s1;
7731+ res.s2=a.s2|b.s2;
7732+ res.s3=a.s3|b.s3;
7733+ res.s4=a.s4|b.s4;
7734+ return res;
7735+}
7736+
7737+group static inline FFXOR(group a,group b){ /* returns the bitwise XOR of a and b, computed byte by byte */
7738+ group res;
7739+ res.s1=a.s1^b.s1;
7740+ res.s2=a.s2^b.s2;
7741+ res.s3=a.s3^b.s3;
7742+ res.s4=a.s4^b.s4;
7743+ return res;
7744+}
7745+
7746+group static inline FFNOT(group a){ /* returns the bitwise complement of a, computed byte by byte */
7747+ group res;
7748+ res.s1=~a.s1; /* ~ works on the promoted int; storing into unsigned char keeps the low 8 bits */
7749+ res.s2=~a.s2;
7750+ res.s3=~a.s3;
7751+ res.s4=~a.s4;
7752+ return res;
7753+}
7754+
7755+
7756+/* 64 rows of 32 bits */
7757+
7758+void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){ /* copies two consecutive ints from data into slot g of tab; NOTE(review): accesses unsigned char buffers through int* - relies on 32-bit int and suitable alignment, confirm for the target */
7759+ *(((int *)tab)+g)=*((int *)data); /* first int of data -> int index g of tab */
7760+ *(((int *)tab)+32+g)=*(((int *)data)+1); /* second int of data -> int index 32+g of tab */
7761+}
7762+
7763+void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){ /* inverse of FFTABLEIN: copies slot g of tab back into two consecutive ints at data; NOTE(review): same int* access caveats (int size, alignment) - confirm for the target */
7764+ *((int *)data)=*(((int *)tab)+g); /* int index g of tab -> first int of data */
7765+ *(((int *)data)+1)=*(((int *)tab)+32+g); /* int index 32+g of tab -> second int of data */
7766+}
7767+
7768+void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){ /* XORs n bytes taken from slot g of tab into data[0..n-1] */
7769+ int j;
7770+ for(j=0;j<n;j++){
7771+ *(data+j)^=*(tab+4*(g+(j>=4?32-1:0))+j); /* j<4 reads tab[4*g+j]; j>=4 reads tab[4*(g+31)+j], which equals tab[4*(g+32)+(j-4)] */
7772+ }
7773+}
7774+
7775+struct batch_t{ /* one "batch": four bytes (cf. BYTES_PER_BATCH below) */
7776+ unsigned char s1,s2,s3,s4; /* the B_FF* helpers in this file operate on each byte independently */
7777+};
7778+typedef struct batch_t batch; /* short name used by the B_FF* helpers */
7779+
7780+#define BYTES_PER_BATCH 4 /* equals the number of unsigned char members of batch */
7781+
7782+batch static inline B_FFAND(batch a,batch b){ /* returns the bitwise AND of a and b, computed byte by byte */
7783+ batch res;
7784+ res.s1=a.s1&b.s1;
7785+ res.s2=a.s2&b.s2;
7786+ res.s3=a.s3&b.s3;
7787+ res.s4=a.s4&b.s4;
7788+ return res;
7789+}
7790+
7791+batch static inline B_FFOR(batch a,batch b){ /* returns the bitwise OR of a and b, computed byte by byte */
7792+ batch res;
7793+ res.s1=a.s1|b.s1;
7794+ res.s2=a.s2|b.s2;
7795+ res.s3=a.s3|b.s3;
7796+ res.s4=a.s4|b.s4;
7797+ return res;
7798+}
7799+
7800+batch static inline B_FFXOR(batch a,batch b){ /* returns the bitwise XOR of a and b, computed byte by byte */
7801+ batch res;
7802+ res.s1=a.s1^b.s1;
7803+ res.s2=a.s2^b.s2;
7804+ res.s3=a.s3^b.s3;
7805+ res.s4=a.s4^b.s4;
7806+ return res;
7807+}
7808+
7809+
7810+batch static inline B_FFN_ALL_29(){ /* returns a batch with every byte set to 0x29 */
7811+ batch res;
7812+ res.s1=0x29;
7813+ res.s2=0x29;
7814+ res.s3=0x29;
7815+ res.s4=0x29;
7816+ return res;
7817+}
7818+batch static inline B_FFN_ALL_02(){ /* returns a batch with every byte set to 0x02 */
7819+ batch res;
7820+ res.s1=0x02;
7821+ res.s2=0x02;
7822+ res.s3=0x02;
7823+ res.s4=0x02;
7824+ return res;
7825+}
7826+batch static inline B_FFN_ALL_04(){ /* returns a batch with every byte set to 0x04 */
7827+ batch res;
7828+ res.s1=0x04;
7829+ res.s2=0x04;
7830+ res.s3=0x04;
7831+ res.s4=0x04;
7832+ return res;
7833+}
7834+batch static inline B_FFN_ALL_10(){ /* returns a batch with every byte set to 0x10 */
7835+ batch res;
7836+ res.s1=0x10;
7837+ res.s2=0x10;
7838+ res.s3=0x10;
7839+ res.s4=0x10;
7840+ return res;
7841+}
7842+batch static inline B_FFN_ALL_40(){ /* returns a batch with every byte set to 0x40 */
7843+ batch res;
7844+ res.s1=0x40;
7845+ res.s2=0x40;
7846+ res.s3=0x40;
7847+ res.s4=0x40;
7848+ return res;
7849+}
7850+batch static inline B_FFN_ALL_80(){ /* returns a batch with every byte set to 0x80 */
7851+ batch res;
7852+ res.s1=0x80;
7853+ res.s2=0x80;
7854+ res.s3=0x80;
7855+ res.s4=0x80;
7856+ return res;
7857+}
7858+
7859+batch static inline B_FFSH8L(batch a,int n){ /* shifts every byte of a left by n bits, each byte on its own */
7860+ batch res;
7861+ res.s1=a.s1<<n; /* bits pushed above bit 7 are lost when stored into unsigned char; nothing carries into the next byte */
7862+ res.s2=a.s2<<n;
7863+ res.s3=a.s3<<n;
7864+ res.s4=a.s4<<n;
7865+ return res;
7866+}
7867+
7868+batch static inline B_FFSH8R(batch a,int n){ /* shifts every byte of a right by n bits, each byte on its own */
7869+ batch res;
7870+ res.s1=a.s1>>n; /* operand is an unsigned char promoted to a non-negative int, so vacated high bits are zero */
7871+ res.s2=a.s2>>n;
7872+ res.s3=a.s3>>n;
7873+ res.s4=a.s4>>n;
7874+ return res;
7875+}
7876+
7877+
7878+void static inline M_EMPTY(void){ /* intentionally empty: this implementation has nothing to do here */
7879+}
7880Index: libs/libmythtv/FFdecsa/stream.c
7881===================================================================
7882--- /dev/null 1970-01-01 00:00:00.000000000 +0000
7883+++ libs/libmythtv/FFdecsa/stream.c 2006-06-20 17:36:06.000000000 -0400
7884@@ -0,0 +1,906 @@
7885+/* FFdecsa -- fast decsa algorithm
7886+ *
7887+ * Copyright (C) 2003-2004 fatih89r
7888+ *
7889+ * This program is free software; you can redistribute it and/or modify
7890+ * it under the terms of the GNU General Public License as published by
7891+ * the Free Software Foundation; either version 2 of the License, or
7892+ * (at your option) any later version.
7893+ *
7894+ * This program is distributed in the hope that it will be useful,
7895+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7896+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7897+ * GNU General Public License for more details.
7898+ *
7899+ * You should have received a copy of the GNU General Public License
7900+ * along with this program; if not, write to the Free Software
7901+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
7902+ */
7903+
7904+
7905+
7906+// define statics only once, when STREAM_INIT
7907+#ifdef STREAM_INIT
7908+struct stream_regs { // state handed by pointer to stream_cypher_group_init / stream_cypher_group_normal
7909+ group A[32+10][4]; // 32 because we will move back (virtual shift register); init loads rows aboff+0..aboff+7 (aboff=32) from iA and clears rows aboff+8, aboff+9
7910+ group B[32+10][4]; // 32 because we will move back (virtual shift register); init loads rows aboff+0..aboff+7 (aboff=32) from iB and clears rows aboff+8, aboff+9
7911+ group X[4]; // X, Y, Z, D, E, F: four groups each, all cleared to FF0() by the init variant
7912+ group Y[4];
7913+ group Z[4];
7914+ group D[4];
7915+ group E[4];
7916+ group F[4];
7917+ group p; // p, q, r: one group each, cleared to FF0() by the init variant
7918+ group q;
7919+ group r;
7920+ };
7921+
7922+static inline void trasp64_32_88ccw(unsigned char *data){
7923+/* 64 rows of 32 bits transposition (bytes transp. - 8x8 rotate counterclockwise); works in place on data, accessed as 64 unsigned ints. NOTE(review): data must be suitably aligned for unsigned int - confirm at call sites */
7924+#define row ((unsigned int *)data) /* row[k] is the k-th 32-bit word of data */
7925+ int i,j;
7926+ for(j=0;j<64;j+=32){ /* stage 1: rows 16 apart - upper 16 bits of the first row <-> lower 16 bits of the second */
7927+ unsigned int t,b;
7928+ for(i=0;i<16;i++){
7929+ t=row[j+i];
7930+ b=row[j+16+i];
7931+ row[j+i] = (t&0x0000ffff) | ((b )<<16);
7932+ row[j+16+i]=((t )>>16) | (b&0xffff0000) ;
7933+ }
7934+ }
7935+ for(j=0;j<64;j+=16){ /* stage 2: rows 8 apart - bytes under 0xff00ff00 of the first row <-> bytes under 0x00ff00ff of the second */
7936+ unsigned int t,b;
7937+ for(i=0;i<8;i++){
7938+ t=row[j+i];
7939+ b=row[j+8+i];
7940+ row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
7941+ row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00);
7942+ }
7943+ }
7944+ for(j=0;j<64;j+=8){ /* stage 3: rows 4 apart - low nibbles of both rows go to the first row, high nibbles of both to the second */
7945+ unsigned int t,b;
7946+ for(i=0;i<4;i++){
7947+ t=row[j+i];
7948+ b=row[j+4+i];
7949+ row[j+i] =((t&0x0f0f0f0f)<<4) | (b&0x0f0f0f0f);
7950+ row[j+4+i] = (t&0xf0f0f0f0) | ((b&0xf0f0f0f0)>>4);
7951+ }
7952+ }
7953+ for(j=0;j<64;j+=4){ /* stage 4: rows 2 apart - bit pairs under 0x33.. of both rows go to the first row, pairs under 0xcc.. to the second */
7954+ unsigned int t,b;
7955+ for(i=0;i<2;i++){
7956+ t=row[j+i];
7957+ b=row[j+2+i];
7958+ row[j+i] =((t&0x33333333)<<2) | (b&0x33333333);
7959+ row[j+2+i] = (t&0xcccccccc) | ((b&0xcccccccc)>>2);
7960+ }
7961+ }
7962+ for(j=0;j<64;j+=2){ /* stage 5: adjacent rows - bits under 0x55.. of both rows go to the first row, bits under 0xaa.. to the second */
7963+ unsigned int t,b;
7964+ for(i=0;i<1;i++){ /* single iteration; kept in loop form like the other stages */
7965+ t=row[j+i];
7966+ b=row[j+1+i];
7967+ row[j+i] =((t&0x55555555)<<1) | (b&0x55555555);
7968+ row[j+1+i] = (t&0xaaaaaaaa) | ((b&0xaaaaaaaa)>>1);
7969+ }
7970+ }
7971+#undef row
7972+}
7973+
7974+static inline void trasp64_32_88cw(unsigned char *data){
7975+/* 64 rows of 32 bits transposition (bytes transp. - 8x8 rotate clockwise); works in place on data, accessed as 64 unsigned ints. NOTE(review): data must be suitably aligned for unsigned int - confirm at call sites */
7976+#define row ((unsigned int *)data) /* row[k] is the k-th 32-bit word of data */
7977+ int i,j;
7978+ for(j=0;j<64;j+=32){ /* stage 1: rows 16 apart - upper 16 bits of the first row <-> lower 16 bits of the second */
7979+ unsigned int t,b;
7980+ for(i=0;i<16;i++){
7981+ t=row[j+i];
7982+ b=row[j+16+i];
7983+ row[j+i] = (t&0x0000ffff) | ((b )<<16);
7984+ row[j+16+i]=((t )>>16) | (b&0xffff0000) ;
7985+ }
7986+ }
7987+ for(j=0;j<64;j+=16){ /* stage 2: rows 8 apart - bytes under 0xff00ff00 of the first row <-> bytes under 0x00ff00ff of the second */
7988+ unsigned int t,b;
7989+ for(i=0;i<8;i++){
7990+ t=row[j+i];
7991+ b=row[j+8+i];
7992+ row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
7993+ row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00);
7994+ }
7995+ }
7996+ for(j=0;j<64;j+=8){ /* stage 3: rows 4 apart - high nibbles of both rows go to the first row, low nibbles of both to the second */
7997+ unsigned int t,b;
7998+ for(i=0;i<4;i++){
7999+ t=row[j+i];
8000+ b=row[j+4+i];
8001+ row[j+i] =((t&0xf0f0f0f0)>>4) | (b&0xf0f0f0f0);
8002+ row[j+4+i]= (t&0x0f0f0f0f) | ((b&0x0f0f0f0f)<<4);
8003+ }
8004+ }
8005+ for(j=0;j<64;j+=4){ /* stage 4: rows 2 apart - bit pairs under 0xcc.. of both rows go to the first row, pairs under 0x33.. to the second */
8006+ unsigned int t,b;
8007+ for(i=0;i<2;i++){
8008+ t=row[j+i];
8009+ b=row[j+2+i];
8010+ row[j+i] =((t&0xcccccccc)>>2) | (b&0xcccccccc);
8011+ row[j+2+i]= (t&0x33333333) | ((b&0x33333333)<<2);
8012+ }
8013+ }
8014+ for(j=0;j<64;j+=2){ /* stage 5: adjacent rows - bits under 0xaa.. of both rows go to the first row, bits under 0x55.. to the second */
8015+ unsigned int t,b;
8016+ for(i=0;i<1;i++){ /* single iteration; kept in loop form like the other stages */
8017+ t=row[j+i];
8018+ b=row[j+1+i];
8019+ row[j+i] =((t&0xaaaaaaaa)>>1) | (b&0xaaaaaaaa);
8020+ row[j+1+i]= (t&0x55555555) | ((b&0x55555555)<<1);
8021+ }
8022+ }
8023+#undef row
8024+}
8025+
8026+//64-64----------------------------------------------------------
8027+static inline void trasp64_64_88ccw(unsigned char *data){
8028+/* 64 rows of 64 bits transposition (bytes transp. - 8x8 rotate counterclockwise); works in place on data, accessed as 64 unsigned long long words. NOTE(review): data must be suitably aligned for unsigned long long - confirm at call sites */
8029+#define row ((unsigned long long int *)data) /* row[k] is the k-th 64-bit word of data */
8030+ int i,j;
8031+ for(j=0;j<64;j+=64){ /* stage 1 (single pass): rows 32 apart - upper 32 bits of the first row <-> lower 32 bits of the second */
8032+ unsigned long long int t,b;
8033+ for(i=0;i<32;i++){
8034+ t=row[j+i];
8035+ b=row[j+32+i];
8036+ row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32);
8037+ row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ;
8038+ }
8039+ }
8040+ for(j=0;j<64;j+=32){ /* stage 2: rows 16 apart - 16-bit units under 0xffff0000.. of the first row <-> units under 0x0000ffff.. of the second */
8041+ unsigned long long int t,b;
8042+ for(i=0;i<16;i++){
8043+ t=row[j+i];
8044+ b=row[j+16+i];
8045+ row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
8046+ row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
8047+ }
8048+ }
8049+ for(j=0;j<64;j+=16){ /* stage 3: rows 8 apart - bytes under 0xff00.. of the first row <-> bytes under 0x00ff.. of the second */
8050+ unsigned long long int t,b;
8051+ for(i=0;i<8;i++){
8052+ t=row[j+i];
8053+ b=row[j+8+i];
8054+ row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
8055+ row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
8056+ }
8057+ }
8058+ for(j=0;j<64;j+=8){ /* stage 4: rows 4 apart - low nibbles of both rows go to the first row, high nibbles of both to the second */
8059+ unsigned long long int t,b;
8060+ for(i=0;i<4;i++){
8061+ t=row[j+i];
8062+ b=row[j+4+i];
8063+ row[j+i] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL);
8064+ row[j+4+i] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
8065+ }
8066+ }
8067+ for(j=0;j<64;j+=4){ /* stage 5: rows 2 apart - bit pairs under 0x33.. of both rows go to the first row, pairs under 0xcc.. to the second */
8068+ unsigned long long int t,b;
8069+ for(i=0;i<2;i++){
8070+ t=row[j+i];
8071+ b=row[j+2+i];
8072+ row[j+i] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL);
8073+ row[j+2+i] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2);
8074+ }
8075+ }
8076+ for(j=0;j<64;j+=2){ /* stage 6: adjacent rows - bits under 0x55.. of both rows go to the first row, bits under 0xaa.. to the second */
8077+ unsigned long long int t,b;
8078+ for(i=0;i<1;i++){ /* single iteration; kept in loop form like the other stages */
8079+ t=row[j+i];
8080+ b=row[j+1+i];
8081+ row[j+i] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL);
8082+ row[j+1+i] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
8083+ }
8084+ }
8085+#undef row
8086+}
8087+
8088+static inline void trasp64_64_88cw(unsigned char *data){
8089+/* 64 rows of 64 bits transposition (bytes transp. - 8x8 rotate clockwise); works in place on data, accessed as 64 unsigned long long words. NOTE(review): data must be suitably aligned for unsigned long long - confirm at call sites */
8090+#define row ((unsigned long long int *)data) /* row[k] is the k-th 64-bit word of data */
8091+ int i,j;
8092+ for(j=0;j<64;j+=64){ /* stage 1 (single pass): rows 32 apart - upper 32 bits of the first row <-> lower 32 bits of the second */
8093+ unsigned long long int t,b;
8094+ for(i=0;i<32;i++){
8095+ t=row[j+i];
8096+ b=row[j+32+i];
8097+ row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32);
8098+ row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ;
8099+ }
8100+ }
8101+ for(j=0;j<64;j+=32){ /* stage 2: rows 16 apart - 16-bit units under 0xffff0000.. of the first row <-> units under 0x0000ffff.. of the second */
8102+ unsigned long long int t,b;
8103+ for(i=0;i<16;i++){
8104+ t=row[j+i];
8105+ b=row[j+16+i];
8106+ row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
8107+ row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
8108+ }
8109+ }
8110+ for(j=0;j<64;j+=16){ /* stage 3: rows 8 apart - bytes under 0xff00.. of the first row <-> bytes under 0x00ff.. of the second */
8111+ unsigned long long int t,b;
8112+ for(i=0;i<8;i++){
8113+ t=row[j+i];
8114+ b=row[j+8+i];
8115+ row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
8116+ row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
8117+ }
8118+ }
8119+ for(j=0;j<64;j+=8){ /* stage 4: rows 4 apart - high nibbles of both rows go to the first row, low nibbles of both to the second */
8120+ unsigned long long int t,b;
8121+ for(i=0;i<4;i++){
8122+ t=row[j+i];
8123+ b=row[j+4+i];
8124+ row[j+i] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL);
8125+ row[j+4+i] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
8126+ }
8127+ }
8128+ for(j=0;j<64;j+=4){ /* stage 5: rows 2 apart - bit pairs under 0xcc.. of both rows go to the first row, pairs under 0x33.. to the second */
8129+ unsigned long long int t,b;
8130+ for(i=0;i<2;i++){
8131+ t=row[j+i];
8132+ b=row[j+2+i];
8133+ row[j+i] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL);
8134+ row[j+2+i] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2);
8135+ }
8136+ }
8137+ for(j=0;j<64;j+=2){ /* stage 6: adjacent rows - bits under 0xaa.. of both rows go to the first row, bits under 0x55.. to the second */
8138+ unsigned long long int t,b;
8139+ for(i=0;i<1;i++){ /* single iteration; kept in loop form like the other stages */
8140+ t=row[j+i];
8141+ b=row[j+1+i];
8142+ row[j+i] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL);
8143+ row[j+1+i] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1);
8144+ }
8145+ }
8146+#undef row
8147+}
8148+
8149+//64-128----------------------------------------------------------
8150+static inline void trasp64_128_88ccw(unsigned char *data){
8151+/* 64 rows of 128 bits transposition (bytes transp. - 8x8 rotate counterclockwise); works in place on data, accessed as 128 unsigned long long words. Each stage is applied separately to the two 64-bit halves of a row; no bits move between the halves. NOTE(review): data must be suitably aligned for unsigned long long - confirm at call sites */
8152+#define halfrow ((unsigned long long int *)data) /* row r occupies halfrow[2*r] and halfrow[2*r+1] */
8153+ int i,j;
8154+ for(j=0;j<64;j+=64){ /* stage 1 (single pass): rows 32 apart - upper 32 bits of the first row's word <-> lower 32 bits of the second's */
8155+ unsigned long long int t,b;
8156+ for(i=0;i<32;i++){
8157+ t=halfrow[2*(j+i)];
8158+ b=halfrow[2*(j+32+i)];
8159+ halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32);
8160+ halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ;
8161+ t=halfrow[2*(j+i)+1]; /* same exchange for the second 64-bit word of both rows */
8162+ b=halfrow[2*(j+32+i)+1];
8163+ halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32);
8164+ halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ;
8165+ }
8166+ }
8167+ for(j=0;j<64;j+=32){ /* stage 2: rows 16 apart - 16-bit units under 0xffff0000.. of the first row <-> units under 0x0000ffff.. of the second */
8168+ unsigned long long int t,b;
8169+ for(i=0;i<16;i++){
8170+ t=halfrow[2*(j+i)];
8171+ b=halfrow[2*(j+16+i)];
8172+ halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
8173+ halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
8174+ t=halfrow[2*(j+i)+1];
8175+ b=halfrow[2*(j+16+i)+1];
8176+ halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
8177+ halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
8178+ }
8179+ }
8180+ for(j=0;j<64;j+=16){ /* stage 3: rows 8 apart - bytes under 0xff00.. of the first row <-> bytes under 0x00ff.. of the second */
8181+ unsigned long long int t,b;
8182+ for(i=0;i<8;i++){
8183+ t=halfrow[2*(j+i)];
8184+ b=halfrow[2*(j+8+i)];
8185+ halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
8186+ halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
8187+ t=halfrow[2*(j+i)+1];
8188+ b=halfrow[2*(j+8+i)+1];
8189+ halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
8190+ halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
8191+ }
8192+ }
8193+ for(j=0;j<64;j+=8){ /* stage 4: rows 4 apart - low nibbles of both rows go to the first row, high nibbles of both to the second */
8194+ unsigned long long int t,b;
8195+ for(i=0;i<4;i++){
8196+ t=halfrow[2*(j+i)];
8197+ b=halfrow[2*(j+4+i)];
8198+ halfrow[2*(j+i)] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL);
8199+ halfrow[2*(j+4+i)] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
8200+ t=halfrow[2*(j+i)+1];
8201+ b=halfrow[2*(j+4+i)+1];
8202+ halfrow[2*(j+i)+1] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL);
8203+ halfrow[2*(j+4+i)+1] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
8204+ }
8205+ }
8206+ for(j=0;j<64;j+=4){ /* stage 5: rows 2 apart - bit pairs under 0x33.. of both rows go to the first row, pairs under 0xcc.. to the second */
8207+ unsigned long long int t,b;
8208+ for(i=0;i<2;i++){
8209+ t=halfrow[2*(j+i)];
8210+ b=halfrow[2*(j+2+i)];
8211+ halfrow[2*(j+i)] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL);
8212+ halfrow[2*(j+2+i)] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2);
8213+ t=halfrow[2*(j+i)+1];
8214+ b=halfrow[2*(j+2+i)+1];
8215+ halfrow[2*(j+i)+1] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL);
8216+ halfrow[2*(j+2+i)+1] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2);
8217+ }
8218+ }
8219+ for(j=0;j<64;j+=2){ /* stage 6: adjacent rows - bits under 0x55.. of both rows go to the first row, bits under 0xaa.. to the second */
8220+ unsigned long long int t,b;
8221+ for(i=0;i<1;i++){ /* single iteration; kept in loop form like the other stages */
8222+ t=halfrow[2*(j+i)];
8223+ b=halfrow[2*(j+1+i)];
8224+ halfrow[2*(j+i)] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL);
8225+ halfrow[2*(j+1+i)] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
8226+ t=halfrow[2*(j+i)+1];
8227+ b=halfrow[2*(j+1+i)+1];
8228+ halfrow[2*(j+i)+1] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL);
8229+ halfrow[2*(j+1+i)+1] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
8230+ }
8231+ }
8232+#undef halfrow
8233+}
8234+
8235+static inline void trasp64_128_88cw(unsigned char *data){
8236+/* 64 rows of 128 bits transposition (bytes transp. - 8x8 rotate clockwise); works in place on data, accessed as 128 unsigned long long words. Each stage is applied separately to the two 64-bit halves of a row; no bits move between the halves. NOTE(review): data must be suitably aligned for unsigned long long - confirm at call sites */
8237+#define halfrow ((unsigned long long int *)data) /* row r occupies halfrow[2*r] and halfrow[2*r+1] */
8238+ int i,j;
8239+ for(j=0;j<64;j+=64){ /* stage 1 (single pass): rows 32 apart - upper 32 bits of the first row's word <-> lower 32 bits of the second's */
8240+ unsigned long long int t,b;
8241+ for(i=0;i<32;i++){
8242+ t=halfrow[2*(j+i)];
8243+ b=halfrow[2*(j+32+i)];
8244+ halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32);
8245+ halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ;
8246+ t=halfrow[2*(j+i)+1]; /* same exchange for the second 64-bit word of both rows */
8247+ b=halfrow[2*(j+32+i)+1];
8248+ halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32);
8249+ halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ;
8250+ }
8251+ }
8252+ for(j=0;j<64;j+=32){ /* stage 2: rows 16 apart - 16-bit units under 0xffff0000.. of the first row <-> units under 0x0000ffff.. of the second */
8253+ unsigned long long int t,b;
8254+ for(i=0;i<16;i++){
8255+ t=halfrow[2*(j+i)];
8256+ b=halfrow[2*(j+16+i)];
8257+ halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
8258+ halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
8259+ t=halfrow[2*(j+i)+1];
8260+ b=halfrow[2*(j+16+i)+1];
8261+ halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
8262+ halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
8263+ }
8264+ }
8265+ for(j=0;j<64;j+=16){ /* stage 3: rows 8 apart - bytes under 0xff00.. of the first row <-> bytes under 0x00ff.. of the second */
8266+ unsigned long long int t,b;
8267+ for(i=0;i<8;i++){
8268+ t=halfrow[2*(j+i)];
8269+ b=halfrow[2*(j+8+i)];
8270+ halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
8271+ halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
8272+ t=halfrow[2*(j+i)+1];
8273+ b=halfrow[2*(j+8+i)+1];
8274+ halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
8275+ halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
8276+ }
8277+ }
8278+ for(j=0;j<64;j+=8){ /* stage 4: rows 4 apart - high nibbles of both rows go to the first row, low nibbles of both to the second */
8279+ unsigned long long int t,b;
8280+ for(i=0;i<4;i++){
8281+ t=halfrow[2*(j+i)];
8282+ b=halfrow[2*(j+4+i)];
8283+ halfrow[2*(j+i)] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL);
8284+ halfrow[2*(j+4+i)] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
8285+ t=halfrow[2*(j+i)+1];
8286+ b=halfrow[2*(j+4+i)+1];
8287+ halfrow[2*(j+i)+1] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL);
8288+ halfrow[2*(j+4+i)+1] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
8289+ }
8290+ }
8291+ for(j=0;j<64;j+=4){ /* stage 5: rows 2 apart - bit pairs under 0xcc.. of both rows go to the first row, pairs under 0x33.. to the second */
8292+ unsigned long long int t,b;
8293+ for(i=0;i<2;i++){
8294+ t=halfrow[2*(j+i)];
8295+ b=halfrow[2*(j+2+i)];
8296+ halfrow[2*(j+i)] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL);
8297+ halfrow[2*(j+2+i)] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2);
8298+ t=halfrow[2*(j+i)+1];
8299+ b=halfrow[2*(j+2+i)+1];
8300+ halfrow[2*(j+i)+1] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL);
8301+ halfrow[2*(j+2+i)+1] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2);
8302+ }
8303+ }
8304+ for(j=0;j<64;j+=2){ /* stage 6: adjacent rows - bits under 0xaa.. of both rows go to the first row, bits under 0x55.. to the second */
8305+ unsigned long long int t,b;
8306+ for(i=0;i<1;i++){ /* single iteration; kept in loop form like the other stages */
8307+ t=halfrow[2*(j+i)];
8308+ b=halfrow[2*(j+1+i)];
8309+ halfrow[2*(j+i)] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL);
8310+ halfrow[2*(j+1+i)] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1);
8311+ t=halfrow[2*(j+i)+1];
8312+ b=halfrow[2*(j+1+i)+1];
8313+ halfrow[2*(j+i)+1] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL);
8314+ halfrow[2*(j+1+i)+1] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1);
8315+ }
8316+ }
8317+#undef halfrow
8318+}
8319+#endif
8320+
8321+
8322+#ifdef STREAM_INIT
8323+void stream_cypher_group_init(
8324+ struct stream_regs *regs,
8325+ group iA[8][4], // [In] iA00,iA01,...iA73 32 groups | Derived from key.
8326+ group iB[8][4], // [In] iB00,iB01,...iB73 32 groups | Derived from key.
8327+ unsigned char *sb) // [In] (SB0,SB1,...SB7)...x32 32*8 bytes | Extra input.
8328+#endif
8329+#ifdef STREAM_NORMAL
8330+void stream_cypher_group_normal(
8331+ struct stream_regs *regs,
8332+ unsigned char *cb) // [Out] (CB0,CB1,...CB7)...x32 32*8 bytes | Output.
8333+#endif
8334+{
8335+#ifdef STREAM_INIT
8336+ group in1[4];
8337+ group in2[4];
8338+#endif
8339+ group extra_B[4];
8340+ group fa,fb,fc,fd,fe;
8341+ group s1a,s1b,s2a,s2b,s3a,s3b,s4a,s4b,s5a,s5b,s6a,s6b,s7a,s7b;
8342+ group next_E[4];
8343+ group tmp0,tmp1,tmp2,tmp3,tmp4;
8344+#ifdef STREAM_INIT
8345+ group *sb_g=(group *)sb;
8346+#endif
8347+#ifdef STREAM_NORMAL
8348+ group *cb_g=(group *)cb;
8349+#endif
8350+ int aboff;
8351+ int i,j,k,b;
8352+ int dbg;
8353+
8354+#ifdef STREAM_INIT
8355+ DBG(fprintf(stderr,":::::::::: BEGIN STREAM INIT\n"));
8356+#endif
8357+#ifdef STREAM_NORMAL
8358+ DBG(fprintf(stderr,":::::::::: BEGIN STREAM NORMAL\n"));
8359+#endif
8360+#ifdef STREAM_INIT
8361+for(j=0;j<64;j++){
8362+ DBG(fprintf(stderr,"precall prerot stream_in[%2i]=",j));
8363+ DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG));
8364+}
8365+
8366+DBG(dump_mem("stream_prerot ",sb,GROUP_PARALLELISM*8,BYPG));
8367+#if GROUP_PARALLELISM==32
8368+trasp64_32_88ccw(sb);
8369+#endif
8370+#if GROUP_PARALLELISM==64
8371+trasp64_64_88ccw(sb);
8372+#endif
8373+#if GROUP_PARALLELISM==128
8374+trasp64_128_88ccw(sb);
8375+#endif
8376+DBG(dump_mem("stream_postrot",sb,GROUP_PARALLELISM*8,BYPG));
8377+
8378+for(j=0;j<64;j++){
8379+ DBG(fprintf(stderr,"precall stream_in[%2i]=",j));
8380+ DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG));
8381+}
8382+#endif
8383+
8384+ aboff=32;
8385+
8386+#ifdef STREAM_INIT
8387+ // load first 32 bits of ck into A[aboff+0]..A[aboff+7]
8388+ // load last 32 bits of ck into B[aboff+0]..B[aboff+7]
8389+ // all other regs = 0
8390+ for(i=0;i<8;i++){
8391+ for(b=0;b<4;b++){
8392+DBG(fprintf(stderr,"dbg from iA A[%i][%i]=",i,b));
8393+DBG(dump_mem("",(unsigned char *)&iA[i][b],BYPG,BYPG));
8394+DBG(fprintf(stderr," dbg from iB B[%i][%i]=",i,b));
8395+DBG(dump_mem("",(unsigned char *)&iB[i][b],BYPG,BYPG));
8396+ regs->A[aboff+i][b]=iA[i][b];
8397+ regs->B[aboff+i][b]=iB[i][b];
8398+ }
8399+ }
8400+ for(b=0;b<4;b++){
8401+ regs->A[aboff+8][b]=FF0();
8402+ regs->A[aboff+9][b]=FF0();
8403+ regs->B[aboff+8][b]=FF0();
8404+ regs->B[aboff+9][b]=FF0();
8405+ }
8406+ for(b=0;b<4;b++){
8407+ regs->X[b]=FF0();
8408+ regs->Y[b]=FF0();
8409+ regs->Z[b]=FF0();
8410+ regs->D[b]=FF0();
8411+ regs->E[b]=FF0();
8412+ regs->F[b]=FF0();
8413+ }
8414+ regs->p=FF0();
8415+ regs->q=FF0();
8416+ regs->r=FF0();
8417+#endif
8418+
8419+for(dbg=0;dbg<4;dbg++){
8420+ DBG(fprintf(stderr,"dbg A0[%i]=",dbg));
8421+ DBG(dump_mem("",(unsigned char *)&regs->A[aboff+0][dbg],BYPG,BYPG));
8422+ DBG(fprintf(stderr,"dbg B0[%i]=",dbg));
8423+ DBG(dump_mem("",(unsigned char *)&regs->B[aboff+0][dbg],BYPG,BYPG));
8424+}
8425+
8426+////////////////////////////////////////////////////////////////////////////////
8427+
8428+ // EXTERNAL LOOP - 8 bytes per operation
8429+ for(i=0;i<8;i++){
8430+
8431+ DBG(fprintf(stderr,"--BEGIN EXTERNAL LOOP %i\n",i));
8432+
8433+#ifdef STREAM_INIT
8434+ for(b=0;b<4;b++){
8435+ in1[b]=sb_g[8*i+4+b];
8436+ in2[b]=sb_g[8*i+b];
8437+ }
8438+#endif
8439+
8440+ // INTERNAL LOOP - 2 bits per iteration
8441+ for(j=0; j<4; j++){
8442+
8443+ DBG(fprintf(stderr,"---BEGIN INTERNAL LOOP %i (EXT %i, INT %i)\n",j,i,j));
8444+
8445+ // from A0..A9, 35 bits are selected as inputs to 7 s-boxes
8446+ // 5 bits input per s-box, 2 bits output per s-box
8447+
8448+ // we can select bits with zero masking and shifting operations
8449+ // and synthesize s-boxes with optimized boolean functions.
8450+ // this is the actual reason we do all the crazy transposition
8451+ // stuff to switch between normal and bit slice representations.
8452+ // this code really flies.
8453+
8454+ fe=regs->A[aboff+3][0];fa=regs->A[aboff+0][2];fb=regs->A[aboff+5][1];fc=regs->A[aboff+6][3];fd=regs->A[aboff+8][0];
8455+/* 1000 1110 1110 0001 : lev 7: */ //tmp0=( fa^( fb^( ( ( ( fa|fb )^fc )|( fc^fd ) )^ALL_ONES ) ) );
8456+/* 1110 0010 0011 0011 : lev 6: */ //tmp1=( ( fa|fb )^( ( fc&( fa|( fb^fd ) ) )^ALL_ONES ) );
8457+/* 0011 0110 1000 1101 : lev 5: */ //tmp2=( fa^( ( fb&fd )^( ( fa&fd )|fc ) ) );
8458+/* 0101 0101 1001 0011 : lev 5: */ //tmp3=( ( fa&fc )^( fa^( ( fa&fb )|fd ) ) );
8459+/* 1000 1110 1110 0001 : lev 7: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFOR(FFXOR(FFOR(fa,fb),fc),FFXOR(fc,fd)),FF1())));
8460+/* 1110 0010 0011 0011 : lev 6: */ tmp1=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fa,FFXOR(fb,fd))),FF1()));
8461+/* 0011 0110 1000 1101 : lev 5: */ tmp2=FFXOR(fa,FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),fc)));
8462+/* 0101 0101 1001 0011 : lev 5: */ tmp3=FFXOR(FFAND(fa,fc),FFXOR(fa,FFOR(FFAND(fa,fb),fd)));
8463+ s1a=FFXOR(tmp0,FFAND(fe,tmp1));
8464+ s1b=FFXOR(tmp2,FFAND(fe,tmp3));
8465+//dump_mem("s1as1b-fe",&fe,BYPG,BYPG);
8466+//dump_mem("s1as1b-fa",&fa,BYPG,BYPG);
8467+//dump_mem("s1as1b-fb",&fb,BYPG,BYPG);
8468+//dump_mem("s1as1b-fc",&fc,BYPG,BYPG);
8469+//dump_mem("s1as1b-fd",&fd,BYPG,BYPG);
8470+
8471+ fe=regs->A[aboff+1][1];fa=regs->A[aboff+2][2];fb=regs->A[aboff+5][3];fc=regs->A[aboff+6][0];fd=regs->A[aboff+8][1];
8472+/* 1001 1110 0110 0001 : lev 6: */ //tmp0=( fa^( ( fb&( fc|fd ) )^( fc^( fd^ALL_ONES ) ) ) );
8473+/* 0000 0011 0111 1011 : lev 5: */ //tmp1=( ( fa&( fb^fd ) )|( ( fa|fb )&fc ) );
8474+/* 1100 0110 1101 0010 : lev 6: */ //tmp2=( ( fb&fd )^( ( fa&fd )|( fb^( fc^ALL_ONES ) ) ) );
8475+/* 0001 1110 1111 0101 : lev 5: */ //tmp3=( ( fa&fd )|( fa^( fb^( fc&fd ) ) ) );
8476+/* 1001 1110 0110 0001 : lev 6: */ tmp0=FFXOR(fa,FFXOR(FFAND(fb,FFOR(fc,fd)),FFXOR(fc,FFXOR(fd,FF1()))));
8477+/* 0000 0011 0111 1011 : lev 5: */ tmp1=FFOR(FFAND(fa,FFXOR(fb,fd)),FFAND(FFOR(fa,fb),fc));
8478+/* 1100 0110 1101 0010 : lev 6: */ tmp2=FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),FFXOR(fb,FFXOR(fc,FF1()))));
8479+/* 0001 1110 1111 0101 : lev 5: */ tmp3=FFOR(FFAND(fa,fd),FFXOR(fa,FFXOR(fb,FFAND(fc,fd))));
8480+ s2a=FFXOR(tmp0,FFAND(fe,tmp1));
8481+ s2b=FFXOR(tmp2,FFAND(fe,tmp3));
8482+
8483+ fe=regs->A[aboff+0][3];fa=regs->A[aboff+1][0];fb=regs->A[aboff+4][1];fc=regs->A[aboff+4][3];fd=regs->A[aboff+5][2];
8484+/* 0100 1011 1001 0110 : lev 5: */ //tmp0=( fa^( fb^( ( fc&( fa|fd ) )^fd ) ) );
8485+/* 1101 0101 1000 1100 : lev 7: */ //tmp1=( ( fa&fc )^( ( fa^fd )|( ( fb|fc )^( fd^ALL_ONES ) ) ) );
8486+/* 0010 0111 1101 1000 : lev 4: */ //tmp2=( fa^( ( ( fb^fc )&fd )^fc ) );
8487+/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES;
8488+/* 0100 1011 1001 0110 : lev 5: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFAND(fc,FFOR(fa,fd)),fd)));
8489+/* 1101 0101 1000 1100 : lev 7: */ tmp1=FFXOR(FFAND(fa,fc),FFOR(FFXOR(fa,fd),FFXOR(FFOR(fb,fc),FFXOR(fd,FF1()))));
8490+/* 0010 0111 1101 1000 : lev 4: */ tmp2=FFXOR(fa,FFXOR(FFAND(FFXOR(fb,fc),fd),fc));
8491+/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1();
8492+ s3a=FFXOR(tmp0,FFAND(FFNOT(fe),tmp1));
8493+ s3b=FFXOR(tmp2,FFAND(fe,tmp3));
8494+
8495+ fe=regs->A[aboff+2][3];fa=regs->A[aboff+0][1];fb=regs->A[aboff+1][3];fc=regs->A[aboff+3][2];fd=regs->A[aboff+7][0];
8496+/* 1011 0101 0100 1001 : lev 7: */ //tmp0=( fa^( ( fc&( fa^fd ) )|( fb^( fc|( fd^ALL_ONES ) ) ) ) );
8497+/* 0010 1101 0110 0110 : lev 6: */ //tmp1=( ( fa&fb )^( fb^( ( ( fa|fc )&fd )^fc ) ) );
8498+/* 0110 0111 1101 0000 : lev 7: */ //tmp2=( fa^( ( fb&fc )|( ( ( fa&( fb^fd ) )|fc )^fd ) ) );
8499+/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES;
8500+/* 1011 0101 0100 1001 : lev 7: */ tmp0=FFXOR(fa,FFOR(FFAND(fc,FFXOR(fa,fd)),FFXOR(fb,FFOR(fc,FFXOR(fd,FF1())))));
8501+/* 0010 1101 0110 0110 : lev 6: */ tmp1=FFXOR(FFAND(fa,fb),FFXOR(fb,FFXOR(FFAND(FFOR(fa,fc),fd),fc)));
8502+/* 0110 0111 1101 0000 : lev 7: */ tmp2=FFXOR(fa,FFOR(FFAND(fb,fc),FFXOR(FFOR(FFAND(fa,FFXOR(fb,fd)),fc),fd)));
8503+/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1();
8504+ s4a=FFXOR(tmp0,FFAND(fe,FFXOR(tmp1,tmp0)));
8505+ s4b=FFXOR(FFXOR(s4a,tmp2),FFAND(fe,tmp3));
8506+
8507+ fe=regs->A[aboff+4][2];fa=regs->A[aboff+3][3];fb=regs->A[aboff+5][0];fc=regs->A[aboff+7][1];fd=regs->A[aboff+8][2];
8508+/* 1000 1111 0011 0010 : lev 7: */ //tmp0=( ( ( fa&( fb|fc ) )^fb )|( ( ( fa^fc )|fd )^ALL_ONES ) );
8509+/* 0110 1011 0000 1011 : lev 6: */ //tmp1=( fb^( ( fc^fd )&( fc^( fb|( fa^fd ) ) ) ) );
8510+/* 0001 1010 0111 1001 : lev 6: */ //tmp2=( ( fa&fc )^( fb^( ( fb|( fa^fc ) )&fd ) ) );
8511+/* 0101 1101 1101 0101 : lev 4: */ //tmp3=( ( ( fa^fb )&( fc^ALL_ONES ) )|fd );
8512+/* 1000 1111 0011 0010 : lev 7: */ tmp0=FFOR(FFXOR(FFAND(fa,FFOR(fb,fc)),fb),FFXOR(FFOR(FFXOR(fa,fc),fd),FF1()));
8513+/* 0110 1011 0000 1011 : lev 6: */ tmp1=FFXOR(fb,FFAND(FFXOR(fc,fd),FFXOR(fc,FFOR(fb,FFXOR(fa,fd)))));
8514+/* 0001 1010 0111 1001 : lev 6: */ tmp2=FFXOR(FFAND(fa,fc),FFXOR(fb,FFAND(FFOR(fb,FFXOR(fa,fc)),fd)));
8515+/* 0101 1101 1101 0101 : lev 4: */ tmp3=FFOR(FFAND(FFXOR(fa,fb),FFXOR(fc,FF1())),fd);
8516+ s5a=FFXOR(tmp0,FFAND(fe,tmp1));
8517+ s5b=FFXOR(tmp2,FFAND(fe,tmp3));
8518+
8519+ fe=regs->A[aboff+2][1];fa=regs->A[aboff+3][1];fb=regs->A[aboff+4][0];fc=regs->A[aboff+6][2];fd=regs->A[aboff+8][3];
8520+/* 0011 0110 0010 1101 : lev 6: */ //tmp0=( ( ( fa&fc )&fd )^( ( fb&( fa|fd ) )^fc ) );
8521+/* 1110 1110 1011 1011 : lev 3: */ //tmp1=( ( ( fa^fc )&fd )^ALL_ONES );
8522+/* 0101 1000 0110 0111 : lev 6: */ //tmp2=( ( fa&( fb|fc ) )^( fb^( ( fb&fc )|fd ) ) );
8523+/* 0001 0011 0000 0001 : lev 5: */ //tmp3=( fc&( ( fa&( fb^fd ) )^( fb|fd ) ) );
8524+/* 0011 0110 0010 1101 : lev 6: */ tmp0=FFXOR(FFAND(FFAND(fa,fc),fd),FFXOR(FFAND(fb,FFOR(fa,fd)),fc));
8525+/* 1110 1110 1011 1011 : lev 3: */ tmp1=FFXOR(FFAND(FFXOR(fa,fc),fd),FF1());
8526+/* 0101 1000 0110 0111 : lev 6: */ tmp2=FFXOR(FFAND(fa,FFOR(fb,fc)),FFXOR(fb,FFOR(FFAND(fb,fc),fd)));
8527+/* 0001 0011 0000 0001 : lev 5: */ tmp3=FFAND(fc,FFXOR(FFAND(fa,FFXOR(fb,fd)),FFOR(fb,fd)));
8528+ s6a=FFXOR(tmp0,FFAND(fe,tmp1));
8529+ s6b=FFXOR(tmp2,FFAND(fe,tmp3));
8530+
8531+ fe=regs->A[aboff+1][2];fa=regs->A[aboff+2][0];fb=regs->A[aboff+6][1];fc=regs->A[aboff+7][2];fd=regs->A[aboff+7][3];
8532+/* 0111 1000 1001 0110 : lev 5: */ //tmp0=( fb^( ( fc&fd )|( fa^( fc^fd ) ) ) );
8533+/* 0100 1001 0101 1011 : lev 6: */ //tmp1=( ( fb|fd )&( ( fa&fc )|( fb^( fc^fd ) ) ) );
8534+/* 0100 1001 1011 1001 : lev 5: */ //tmp2=( ( fa|fb )^( ( fc&( fb|fd ) )^fd ) );
8535+/* 1111 1111 1101 1101 : lev 3: */ //tmp3=( fd|( ( fa&fc )^ALL_ONES ) );
8536+/* 0111 1000 1001 0110 : lev 5: */ tmp0=FFXOR(fb,FFOR(FFAND(fc,fd),FFXOR(fa,FFXOR(fc,fd))));
8537+/* 0100 1001 0101 1011 : lev 6: */ tmp1=FFAND(FFOR(fb,fd),FFOR(FFAND(fa,fc),FFXOR(fb,FFXOR(fc,fd))));
8538+/* 0100 1001 1011 1001 : lev 5: */ tmp2=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fb,fd)),fd));
8539+/* 1111 1111 1101 1101 : lev 3: */ tmp3=FFOR(fd,FFXOR(FFAND(fa,fc),FF1()));
8540+ s7a=FFXOR(tmp0,FFAND(fe,tmp1));
8541+ s7b=FFXOR(tmp2,FFAND(fe,tmp3));
8542+
8543+
8544+/*
8545+ we have just done this:
8546+
8547+ int sbox1[0x20] = {2,0,1,1,2,3,3,0, 3,2,2,0,1,1,0,3, 0,3,3,0,2,2,1,1, 2,2,0,3,1,1,3,0};
8548+ int sbox2[0x20] = {3,1,0,2,2,3,3,0, 1,3,2,1,0,0,1,2, 3,1,0,3,3,2,0,2, 0,0,1,2,2,1,3,1};
8549+ int sbox3[0x20] = {2,0,1,2,2,3,3,1, 1,1,0,3,3,0,2,0, 1,3,0,1,3,0,2,2, 2,0,1,2,0,3,3,1};
8550+ int sbox4[0x20] = {3,1,2,3,0,2,1,2, 1,2,0,1,3,0,0,3, 1,0,3,1,2,3,0,3, 0,3,2,0,1,2,2,1};
8551+ int sbox5[0x20] = {2,0,0,1,3,2,3,2, 0,1,3,3,1,0,2,1, 2,3,2,0,0,3,1,1, 1,0,3,2,3,1,0,2};
8552+ int sbox6[0x20] = {0,1,2,3,1,2,2,0, 0,1,3,0,2,3,1,3, 2,3,0,2,3,0,1,1, 2,1,1,2,0,3,3,0};
8553+ int sbox7[0x20] = {0,3,2,2,3,0,0,1, 3,0,1,3,1,2,2,1, 1,0,3,3,0,1,1,2, 2,3,1,0,2,3,0,2};
8554+
8555+ s12 = sbox1[ (((A3>>0)&1)<<4) | (((A0>>2)&1)<<3) | (((A5>>1)&1)<<2) | (((A6>>3)&1)<<1) | (((A8>>0)&1)<<0) ]
8556+ |sbox2[ (((A1>>1)&1)<<4) | (((A2>>2)&1)<<3) | (((A5>>3)&1)<<2) | (((A6>>0)&1)<<1) | (((A8>>1)&1)<<0) ];
8557+ s34 = sbox3[ (((A0>>3)&1)<<4) | (((A1>>0)&1)<<3) | (((A4>>1)&1)<<2) | (((A4>>3)&1)<<1) | (((A5>>2)&1)<<0) ]
8558+ |sbox4[ (((A2>>3)&1)<<4) | (((A0>>1)&1)<<3) | (((A1>>3)&1)<<2) | (((A3>>2)&1)<<1) | (((A7>>0)&1)<<0) ];
8559+ s56 = sbox5[ (((A4>>2)&1)<<4) | (((A3>>3)&1)<<3) | (((A5>>0)&1)<<2) | (((A7>>1)&1)<<1) | (((A8>>2)&1)<<0) ]
8560+ |sbox6[ (((A2>>1)&1)<<4) | (((A3>>1)&1)<<3) | (((A4>>0)&1)<<2) | (((A6>>2)&1)<<1) | (((A8>>3)&1)<<0) ];
8561+ s7 = sbox7[ (((A1>>2)&1)<<4) | (((A2>>0)&1)<<3) | (((A6>>1)&1)<<2) | (((A7>>2)&1)<<1) | (((A7>>3)&1)<<0) ];
8562+*/
8563+
8564+ // use 4x4 xor to produce extra nibble for T3
8565+
8566+ extra_B[3]=FFXOR(FFXOR(FFXOR(regs->B[aboff+2][0],regs->B[aboff+5][1]),regs->B[aboff+6][2]),regs->B[aboff+8][3]);
8567+ extra_B[2]=FFXOR(FFXOR(FFXOR(regs->B[aboff+5][0],regs->B[aboff+7][1]),regs->B[aboff+2][3]),regs->B[aboff+3][2]);
8568+ extra_B[1]=FFXOR(FFXOR(FFXOR(regs->B[aboff+4][3],regs->B[aboff+7][2]),regs->B[aboff+3][0]),regs->B[aboff+4][1]);
8569+ extra_B[0]=FFXOR(FFXOR(FFXOR(regs->B[aboff+8][2],regs->B[aboff+5][3]),regs->B[aboff+2][1]),regs->B[aboff+7][0]);
8570+for(dbg=0;dbg<4;dbg++){
8571+ DBG(fprintf(stderr,"extra_B[%i]=",dbg));
8572+ DBG(dump_mem("",(unsigned char *)&extra_B[dbg],BYPG,BYPG));
8573+}
8574+
8575+ // T1 = xor all inputs
8576+ // in1, in2, D are only used in T1 during initialisation, not generation
8577+ for(b=0;b<4;b++){
8578+ regs->A[aboff-1][b]=FFXOR(regs->A[aboff+9][b],regs->X[b]);
8579+ }
8580+
8581+#ifdef STREAM_INIT
8582+ for(b=0;b<4;b++){
8583+ regs->A[aboff-1][b]=FFXOR(FFXOR(regs->A[aboff-1][b],regs->D[b]),((j % 2) ? in2[b] : in1[b]));
8584+ }
8585+#endif
8586+
8587+for(dbg=0;dbg<4;dbg++){
8588+ DBG(fprintf(stderr,"next_A0[%i]=",dbg));
8589+ DBG(dump_mem("",(unsigned char *)&regs->A[aboff-1][dbg],BYPG,BYPG));
8590+}
8591+
8592+ // T2 = xor all inputs
8593+ // in1, in2 are only used in T2 during initialisation, not generation
8594+ // if p=0, use this, if p=1, rotate the result left
8595+ for(b=0;b<4;b++){
8596+ regs->B[aboff-1][b]=FFXOR(FFXOR(regs->B[aboff+6][b],regs->B[aboff+9][b]),regs->Y[b]);
8597+ }
8598+
8599+#ifdef STREAM_INIT
8600+ for(b=0;b<4;b++){
8601+ regs->B[aboff-1][b]=FFXOR(regs->B[aboff-1][b],((j % 2) ? in1[b] : in2[b]));
8602+ }
8603+#endif
8604+
8605+for(dbg=0;dbg<4;dbg++){
8606+ DBG(fprintf(stderr,"next_B0[%i]=",dbg));
8607+ DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG));
8608+}
8609+
8610+ // if p=1, rotate left (yes, this is what we're doing)
8611+ tmp3=regs->B[aboff-1][3];
8612+ regs->B[aboff-1][3]=FFXOR(regs->B[aboff-1][3],FFAND(FFXOR(regs->B[aboff-1][3],regs->B[aboff-1][2]),regs->p));
8613+ regs->B[aboff-1][2]=FFXOR(regs->B[aboff-1][2],FFAND(FFXOR(regs->B[aboff-1][2],regs->B[aboff-1][1]),regs->p));
8614+ regs->B[aboff-1][1]=FFXOR(regs->B[aboff-1][1],FFAND(FFXOR(regs->B[aboff-1][1],regs->B[aboff-1][0]),regs->p));
8615+ regs->B[aboff-1][0]=FFXOR(regs->B[aboff-1][0],FFAND(FFXOR(regs->B[aboff-1][0],tmp3),regs->p));
8616+
8617+for(dbg=0;dbg<4;dbg++){
8618+ DBG(fprintf(stderr,"next_B0[%i]=",dbg));
8619+ DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG));
8620+}
8621+
8622+ // T3 = xor all inputs
8623+ for(b=0;b<4;b++){
8624+ regs->D[b]=FFXOR(FFXOR(regs->E[b],regs->Z[b]),extra_B[b]);
8625+ }
8626+
8627+for(dbg=0;dbg<4;dbg++){
8628+ DBG(fprintf(stderr,"D[%i]=",dbg));
8629+ DBG(dump_mem("",(unsigned char *)&regs->D[dbg],BYPG,BYPG));
8630+}
8631+
8632+ // T4 = sum, carry of Z + E + r
8633+ for(b=0;b<4;b++){
8634+ next_E[b]=regs->F[b];
8635+ }
8636+
8637+ tmp0=FFXOR(regs->Z[0],regs->E[0]);
8638+ tmp1=FFAND(regs->Z[0],regs->E[0]);
8639+ regs->F[0]=FFXOR(regs->E[0],FFAND(regs->q,FFXOR(regs->Z[0],regs->r)));
8640+ tmp3=FFAND(tmp0,regs->r);
8641+ tmp4=FFOR(tmp1,tmp3);
8642+
8643+ tmp0=FFXOR(regs->Z[1],regs->E[1]);
8644+ tmp1=FFAND(regs->Z[1],regs->E[1]);
8645+ regs->F[1]=FFXOR(regs->E[1],FFAND(regs->q,FFXOR(regs->Z[1],tmp4)));
8646+ tmp3=FFAND(tmp0,tmp4);
8647+ tmp4=FFOR(tmp1,tmp3);
8648+
8649+ tmp0=FFXOR(regs->Z[2],regs->E[2]);
8650+ tmp1=FFAND(regs->Z[2],regs->E[2]);
8651+ regs->F[2]=FFXOR(regs->E[2],FFAND(regs->q,FFXOR(regs->Z[2],tmp4)));
8652+ tmp3=FFAND(tmp0,tmp4);
8653+ tmp4=FFOR(tmp1,tmp3);
8654+
8655+ tmp0=FFXOR(regs->Z[3],regs->E[3]);
8656+ tmp1=FFAND(regs->Z[3],regs->E[3]);
8657+ regs->F[3]=FFXOR(regs->E[3],FFAND(regs->q,FFXOR(regs->Z[3],tmp4)));
8658+ tmp3=FFAND(tmp0,tmp4);
8659+ regs->r=FFXOR(regs->r,FFAND(regs->q,FFXOR(FFOR(tmp1,tmp3),regs->r))); // ultimate carry
8660+
8661+/*
8662+ we have just done this: (believe it or not)
8663+
8664+ if (q) {
8665+ F = Z + E + r;
8666+ r = (F >> 4) & 1;
8667+ F = F & 0x0f;
8668+ }
8669+ else {
8670+ F = E;
8671+ }
8672+*/
8673+ for(b=0;b<4;b++){
8674+ regs->E[b]=next_E[b];
8675+ }
8676+for(dbg=0;dbg<4;dbg++){
8677+ DBG(fprintf(stderr,"F[%i]=",dbg));
8678+ DBG(dump_mem("",(unsigned char *)&regs->F[dbg],BYPG,BYPG));
8679+}
8680+DBG(fprintf(stderr,"r="));
8681+DBG(dump_mem("",(unsigned char *)&regs->r,BYPG,BYPG));
8682+for(dbg=0;dbg<4;dbg++){
8683+ DBG(fprintf(stderr,"E[%i]=",dbg));
8684+ DBG(dump_mem("",(unsigned char *)&regs->E[dbg],BYPG,BYPG));
8685+}
8686+
8687+ // this simple instruction is virtually shifting all the shift registers
8688+ aboff--;
8689+
8690+/*
8691+ we've just done this:
8692+
8693+ A9=A8;A8=A7;A7=A6;A6=A5;A5=A4;A4=A3;A3=A2;A2=A1;A1=A0;A0=next_A0;
8694+ B9=B8;B8=B7;B7=B6;B6=B5;B5=B4;B4=B3;B3=B2;B2=B1;B1=B0;B0=next_B0;
8695+*/
8696+
8697+ regs->X[0]=s1a;
8698+ regs->X[1]=s2a;
8699+ regs->X[2]=s3b;
8700+ regs->X[3]=s4b;
8701+ regs->Y[0]=s3a;
8702+ regs->Y[1]=s4a;
8703+ regs->Y[2]=s5b;
8704+ regs->Y[3]=s6b;
8705+ regs->Z[0]=s5a;
8706+ regs->Z[1]=s6a;
8707+ regs->Z[2]=s1b;
8708+ regs->Z[3]=s2b;
8709+ regs->p=s7a;
8710+ regs->q=s7b;
8711+for(dbg=0;dbg<4;dbg++){
8712+ DBG(fprintf(stderr,"X[%i]=",dbg));
8713+ DBG(dump_mem("",(unsigned char *)&regs->X[dbg],BYPG,BYPG));
8714+}
8715+for(dbg=0;dbg<4;dbg++){
8716+ DBG(fprintf(stderr,"Y[%i]=",dbg));
8717+ DBG(dump_mem("",(unsigned char *)&regs->Y[dbg],BYPG,BYPG));
8718+}
8719+for(dbg=0;dbg<4;dbg++){
8720+ DBG(fprintf(stderr,"Z[%i]=",dbg));
8721+ DBG(dump_mem("",(unsigned char *)&regs->Z[dbg],BYPG,BYPG));
8722+}
8723+DBG(fprintf(stderr,"p="));
8724+DBG(dump_mem("",(unsigned char *)&regs->p,BYPG,BYPG));
8725+DBG(fprintf(stderr,"q="));
8726+DBG(dump_mem("",(unsigned char *)&regs->q,BYPG,BYPG));
8727+
8728+#ifdef STREAM_NORMAL
8729+ // require 4 loops per output byte
8730+ // 2 output bits are a function of the 4 bits of D
8731+ // xor 2 by 2
8732+ cb_g[8*i+7-2*j]=FFXOR(regs->D[2],regs->D[3]);
8733+ cb_g[8*i+6-2*j]=FFXOR(regs->D[0],regs->D[1]);
8734+for(dbg=0;dbg<8;dbg++){
8735+ DBG(fprintf(stderr,"op[%i]=",dbg));
8736+ DBG(dump_mem("",(unsigned char *)&cb_g[8*i+dbg],BYPG,BYPG));
8737+}
8738+#endif
8739+
8740+DBG(fprintf(stderr,"---END INTERNAL LOOP\n"));
8741+
8742+ } // INTERNAL LOOP
8743+
8744+DBG(fprintf(stderr,"--END EXTERNAL LOOP\n"));
8745+
8746+ } // EXTERNAL LOOP
8747+
8748+ // move 32 steps forward, ready for next call
8749+ for(k=0;k<10;k++){
8750+ for(b=0;b<4;b++){
8751+DBG(fprintf(stderr,"moving forward AB k=%i b=%i\n",k,b));
8752+ regs->A[32+k][b]=regs->A[k][b];
8753+ regs->B[32+k][b]=regs->B[k][b];
8754+ }
8755+ }
8756+
8757+
8758+////////////////////////////////////////////////////////////////////////////////
8759+
8760+#ifdef STREAM_NORMAL
8761+for(j=0;j<64;j++){
8762+ DBG(fprintf(stderr,"postcall prerot cb[%2i]=",j));
8763+ DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG));
8764+}
8765+
8766+#if GROUP_PARALLELISM==32
8767+trasp64_32_88cw(cb);
8768+#endif
8769+#if GROUP_PARALLELISM==64
8770+trasp64_64_88cw(cb);
8771+#endif
8772+#if GROUP_PARALLELISM==128
8773+trasp64_128_88cw(cb);
8774+#endif
8775+
8776+for(j=0;j<64;j++){
8777+ DBG(fprintf(stderr,"postcall postrot cb[%2i]=",j));
8778+ DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG));
8779+}
8780+#endif
8781+
8782+#ifdef STREAM_INIT
8783+ DBG(fprintf(stderr,":::::::::: END STREAM INIT\n"));
8784+#endif
8785+#ifdef STREAM_NORMAL
8786+ DBG(fprintf(stderr,":::::::::: END STREAM NORMAL\n"));
8787+#endif
8788+
8789+}
8790+
8791Index: libs/libmythtv/dvbrecorder.cpp
8792===================================================================
8793--- libs/libmythtv/dvbrecorder.cpp.orig 2006-06-20 17:34:53.000000000 -0400
8794+++ libs/libmythtv/dvbrecorder.cpp 2006-06-20 17:39:37.000000000 -0400
8795@@ -67,6 +67,11 @@
8796 #include "../libavformat/avformat.h"
8797 #include "../libavformat/mpegts.h"
8798
8799+#include "FFdecsa/FFdecsa.h"
8800+static pthread_mutex_t csalock=PTHREAD_MUTEX_INITIALIZER;
8801+static pthread_mutex_t allkeylock=PTHREAD_MUTEX_INITIALIZER;
8802+static QMap<int, struct decsaKey *> decsaMap;
8803+
8804 const int DVBRecorder::PMT_PID = 0x1700; ///< PID for rewritten PMT
8805 const int DVBRecorder::TSPACKETS_BETWEEN_PSIP_SYNC = 2000;
8806 const int DVBRecorder::POLL_INTERVAL = 50; // msec
8807@@ -111,6 +116,9 @@
8808
8809 _buffer = new unsigned char[_buffer_size];
8810 bzero(_buffer, _buffer_size);
8811+
8812+ initDeCSA();
8813+
8814 }
8815
8816 DVBRecorder::~DVBRecorder()
8817@@ -190,6 +198,8 @@
8818 _reset_pid_filters = true;
8819
8820 bzero(_ps_rec_buf, sizeof(unsigned char) * 3);
8821+ _csa_softKey = DVBRecorder::UpdateDeCSAKeys(
8822+ _card_number_option, 'G', 0, NULL, 0); //'G' = 'Get'
8823 }
8824
8825 bool DVBRecorder::Open(void)
8826@@ -411,6 +421,8 @@
8827
8828 void DVBRecorder::StartRecording(void)
8829 {
8830+ uint bufferOffset = 0;
8831+
8832 if (!Open())
8833 {
8834 _error = true;
8835@@ -467,12 +479,14 @@
8836 if (Poll())
8837 {
8838 #ifdef USE_DRB
8839- ssize_t len = _drb->Read(_buffer, _buffer_size);
8840+ ssize_t len = _drb->Read(_buffer + bufferOffset,
8841+ _buffer_size - bufferOffset);
8842 #else // if !USE_DRB
8843- ssize_t len = safe_read(_stream_fd, _buffer, _buffer_size);
8844+ ssize_t len = safe_read(_stream_fd, _buffer + bufferOffset,
8845+ _buffer_size - bufferOffset);
8846 #endif // !USE_DRB
8847- if (len > 0)
8848- ProcessDataTS(_buffer, len);
8849+ if (len + bufferOffset > 0)
8850+ bufferOffset = ProcessDataTS(_buffer, len + bufferOffset);
8851 }
8852
8853 #ifdef USE_DRB
8854@@ -690,14 +704,20 @@
8855 return len;
8856
8857 uint pos = 0;
8858- uint end = len - TSPacket::SIZE;
8859+ int end = len - TSPacket::SIZE;
8860+
8861+ // we may not decode the entire buffer
8862+ if ((end = PreProcessDataTS(buffer, len)) < 0)
8863+ return 0;
8864 while (pos <= end)
8865 {
8866 const TSPacket *pkt = reinterpret_cast<const TSPacket*>(&buffer[pos]);
8867 ProcessTSPacket(*pkt);
8868 pos += TSPacket::SIZE;
8869 }
8870- return len - pos;
8871+
8872+ // At this point pos is the start of the first packet not handled
8873+ return PostProcessDataTS(buffer, pos, len - pos);
8874 }
8875
8876 bool DVBRecorder::ProcessTSPacket(const TSPacket& tspacket)
8877@@ -1205,3 +1225,203 @@
8878 int cardnum = _card_number_option;
8879 GENERAL(debugmsg);
8880 }
8881+
8882+void DVBRecorder::initDeCSA()
8883+{ // Re-sizes the TS read buffer to one suggested decrypt cluster and resets all soft-CSA state.
8884+ delete [] _buffer; // _buffer is created with new[], so it must be released with delete[]
8885+
8886+ _csa_cluster_size = get_suggested_cluster_size();
8887+ _buffer_size = MPEG_TS_PKT_SIZE * _csa_cluster_size; // room for exactly one cluster of packets
8888+ _buffer = new unsigned char[_buffer_size];
8889+ bzero(_buffer, _buffer_size);
8890+ _csa_cluster = new unsigned char *[2 * _csa_cluster_size]; // start / end ptr for each packet
8891+ _csa_clusterptr = _csa_cluster; // next free slot in the cluster array
8892+ _csa_pkt_buf_count = 0; // no packets queued for decryption yet
8893+#if 0
8894+ _csa_even_pkt_count = 0;
8895+ _csa_odd_pkt_count = 0;
8896+#endif
8897+
8898+ memset(_csa_even_ck, 0, 8);
8899+ memset(_csa_odd_ck, 0, 8);
8900+ _csa_softKey = NULL; // filled in later through UpdateDeCSAKeys(..., 'G', ...)
8901+}
8902+
8903+struct decsaKey *DVBRecorder::UpdateDeCSAKeys(
8904+ int cardnum, unsigned char keytype,
8905+ int index, unsigned char *key, int pid)
8906+{ // Finds (or creates) the key table for cardnum, applies the update selected by keytype, and returns the table.
8907+ struct decsaKey *decsaPtr = NULL;
8908+ // keytype: 'I' clear valid_keys, 'G' fetch only, 'P' map pid -> index, 'F' map every pid -> index, 'E'/'O' set even/odd control word
8909+ VERBOSE(VB_RECORD, QString("Got Key type(%1) idx: %2, pid: %3")
8910+ .arg(keytype).arg(index).arg(pid));
8911+ // allkeylock guards decsaMap itself; each table additionally has its own keylock
8912+ pthread_mutex_lock(&allkeylock);
8913+ QMap<int, struct decsaKey *>::Iterator it = decsaMap.begin();
8914+ while (it != decsaMap.end())
8915+ {
8916+ if (it.key() == cardnum)
8917+ {
8918+ decsaPtr = it.data();
8919+ break;
8920+ }
8921+ else
8922+ it++;
8923+ }
8924+ if (keytype == 'I')
8925+ {
8926+ if (decsaPtr) // 'I' never creates a table; the result is NULL when the card has none
8927+ {
8928+ pthread_mutex_lock(&decsaPtr->keylock);
8929+ decsaPtr->valid_keys = 0x00;
8930+ pthread_mutex_unlock(&decsaPtr->keylock);
8931+ }
8932+ pthread_mutex_unlock(&allkeylock);
8933+ return decsaPtr;
8934+ }
8935+ if (!decsaPtr) {
8936+ decsaPtr = new struct decsaKey;
8937+ memset(decsaPtr,0, sizeof(struct decsaKey));
8938+ pthread_mutex_init(&decsaPtr->keylock, NULL);
8939+ decsaMap[cardnum] = decsaPtr;
8940+ memset(decsaPtr->keys, 0, sizeof(decsaPtr->keys));
8941+ memset(decsaPtr->pidmap, 0, sizeof(decsaPtr->pidmap));
8942+ }
8943+ pthread_mutex_lock(&decsaPtr->keylock); // take the table lock before dropping the map lock
8944+ pthread_mutex_unlock(&allkeylock);
8945+ if (keytype == 'G' || index < 0 ||
8946+ index >= (int)(sizeof(decsaPtr->keys) / sizeof(decsaPtr->keys[0]))) {
8947+ pthread_mutex_unlock(&decsaPtr->keylock); // 'G' is read-only; an out-of-range key slot is rejected untouched
8948+ return decsaPtr;
8949+ }
8950+ if (! decsaPtr->use_decsa)
8951+ decsaPtr->use_decsa = true;
8952+ if (! decsaPtr->keys[index])
8953+ decsaPtr->keys[index] = get_key_struct();
8954+ // index is now known to be a valid slot in keys[]
8955+ if (keytype == 'P' && pid >= 0 && pid < MAX_CSA_PIDS) // PID (ignored when outside pidmap)
8956+ decsaPtr->pidmap[pid] = index;
8957+ else if (keytype == 'F') //Force PID
8958+ memset(decsaPtr->pidmap, index, sizeof(decsaPtr->pidmap));
8959+ else if (keytype == 'E' && key) //Even (a NULL key is ignored)
8960+ {
8961+ decsaPtr->valid_keys |= 0x01;
8962+ set_even_control_word(decsaPtr->keys[index], key);
8963+ }
8964+ else if (keytype == 'O' && key) //Odd (a NULL key is ignored)
8965+ {
8966+ decsaPtr->valid_keys |= 0x02;
8967+ set_odd_control_word(decsaPtr->keys[index], key);
8968+ }
8969+
8970+ pthread_mutex_unlock(&decsaPtr->keylock);
8971+ return decsaPtr;
8972+}
8973+
8974+int DVBRecorder::PreProcessDataTS(unsigned char *buffer, uint len)
8975+{ // Queues scrambled packets of buffer[0..len) into the cluster array, decrypts them, and returns the byte offset of the last usable packet (negative if none).
8976+ int decodedPackets = 0, unencrypted = 0; // unencrypted counts clear packets seen before anything was queued
8977+ bool new_range = true; // true while no [start,end) pair is currently open
8978+ int curr_idx = -1; // key slot of the first scrambled packet seen in this call, -1 until one is seen
8979+ int offset = -2; // index of the open pair relative to _csa_clusterptr; -2 means no pair yet
8980+
8981+ // packets already added to the cluster array can be skipped
8982+ uint pos = _csa_pkt_buf_count * TSPacket::SIZE;
8983+ uint end = len - TSPacket::SIZE; // NOTE(review): unsigned, wraps if len < TSPacket::SIZE - confirm the caller rules that out
8984+
8985+ // walk the packets to setup the decrypt cluster array
8986+ while (pos <= end)
8987+ {
8988+ if (buffer[pos + 3] & 0xC0) // top two bits of header byte 3 non-zero
8989+ {
8990+ // encrypted
8991+ int index = _csa_softKey->pidmap[((buffer[pos + 1] << 8) +
8992+ (buffer[pos + 2])) & (MAX_CSA_PIDS-1)]; // NOTE(review): _csa_softKey is dereferenced unchecked and pidmap is read without keylock
8993+ if (curr_idx < 0 || index == curr_idx)
8994+ {
8995+ //same or no index
8996+ curr_idx = index;
8997+
8998+ if (new_range)
8999+ {
9000+ // the buffer is allocated to never be
9001+ // bigger than the cluster array
9002+ new_range = false;
9003+ offset += 2;
9004+ _csa_clusterptr[offset] = buffer + pos; // start pointer of the new pair
9005+ }
9006+ _csa_clusterptr[ offset + 1] = buffer + pos + TSPacket::SIZE; // end pointer: one past this packet
9007+ // advance packet count and packet position
9008+ _csa_pkt_buf_count++;
9009+ }
9010+ else
9011+ new_range = true; // different key slot: close the pair; this packet is not queued
9012+ }
9013+ else if (! new_range)
9014+ { // clear packet while a pair is open: extend the pair over it
9015+ _csa_clusterptr[ offset + 1] = buffer + pos + TSPacket::SIZE;
9016+ _csa_pkt_buf_count++;
9017+ }
9018+ else if (_csa_pkt_buf_count == 0)
9019+ unencrypted++; // leading clear packet, usable without decryption
9020+ pos += TSPacket::SIZE;
9021+ }
9022+ _csa_clusterptr += offset + 2; // step past the pairs written above (no movement when none were written)
9023+
9024+ // terminate the cluster array
9025+ _csa_clusterptr[0] = NULL;
9026+
9027+ // set the keys (if needed) and decrypt
9028+ pthread_mutex_lock(&_csa_softKey->keylock);
9029+ if(_csa_softKey->use_decsa && _csa_pkt_buf_count) {
9030+ if (_csa_softKey->valid_keys && curr_idx > 0) { // NOTE(review): key slot 0 is never used to decrypt - confirm that is intended
9031+ pthread_mutex_lock(&csalock); // csalock serialises decrypt_packets() calls made through this function
9032+ decodedPackets = decrypt_packets(_csa_softKey->keys[curr_idx],
9033+ _csa_cluster);
9034+ pthread_mutex_unlock(&csalock);
9035+ pthread_mutex_unlock(&_csa_softKey->keylock);
9036+ }
9037+ else
9038+ {
9039+ // Clear all queued packets since we don't have a valid key
9040+ pthread_mutex_unlock(&_csa_softKey->keylock);
9041+ _csa_pkt_buf_count = 0;
9042+ _csa_clusterptr = _csa_cluster;
9043+ return (unencrypted - 1) * TSPacket::SIZE; // only the leading clear packets are usable; -SIZE when there are none
9044+ }
9045+ } else {
9046+ // Not using decsa so pass through all the packets
9047+ pthread_mutex_unlock(&_csa_softKey->keylock);
9048+ _csa_cluster[0] = NULL; // ensure the cluster table doesn't overflow when not using decsa
9049+ return len - TSPacket::SIZE;
9050+ }
9051+ // packets that were queued but not decoded stay counted for the next call
9052+ _csa_pkt_buf_count -= decodedPackets;
9053+
9054+ // Return the start of the last available packet
9055+ return (unencrypted + decodedPackets - 1) * TSPacket::SIZE;
9056+}
9057+
9058+uint DVBRecorder::PostProcessDataTS(unsigned char *buffer, uint offset, uint len)
9059+{
9060+ if (!len) {
9061+ // nothing is left over: rewind the cluster write pointer
9062+ _csa_clusterptr = _csa_cluster;
9063+ return 0;
9064+ }
9065+
9066+ // slide the len unhandled bytes down to the front of the buffer
9067+ memmove(buffer, buffer + offset, len);
9068+
9069+ // the stored start/end pointers still refer to the old layout, so
9070+ // pull each pair back by offset; a NULL start marks the end
9071+ unsigned char **slot;
9072+ for (slot = _csa_cluster; slot[0] != NULL; slot += 2) {
9073+ slot[0] -= offset;
9074+ slot[1] -= offset;
9075+ }
9076+
9077+ // the terminator slot is where the next pair will be written
9078+ _csa_clusterptr = slot;
9079+ return len;
9080+}
This page took 1.293125 seconds and 4 git commands to generate.