]> git.pld-linux.org Git - packages/MigrationTools.git/blob - MigrationTools-utf8.patch
df44802b5734ce9738dd269a2f2228cd190c862c
[packages/MigrationTools.git] / MigrationTools-utf8.patch
1 diff -ruN migrationtools-47.orig/migrate_common.ph migrationtools-47/migrate_common.ph
2 --- migrationtools-47.orig/migrate_common.ph    2006-01-25 05:18:16.000000000 +0100
3 +++ migrationtools-47/migrate_common.ph 2007-04-07 00:05:52.000000000 +0200
4 @@ -89,6 +89,9 @@
5  # such as person.
6  $EXTENDED_SCHEMA = 0;
7  
8 +# Comment this out if your ldap server does not support UTF8 encoding
9 +$USE_UTF8 = 1;
10 +
11  #
12  # allow environment variables to override predefines
13  #
14 diff -ruN migrationtools-47.orig/migrate_passwd.pl migrationtools-47/migrate_passwd.pl
15 --- migrationtools-47.orig/migrate_passwd.pl    2006-01-25 05:18:16.000000000 +0100
16 +++ migrationtools-47/migrate_passwd.pl 2007-04-07 00:06:13.000000000 +0200
17 @@ -36,6 +36,7 @@
18  #
19  # Thanks to Peter Jacob Slot <peter@vision.auk.dk>.
20  #
21 +# UTF8 support by Jonas Smedegaard <dr@jones.dk>.
22  
23  require 'migrate_common.ph';
24  
25 @@ -53,28 +54,6 @@
26         next if /^#/;
27         next if /^\+/;
28  
29 -       s/Ä/Ae/g;
30 -       s/Ë/Ee/g;
31 -       s/Ï/Ie/g;
32 -       s/Ö/Oe/g;
33 -       s/Ü/Ue/g;
34 -
35 -       s/ä/ae/g;
36 -       s/ë/ee/g;
37 -       s/ï/ie/g;
38 -       s/ö/oe/g;
39 -       s/ü/ue/g;
40 -       s/ÿ/ye/g;
41 -       s/ß/ss/g;
42 -       s/é/e/g;
43 -
44 -       s/Æ/Ae/g;
45 -       s/æ/ae/g;
46 -       s/Ø/Oe/g;
47 -       s/ø/oe/g;
48 -       s/Å/Ae/g;
49 -       s/å/ae/g;
50 -
51         local($user, $pwd, $uid, $gid, $gecos, $homedir, $shell) = split(/:/);
52         
53         if ($use_stdout) {
54 @@ -100,25 +79,25 @@
55         $sn = $tmp[$#tmp];
56         pop(@tmp);
57         $givenname=join(' ',@tmp);
58 -       
59 +
60         print $HANDLE "dn: uid=$user,$NAMINGCONTEXT\n";
61         print $HANDLE "uid: $user\n";
62 -       print $HANDLE "cn: $cn\n";
63 +       &print_utf8($HANDLE, "cn", $cn);
64  
65         if ($EXTENDED_SCHEMA) {
66                 if ($wphone) {
67 -                       print $HANDLE "telephoneNumber: $wphone\n";
68 +                       &print_utf8($HANDLE, "telephoneNumber", $wphone);
69                 }
70                 if ($office) {
71 -                       print $HANDLE "roomNumber: $office\n";
72 +                       &print_utf8($HANDLE, "roomNumber", $office);
73                 }
74                 if ($hphone) {
75 -                       print $HANDLE "homePhone: $hphone\n";
76 +                       &print_utf8($HANDLE, "homePhone", $hphone);
77                 }
78                 if ($givenname) {
79 -                       print $HANDLE "givenName: $givenname\n";
80 +                       &print_utf8($HANDLE, "givenName", $givenname);
81                 }
82 -               print $HANDLE "sn: $sn\n";
83 +               &print_utf8($HANDLE, "sn", $sn);
84                 if ($DEFAULT_MAIL_DOMAIN) {
85                         print $HANDLE "mail: $user\@$DEFAULT_MAIL_DOMAIN\n";
86                 }
87 @@ -174,7 +153,7 @@
88         }
89  
90         if ($gecos) {
91 -               print $HANDLE "gecos: $gecos\n";
92 +               &print_ascii($HANDLE, "gecos", $gecos);
93         }
94  
95         print $HANDLE "\n";
96 @@ -225,3 +204,177 @@
97         }
98  }
99  
100 +sub print_utf8
101 +{
102 +       # Write "$attribute: $content" to $HANDLE as one LDIF line.  Printable
103 +       # ASCII is written verbatim; anything else is written base64-encoded
104 +       # ("attr:: ...") when $USE_UTF8 is set, or handed to print_ascii.
105 +       my ($HANDLE, $attribute, $content) = @_;
106 +
107 +       if (&validate_ascii($content)) {
108 +               print $HANDLE "$attribute: $content\n";
109 +       } elsif (!$USE_UTF8) {
110 +               &print_ascii($HANDLE, "$attribute", "$content");
111 +       } else {
112 +               # Bytes 0x80-0xFF are taken to be Latin-1 and re-encoded as UTF-8.
113 +               $content = &recode_latin1_to_utf8($content);
114 +               die "ERROR: Illegal character(s) in UTF-8 string: \"$content\""
115 +                       unless &validate_utf8($content);
116 +               print $HANDLE "$attribute\:: " . &encode_base64($content, "") . "\n";
117 +       }
118 +}
119 +
120 +sub print_ascii
121 +{
122 +       # Write "$attribute: $content" to $HANDLE after reducing $content to
123 +       # printable ASCII.  Dies, quoting the offending characters, when
124 +       # something is left over that has no transliteration.
125 +       my ($HANDLE, $attribute, $content) = @_;
126 +
127 +       # Input that fails validate_utf8 is first pushed through the
128 +       # Latin-1 -> UTF-8 recoding, so every input takes the same way back.
129 +       $content = &recode_latin1_to_utf8($content)
130 +               unless &validate_utf8($content);
131 +       $content = &recode_custom_to_ascii(&recode_utf8_to_latin1($content));
132 +
133 +       unless (&validate_ascii($content)) {
134 +               # Remove everything printable so only the offenders remain.
135 +               (my $badchars = $content) =~ s/[\x20-\x7E]//g;
136 +               die "ERROR: Illegal character(s) \"$badchars\" in ASCII string: \"$content\"";
137 +       }
138 +
139 +       print $HANDLE "$attribute: $content\n";
140 +}
141 +
142 +sub recode_latin1_to_utf8
143 +{
144 +       # Return a copy of the argument in which each byte 0x80-0xFF becomes two
145 +       # bytes: 0xC0 | (top two bits), then 0x80 | (low six bits).
146 +       my ($text) = @_;
147 +       $text =~ s{([\x80-\xFF])}{chr(0xC0 | (ord($1) >> 6)) . chr(0x80 | (ord($1) & 0x3F))}eg;
148 +       return $text;
149 +}
150 +
151 +sub recode_utf8_to_latin1
152 +{
153 +       # Return a copy of the argument in which each pair (0xC2 or 0xC3, then a
154 +       # byte 0x80-0xBF) is folded into one byte; everything else is left alone.
155 +       my ($text) = @_;
156 +       $text =~ s{([\xC2\xC3])([\x80-\xBF])}{chr(((ord($1) << 6) & 0xC0) | (ord($2) & 0x3F))}eg;
157 +       return $text;
158 +}
159 +
160 +sub recode_custom_to_ascii # transliterate Latin-1 letters 0xC0-0xFF to plain ASCII
161 +{      # Returns the converted copy; bytes without an entry below (e.g. 0xD7, 0xF7) pass through unchanged.
162 +       my ($content) = @_;
163 +       for ($content) {
164 +               s/\xc0/A/g; # latin capital letter a with grave
165 +               s/\xc1/A/g; # latin capital letter a with acute
166 +               s/\xc2/A/g; # latin capital letter a with circumflex
167 +               s/\xc3/A/g; # latin capital letter a with tilde
168 +               s/\xc4/Ae/g; # latin capital letter a with diaeresis
169 +               s/\xc5/Aa/g; # latin capital letter a with ring above
170 +               s/\xc6/Ae/g; # latin capital letter ae
171 +               s/\xc7/C/g; # latin capital letter c with cedilla
172 +               s/\xc8/E/g; # latin capital letter e with grave
173 +               s/\xc9/E/g; # latin capital letter e with acute
174 +               s/\xca/E/g; # latin capital letter e with circumflex
175 +               s/\xcb/Ee/g; # latin capital letter e with diaeresis
176 +               s/\xcc/I/g; # latin capital letter i with grave
177 +               s/\xcd/I/g; # latin capital letter i with acute
178 +               s/\xce/I/g; # latin capital letter i with circumflex
179 +               s/\xcf/Ie/g; # latin capital letter i with diaeresis
180 +               s/\xd0/Dh/g; # latin capital letter eth (icelandic)
181 +               s/\xd1/N/g; # latin capital letter n with tilde
182 +               s/\xd2/O/g; # latin capital letter o with grave
183 +               s/\xd3/O/g; # latin capital letter o with acute
184 +               s/\xd4/O/g; # latin capital letter o with circumflex
185 +               s/\xd5/O/g; # latin capital letter o with tilde
186 +               s/\xd6/Oe/g; # latin capital letter o with diaeresis
187 +               s/\xd8/Oe/g; # latin capital letter o with stroke
188 +               s/\xd9/U/g; # latin capital letter u with grave
189 +               s/\xda/U/g; # latin capital letter u with acute
190 +               s/\xdb/U/g; # latin capital letter u with circumflex
191 +               s/\xdc/Ue/g; # latin capital letter u with diaeresis
192 +               s/\xdd/Y/g; # latin capital letter y with acute
193 +               s/\xde/TH/g; # latin capital letter thorn (icelandic)
194 +               s/\xdf/ss/g; # latin small letter sharp s (german)
195 +               s/\xe0/a/g; # latin small letter a with grave
196 +               s/\xe1/a/g; # latin small letter a with acute
197 +               s/\xe2/a/g; # latin small letter a with circumflex
198 +               s/\xe3/a/g; # latin small letter a with tilde
199 +               s/\xe4/ae/g; # latin small letter a with diaeresis
200 +               s/\xe5/aa/g; # latin small letter a with ring above
201 +               s/\xe6/ae/g; # latin small letter ae
202 +               s/\xe7/c/g; # latin small letter c with cedilla
203 +               s/\xe8/e/g; # latin small letter e with grave
204 +               s/\xe9/e/g; # latin small letter e with acute
205 +               s/\xea/e/g; # latin small letter e with circumflex
206 +               s/\xeb/ee/g; # latin small letter e with diaeresis
207 +               s/\xec/i/g; # latin small letter i with grave
208 +               s/\xed/i/g; # latin small letter i with acute
209 +               s/\xee/i/g; # latin small letter i with circumflex
210 +               s/\xef/ie/g; # latin small letter i with diaeresis (lower-case twin of the \xcf => "Ie" entry)
211 +               s/\xf0/dh/g; # latin small letter eth (icelandic)
212 +               s/\xf1/n/g; # latin small letter n with tilde
213 +               s/\xf2/o/g; # latin small letter o with grave
214 +               s/\xf3/o/g; # latin small letter o with acute
215 +               s/\xf4/o/g; # latin small letter o with circumflex
216 +               s/\xf5/o/g; # latin small letter o with tilde
217 +               s/\xf6/oe/g; # latin small letter o with diaeresis
218 +               s/\xf8/oe/g; # latin small letter o with stroke
219 +               s/\xf9/u/g; # latin small letter u with grave
220 +               s/\xfa/u/g; # latin small letter u with acute
221 +               s/\xfb/u/g; # latin small letter u with circumflex
222 +               s/\xfc/ue/g; # latin small letter u with diaeresis
223 +               s/\xfd/y/g; # latin small letter y with acute
224 +               s/\xfe/th/g; # latin small letter thorn (icelandic)
225 +               s/\xff/ye/g; # latin small letter y with diaeresis
226 +       }
227 +       return ($content);
228 +}
229 +
230 +sub encode_base64
231 +# Found in email by Baruzzi Giovanni <giovanni.baruzzi@allianz-leben.de> on openldap mailinglist
232 +# Usage: encode_base64($data [, $eol]) -- returns $data base64-encoded; $eol defaults to "\n".
233 +# Historically this module has been implemented as pure perl code.
234 +# The XS implementation runs about 20 times faster, but the Perl
235 +# code might be more portable, so it is still here.
236 +{
237 +       my $res = "";
238 +       my $eol = $_[1];
239 +       $eol = "\n" unless defined $eol;
240 +       pos($_[0]) = 0; # ensure start at the beginning
241 +       while ($_[0] =~ /(.{1,45})/gs) { # uuencode the input in chunks of at most 45 characters
242 +               $res .= substr(pack('u', $1), 1); # skip the first character of the uuencoded line (its length marker)
243 +               chop($res); # drop the last character, the newline pack('u') ends the line with
244 +       }
245 +       $res =~ tr|` -_|AA-Za-z0-9+/|;               # `# help emacs
246 +       # the tr above maps the uuencode characters onto the base64 alphabet; now fix padding at the end
247 +       my $padding = (3 - length($_[0]) % 3) % 3;
248 +       $res =~ s/.{$padding}$/'=' x $padding/e if $padding;
249 +       # break encoded string into lines of no more than 76 characters each
250 +       if (length $eol) {
251 +               $res =~ s/(.{1,76})/$1$eol/g;
252 +       }
253 +       $res;
254 +}
255 +
256 +sub validate_ascii # true when $content consists solely of printable ASCII (0x20-0x7E); "" counts as valid
257 +{
258 +       my ($content) = @_;
259 +       return $content =~ /\A[\x20-\x7E]*\z/; # \A..\z rather than ^..$: "$" would let a trailing "\n" slip through
260 +}
261 +
262 +sub validate_utf8 # true when $content holds only printable ASCII and/or characters U+0080-U+FFFF
263 +{
264 +       my ($content) = @_;
265 +       if (&validate_ascii($content)) {
266 +               return 1;
267 +       }
268 +       if ($] < 5.008) { # $] has the form 5.XXXYYY, so Perl 5.8.0 is 5.008, not 5.8
269 +               ## No Perl support for UTF-8 before 5.8! ;-/
270 +               return;
271 +       }
272 +       $content =~ /^[\x20-\x7E\x{0080}-\x{FFFF}]*$/; # NOTE: a range check only; on a byte string every byte >= 0x80 passes
273 +}
This page took 0.090642 seconds and 3 git commands to generate.