1 diff -ruN migrationtools-47.orig/migrate_common.ph migrationtools-47/migrate_common.ph
2 --- migrationtools-47.orig/migrate_common.ph 2006-01-25 05:18:16.000000000 +0100
3 +++ migrationtools-47/migrate_common.ph 2007-04-07 00:05:52.000000000 +0200
8 +# Comment this out if your LDAP server does not support UTF-8 encoding
12 # allow environment variables to override predefines
14 diff -ruN migrationtools-47.orig/migrate_passwd.pl migrationtools-47/migrate_passwd.pl
15 --- migrationtools-47.orig/migrate_passwd.pl 2006-01-25 05:18:16.000000000 +0100
16 +++ migrationtools-47/migrate_passwd.pl 2007-04-07 00:06:13.000000000 +0200
19 # Thanks to Peter Jacob Slot <peter@vision.auk.dk>.
21 +# UTF8 support by Jonas Smedegaard <dr@jones.dk>.
23 require 'migrate_common.ph';
51 local($user, $pwd, $uid, $gid, $gecos, $homedir, $shell) = split(/:/);
57 $givenname=join(' ',@tmp);
60 print $HANDLE "dn: uid=$user,$NAMINGCONTEXT\n";
61 print $HANDLE "uid: $user\n";
62 - print $HANDLE "cn: $cn\n";
63 + &print_utf8($HANDLE, "cn", $cn);
65 if ($EXTENDED_SCHEMA) {
67 - print $HANDLE "telephoneNumber: $wphone\n";
68 + &print_utf8($HANDLE, "telephoneNumber", $wphone);
71 - print $HANDLE "roomNumber: $office\n";
72 + &print_utf8($HANDLE, "roomNumber", $office);
75 - print $HANDLE "homePhone: $hphone\n";
76 + &print_utf8($HANDLE, "homePhone", $hphone);
79 - print $HANDLE "givenName: $givenname\n";
80 + &print_utf8($HANDLE, "givenName", $givenname);
82 - print $HANDLE "sn: $sn\n";
83 + &print_utf8($HANDLE, "sn", $sn);
84 if ($DEFAULT_MAIL_DOMAIN) {
85 print $HANDLE "mail: $user\@$DEFAULT_MAIL_DOMAIN\n";
91 - print $HANDLE "gecos: $gecos\n";
92 + &print_ascii($HANDLE, "gecos", $gecos);
102 + my($HANDLE, $attribute, $content) = @_;
104 + if (&validate_ascii($content)) {
105 + print $HANDLE "$attribute: $content\n";
106 + } elsif ($USE_UTF8) {
107 +# $content = &recode_custom_to_utf8($content);
108 + $content = &recode_latin1_to_utf8($content);
109 + if (&validate_utf8($content)) {
110 + $content = &encode_base64($content, "");
111 + print $HANDLE "$attribute\:: $content\n";
113 + die "ERROR: Illegal character(s) in UTF-8 string: \"$content\"";
116 + &print_ascii($HANDLE, "$attribute", "$content");
122 + my($HANDLE, $attribute, $content) = @_;
124 + if (&validate_utf8($content)) {
125 + $content = &recode_utf8_to_latin1($content);
127 + $content = &recode_latin1_to_utf8($content);
128 + $content = &recode_utf8_to_latin1($content);
130 + $content = &recode_custom_to_ascii($content);
131 + if (&validate_ascii($content)) {
132 + print $HANDLE "$attribute: $content\n";
134 + my $badchars = $content;
138 + die "ERROR: Illegal character(s) \"$badchars\" in ASCII string: \"$content\"";
142 +sub recode_latin1_to_utf8
144 + my ($content) = @_;
146 + s/([\x80-\xFF])/chr(0xC0|ord($1)>>6).chr(0x80|ord($1)&0x3F)/eg;
151 +sub recode_utf8_to_latin1
153 + my ($content) = @_;
155 + s/([\xC2\xC3])([\x80-\xBF])/chr(ord($1)<<6&0xC0|ord($2)&0x3F)/eg;
160 +sub recode_custom_to_ascii
162 + my ($content) = @_;
164 + s/\xc0/A/g; # latin capital letter a with grave
165 + s/\xc1/A/g; # latin capital letter a with acute
166 + s/\xc2/A/g; # latin capital letter a with circumflex
167 + s/\xc3/A/g; # latin capital letter a with tilde
168 + s/\xc4/Ae/g; # latin capital letter a with diaeresis
169 + s/\xc5/Aa/g; # latin capital letter a with ring above
170 + s/\xc6/Ae/g; # latin capital letter ae
171 + s/\xc7/C/g; # latin capital letter c with cedilla
172 + s/\xc8/E/g; # latin capital letter e with grave
173 + s/\xc9/E/g; # latin capital letter e with acute
174 + s/\xca/E/g; # latin capital letter e with circumflex
175 + s/\xcb/Ee/g; # latin capital letter e with diaeresis
176 + s/\xcc/I/g; # latin capital letter i with grave
177 + s/\xcd/I/g; # latin capital letter i with acute
178 + s/\xce/I/g; # latin capital letter i with circumflex
179 + s/\xcf/Ie/g; # latin capital letter i with diaeresis
180 + s/\xd0/Dh/g; # latin capital letter eth (icelandic)
181 + s/\xd1/N/g; # latin capital letter n with tilde
182 + s/\xd2/O/g; # latin capital letter o with grave
183 + s/\xd3/O/g; # latin capital letter o with acute
184 + s/\xd4/O/g; # latin capital letter o with circumflex
185 + s/\xd5/O/g; # latin capital letter o with tilde
186 + s/\xd6/Oe/g; # latin capital letter o with diaeresis
187 + s/\xd8/Oe/g; # latin capital letter o with stroke
188 + s/\xd9/U/g; # latin capital letter u with grave
189 + s/\xda/U/g; # latin capital letter u with acute
190 + s/\xdb/U/g; # latin capital letter u with circumflex
191 + s/\xdc/Ue/g; # latin capital letter u with diaeresis
192 + s/\xdd/Y/g; # latin capital letter y with acute
193 + s/\xde/TH/g; # latin capital letter thorn (icelandic)
194 + s/\xdf/ss/g; # latin small letter sharp s (german)
195 + s/\xe0/a/g; # latin small letter a with grave
196 + s/\xe1/a/g; # latin small letter a with acute
197 + s/\xe2/a/g; # latin small letter a with circumflex
198 + s/\xe3/a/g; # latin small letter a with tilde
199 + s/\xe4/ae/g; # latin small letter a with diaeresis
200 + s/\xe5/aa/g; # latin small letter a with ring above
201 + s/\xe6/ae/g; # latin small letter ae
202 + s/\xe7/c/g; # latin small letter c with cedilla
203 + s/\xe8/e/g; # latin small letter e with grave
204 + s/\xe9/e/g; # latin small letter e with acute
205 + s/\xea/e/g; # latin small letter e with circumflex
206 + s/\xeb/ee/g; # latin small letter e with diaeresis
207 + s/\xec/i/g; # latin small letter i with grave
208 + s/\xed/i/g; # latin small letter i with acute
209 + s/\xee/i/g; # latin small letter i with circumflex
210 + s/\xef/ii/g; # latin small letter i with diaeresis
211 + s/\xf0/dh/g; # latin small letter eth (icelandic)
212 + s/\xf1/n/g; # latin small letter n with tilde
213 + s/\xf2/o/g; # latin small letter o with grave
214 + s/\xf3/o/g; # latin small letter o with acute
215 + s/\xf4/o/g; # latin small letter o with circumflex
216 + s/\xf5/o/g; # latin small letter o with tilde
217 + s/\xf6/oe/g; # latin small letter o with diaeresis
218 + s/\xf8/oe/g; # latin small letter o with stroke
219 + s/\xf9/u/g; # latin small letter u with grave
220 + s/\xfa/u/g; # latin small letter u with acute
221 + s/\xfb/u/g; # latin small letter u with circumflex
222 + s/\xfc/ue/g; # latin small letter u with diaeresis
223 + s/\xfd/y/g; # latin small letter y with acute
224 + s/\xfe/th/g; # latin small letter thorn (icelandic)
225 + s/\xff/ye/g; # latin small letter y with diaeresis
231 +# Found in an email by Baruzzi Giovanni <giovanni.baruzzi@allianz-leben.de> on the OpenLDAP mailing list
233 +# Historically this module has been implemented as pure perl code.
234 +# The XS implementation runs about 20 times faster, but the Perl
235 +# code might be more portable, so it is still here.
239 + $eol = "\n" unless defined $eol;
240 + pos($_[0]) = 0; # ensure start at the beginning
241 + while ($_[0] =~ /(.{1,45})/gs) {
242 + $res .= substr(pack('u', $1), 1);
245 + $res =~ tr|` -_|AA-Za-z0-9+/|; # `# help emacs
246 + # fix padding at the end
247 + my $padding = (3 - length($_[0]) % 3) % 3;
248 + $res =~ s/.{$padding}$/'=' x $padding/e if $padding;
249 + # break encoded string into lines of no more than 76 characters each
251 + $res =~ s/(.{1,76})/$1$eol/g;
258 + my ($content) = @_;
259 + $content =~ /^[\x20-\x7E]*$/;
264 + my ($content) = @_;
265 + if (&validate_ascii($content)) {
269 + ## No Perl support for UTF-8! ;-/
272 + $content =~ /^[\x20-\x7E\x{0080}-\x{FFFF}]*$/;