- better solution from Debian

author Jan Rękorajski <baggins@pld-linux.org>

Thu, 25 Jun 2009 10:02:24 +0000 (10:02 +0000)

committer cvs2git <feedback@pld-linux.org>

Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)
author Jan Rękorajski <baggins@pld-linux.org>
Thu, 25 Jun 2009 10:02:24 +0000 (10:02 +0000)
committer cvs2git <feedback@pld-linux.org>
Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)
diff --git a/MigrationTools-utf8.patch b/MigrationTools-utf8.patch

index c216ac4a13532f410fa1b910fa1a794e899effeb..fe6f06d6b64d6d7b89f71a3e90d770b6a43289f8 100644 (file)
--- a/MigrationTools-utf8.patch
+++ b/MigrationTools-utf8.patch
@@ -1,52 +1,276 @@
-diff -ur MigrationTools-47/migrate_passwd.pl MigrationTools-47.x/migrate_passwd.pl
---- MigrationTools-47/migrate_passwd.pl        2009-06-22 14:37:33.091177870 +0200
-+++ MigrationTools-47.x/migrate_passwd.pl      2009-06-22 14:27:03.000000000 +0200
-@@ -39,6 +39,9 @@
- 
- require '/etc/openldap/migrate_common.ph';
- 
-+use Text::Iconv;
-+$converter = Text::Iconv->new("UTF-8", "ASCII//TRANSLIT");
-+
- $PROGRAM = "migrate_passwd.pl";
- $NAMINGCONTEXT = &getsuffix($PROGRAM);
- 
-@@ -70,28 +73,6 @@
-       next if /^#/;
-       next if /^\+/;
- 
--      s/Ä/Ae/g;
--      s/Ë/Ee/g;
--      s/Ï/Ie/g;
--      s/Ö/Oe/g;
--      s/Ü/Ue/g;
--
--      s/ä/ae/g;
--      s/ë/ee/g;
--      s/ï/ie/g;
--      s/ö/oe/g;
--      s/ü/ue/g;
--      s/ÿ/ye/g;
--      s/ß/ss/g;
--      s/é/e/g;
--
--      s/Æ/Ae/g;
--      s/æ/ae/g;
--      s/Ø/Oe/g;
--      s/ø/oe/g;
--      s/Å/Ae/g;
--      s/å/ae/g;
--
-       local($user, $pwd, $uid, $gid, $gecos, $homedir, $shell) = split(/:/);
-       next if (defined($minuid) and ($uid < $minuid));
-       next if (defined($maxuid) and ($uid > $maxuid));
-@@ -193,7 +174,8 @@
-       }
- 
-       if ($gecos) {
--              print $HANDLE "gecos: $gecos\n";
-+              $cgecos = $converter->convert($gecos);
-+              print $HANDLE "gecos: $cgecos\n";
-       }
- 
-       print $HANDLE "\n";
+--- migrationtools-47.orig/debian/patches/1001_utf8_support.patch
++++ migrationtools-47/debian/patches/1001_utf8_support.patch
+@@ -0,0 +1,273 @@
++diff -ruN migrationtools-47.orig/migrate_common.ph migrationtools-47/migrate_common.ph
++--- migrationtools-47.orig/migrate_common.ph  2006-01-25 05:18:16.000000000 +0100
+++++ migrationtools-47/migrate_common.ph       2007-04-07 00:05:52.000000000 +0200
++@@ -89,6 +89,9 @@
++ # such as person.
++ $EXTENDED_SCHEMA = 0;
++ 
+++# Comment this out if your ldap server does not support UTF8 encoding
+++$USE_UTF8 = 1;
+++
++ #
++ # allow environment variables to override predefines
++ #
++diff -ruN migrationtools-47.orig/migrate_passwd.pl migrationtools-47/migrate_passwd.pl
++--- migrationtools-47.orig/migrate_passwd.pl  2006-01-25 05:18:16.000000000 +0100
+++++ migrationtools-47/migrate_passwd.pl       2007-04-07 00:06:13.000000000 +0200
++@@ -36,6 +36,7 @@
++ #
++ # Thanks to Peter Jacob Slot <peter@vision.auk.dk>.
++ #
+++# UTF8 support by Jonas Smedegaard <dr@jones.dk>.
++ 
++ require 'migrate_common.ph';
++ 
++@@ -53,28 +54,6 @@
++      next if /^#/;
++      next if /^\+/;
++ 
++-     s/Ä/Ae/g;
++-     s/Ë/Ee/g;
++-     s/Ï/Ie/g;
++-     s/Ö/Oe/g;
++-     s/Ü/Ue/g;
++-
++-     s/ä/ae/g;
++-     s/ë/ee/g;
++-     s/ï/ie/g;
++-     s/ö/oe/g;
++-     s/ü/ue/g;
++-     s/ÿ/ye/g;
++-     s/ß/ss/g;
++-     s/é/e/g;
++-
++-     s/Æ/Ae/g;
++-     s/æ/ae/g;
++-     s/Ø/Oe/g;
++-     s/ø/oe/g;
++-     s/Å/Ae/g;
++-     s/å/ae/g;
++-
++      local($user, $pwd, $uid, $gid, $gecos, $homedir, $shell) = split(/:/);
++      
++      if ($use_stdout) {
++@@ -100,25 +79,25 @@
++      $sn = $tmp[$#tmp];
++      pop(@tmp);
++      $givenname=join(' ',@tmp);
++-     
+++
++      print $HANDLE "dn: uid=$user,$NAMINGCONTEXT\n";
++      print $HANDLE "uid: $user\n";
++-     print $HANDLE "cn: $cn\n";
+++     &print_utf8($HANDLE, "cn", $cn);
++ 
++      if ($EXTENDED_SCHEMA) {
++              if ($wphone) {
++-                     print $HANDLE "telephoneNumber: $wphone\n";
+++                     &print_utf8($HANDLE, "telephoneNumber", $wphone);
++              }
++              if ($office) {
++-                     print $HANDLE "roomNumber: $office\n";
+++                     &print_utf8($HANDLE, "roomNumber", $office);
++              }
++              if ($hphone) {
++-                     print $HANDLE "homePhone: $hphone\n";
+++                     &print_utf8($HANDLE, "homePhone", $hphone);
++              }
++              if ($givenname) {
++-                     print $HANDLE "givenName: $givenname\n";
+++                     &print_utf8($HANDLE, "givenName", $givenname);
++              }
++-             print $HANDLE "sn: $sn\n";
+++             &print_utf8($HANDLE, "sn", $sn);
++              if ($DEFAULT_MAIL_DOMAIN) {
++                      print $HANDLE "mail: $user\@$DEFAULT_MAIL_DOMAIN\n";
++              }
++@@ -174,7 +153,7 @@
++      }
++ 
++      if ($gecos) {
++-             print $HANDLE "gecos: $gecos\n";
+++             &print_ascii($HANDLE, "gecos", $gecos);
++      }
++ 
++      print $HANDLE "\n";
++@@ -225,3 +204,177 @@
++      }
++ }
++ 
+++sub print_utf8 # Print "$attribute: $content" to $HANDLE; non-ASCII content is base64-encoded when $USE_UTF8 is set.
+++{
+++     my($HANDLE, $attribute, $content) = @_;
+++# Content accepted by validate_ascii is written verbatim; anything else is recoded first.
+++     if (&validate_ascii($content)) {
+++             print $HANDLE "$attribute: $content\n";
+++     } elsif ($USE_UTF8) {
+++#            $content = &recode_custom_to_utf8($content);
+++             $content = &recode_latin1_to_utf8($content); # NOTE(review): applied unconditionally; input that is already UTF-8 would presumably be encoded twice -- confirm
+++             if (&validate_utf8($content)) {
+++                     $content = &encode_base64($content, ""); # "" as second argument: no line breaks in the encoded value
+++                     print $HANDLE "$attribute\:: $content\n"; # "::" instead of ":" precedes the base64 value (presumably LDIF syntax -- confirm)
+++             } else {
+++                     die "ERROR: Illegal character(s) in UTF-8 string: \"$content\"";
+++             }
+++     } else {
+++             &print_ascii($HANDLE, "$attribute", "$content"); # $USE_UTF8 is false: fall back to the ASCII-only printer
+++     }
+++}
+++
+++sub print_ascii # Print "$attribute: $content" to $HANDLE after reducing $content to printable ASCII; dies when that is not possible.
+++{
+++     my($HANDLE, $attribute, $content) = @_;
+++# Step 1: pass the content through recode_utf8_to_latin1, whichever way validate_utf8 classifies it.
+++     if (&validate_utf8($content)) {
+++             $content = &recode_utf8_to_latin1($content);
+++     } else {
+++             $content = &recode_latin1_to_utf8($content); # NOTE(review): encoding and then decoding looks like a
+++             $content = &recode_utf8_to_latin1($content); # round trip that returns the input unchanged -- confirm
+++     }
+++     $content = &recode_custom_to_ascii($content); # Step 2: replace accented letters with ASCII spellings
+++     if (&validate_ascii($content)) {
+++             print $HANDLE "$attribute: $content\n";
+++     } else {
+++             my $badchars = $content;
+++             for ($badchars) {
+++                     s/[\x20-\x7E]//g; # delete every character in 0x20-0x7E so only the offending ones remain for the message
+++             }
+++             die "ERROR: Illegal character(s) \"$badchars\" in ASCII string: \"$content\"";
+++     }
+++}
+++
+++sub recode_latin1_to_utf8 # Return a copy of $content in which each character 0x80-0xFF is replaced by a two-character sequence.
+++{
+++     my ($content) = @_;
+++     for ($content) { # single-element loop: aliases $_ to $content so the bare s/// below edits it
+++             s/([\x80-\xFF])/chr(0xC0|ord($1)>>6).chr(0x80|ord($1)&0x3F)/eg; # lead = 0xC0 | top two bits, trail = 0x80 | low six bits
+++     }
+++     return ($content)
+++}
+++
+++sub recode_utf8_to_latin1 # Return a copy of $content in which each pair (0xC2 or 0xC3, then 0x80-0xBF) is collapsed into one character.
+++{
+++     my ($content) = @_;
+++     for ($content) { # single-element loop: aliases $_ to $content so the bare s/// below edits it
+++             s/([\xC2\xC3])([\x80-\xBF])/chr(ord($1)<<6&0xC0|ord($2)&0x3F)/eg; # other lead values are not matched and stay as they are
+++     }
+++     return ($content)
+++}
+++
+++sub recode_custom_to_ascii # Return a copy of $content with characters 0xC0-0xFF replaced by the ASCII spellings listed below.
+++{ # NOTE(review): there is no entry for \xd7 or \xf7, nor for 0x80-0xBF; such characters pass through unchanged -- confirm that is intended
+++     my ($content) = @_;
+++     for ($content) {
+++             s/\xc0/A/g; # latin capital letter a with grave
+++             s/\xc1/A/g; # latin capital letter a with acute
+++             s/\xc2/A/g; # latin capital letter a with circumflex
+++             s/\xc3/A/g; # latin capital letter a with tilde
+++             s/\xc4/Ae/g; # latin capital letter a with diaeresis
+++             s/\xc5/Aa/g; # latin capital letter a with ring above
+++             s/\xc6/Ae/g; # latin capital letter ae
+++             s/\xc7/C/g; # latin capital letter c with cedilla
+++             s/\xc8/E/g; # latin capital letter e with grave
+++             s/\xc9/E/g; # latin capital letter e with acute
+++             s/\xca/E/g; # latin capital letter e with circumflex
+++             s/\xcb/Ee/g; # latin capital letter e with diaeresis
+++             s/\xcc/I/g; # latin capital letter i with grave
+++             s/\xcd/I/g; # latin capital letter i with acute
+++             s/\xce/I/g; # latin capital letter i with circumflex
+++             s/\xcf/Ie/g; # latin capital letter i with diaeresis
+++             s/\xd0/Dh/g; # latin capital letter eth (icelandic)
+++             s/\xd1/N/g; # latin capital letter n with tilde
+++             s/\xd2/O/g; # latin capital letter o with grave
+++             s/\xd3/O/g; # latin capital letter o with acute
+++             s/\xd4/O/g; # latin capital letter o with circumflex
+++             s/\xd5/O/g; # latin capital letter o with tilde
+++             s/\xd6/Oe/g; # latin capital letter o with diaeresis
+++             s/\xd8/Oe/g; # latin capital letter o with stroke
+++             s/\xd9/U/g; # latin capital letter u with grave
+++             s/\xda/U/g; # latin capital letter u with acute
+++             s/\xdb/U/g; # latin capital letter u with circumflex
+++             s/\xdc/Ue/g; # latin capital letter u with diaeresis
+++             s/\xdd/Y/g; # latin capital letter y with acute
+++             s/\xde/TH/g; # latin capital letter thorn (icelandic)
+++             s/\xdf/ss/g; # latin small letter sharp s (german)
+++             s/\xe0/a/g; # latin small letter a with grave
+++             s/\xe1/a/g; # latin small letter a with acute
+++             s/\xe2/a/g; # latin small letter a with circumflex
+++             s/\xe3/a/g; # latin small letter a with tilde
+++             s/\xe4/ae/g; # latin small letter a with diaeresis
+++             s/\xe5/aa/g; # latin small letter a with ring above
+++             s/\xe6/ae/g; # latin small letter ae
+++             s/\xe7/c/g; # latin small letter c with cedilla
+++             s/\xe8/e/g; # latin small letter e with grave
+++             s/\xe9/e/g; # latin small letter e with acute
+++             s/\xea/e/g; # latin small letter e with circumflex
+++             s/\xeb/ee/g; # latin small letter e with diaeresis
+++             s/\xec/i/g; # latin small letter i with grave
+++             s/\xed/i/g; # latin small letter i with acute
+++             s/\xee/i/g; # latin small letter i with circumflex
+++             s/\xef/ii/g; # latin small letter i with diaeresis -- NOTE(review): the capital form above maps to "Ie"; "ii" here may be a typo for "ie" -- confirm
+++             s/\xf0/dh/g; # latin small letter eth (icelandic)
+++             s/\xf1/n/g; # latin small letter n with tilde
+++             s/\xf2/o/g; # latin small letter o with grave
+++             s/\xf3/o/g; # latin small letter o with acute
+++             s/\xf4/o/g; # latin small letter o with circumflex
+++             s/\xf5/o/g; # latin small letter o with tilde
+++             s/\xf6/oe/g; # latin small letter o with diaeresis
+++             s/\xf8/oe/g; # latin small letter o with stroke
+++             s/\xf9/u/g; # latin small letter u with grave
+++             s/\xfa/u/g; # latin small letter u with acute
+++             s/\xfb/u/g; # latin small letter u with circumflex
+++             s/\xfc/ue/g; # latin small letter u with diaeresis
+++             s/\xfd/y/g; # latin small letter y with acute
+++             s/\xfe/th/g; # latin small letter thorn (icelandic)
+++             s/\xff/ye/g; # latin small letter y with diaeresis
+++     }
+++     return ($content);
+++}
+++
+++sub encode_base64
+++# Found in email by Baruzzi Giovanni <giovanni.baruzzi@allianz-leben.de> on openldap mailinglist
+++# Arguments: $_[0] is the string to encode; $_[1] is the optional line terminator (default "\n"; "" disables line breaking). Returns the encoded string.
+++# Historically this module has been implemented as pure perl code.
+++# The XS implementation runs about 20 times faster, but the Perl
+++# code might be more portable, so it is still here.
+++{
+++     my $res = "";
+++     my $eol = $_[1];
+++     $eol = "\n" unless defined $eol;
+++     pos($_[0]) = 0; # ensure start at the beginning
+++     while ($_[0] =~ /(.{1,45})/gs) { # consume the input in chunks of at most 45 characters; /s lets "." match newlines
+++             $res .= substr(pack('u', $1), 1); # uuencode the chunk and drop its first character (presumably the uuencode length marker)
+++             chop($res); # drop the last character just appended (presumably the newline pack('u') adds)
+++     }
+++     $res =~ tr|` -_|AA-Za-z0-9+/|;               # `# help emacs
+++     # fix padding at the end
+++     my $padding = (3 - length($_[0]) % 3) % 3; # 0, 1 or 2: how far the input length falls short of a multiple of 3
+++     $res =~ s/.{$padding}$/'=' x $padding/e if $padding; # overwrite that many trailing characters with '='
+++     # break encoded string into lines of no more than 76 characters each
+++     if (length $eol) {
+++             $res =~ s/(.{1,76})/$1$eol/g;
+++     }
+++     $res;
+++}
+++
+++sub validate_ascii # True when $content is empty or holds only characters in the range 0x20-0x7E.
+++{
+++     my ($content) = @_;
+++     $content =~ /^[\x20-\x7E]*$/; # the match result is the return value. NOTE(review): "$" presumably still accepts one trailing newline, \z would not -- confirm
+++}
+++
+++sub validate_utf8 # Return true when $content passes validate_ascii or the character-range check at the end; undef from the version guard.
+++{
+++     my ($content) = @_;
+++     if (&validate_ascii($content)) {
+++             return 1;
+++     }
+++     if ($] >= 5.8) { # NOTE(review): $] is documented in the form 5.008001, so ">= 5.8" looks unreachable on Perl 5, and the test reads inverted relative to the comment below -- confirm intent (perhaps "$] < 5.008")
+++             ## No Perl support for UTF-8! ;-/
+++             return undef;
+++     }
+++     $content =~ /^[\x20-\x7E\x{0080}-\x{FFFF}]*$/; # accepts 0x20-0x7E plus U+0080-U+FFFF. NOTE(review): this checks character ranges, not UTF-8 well-formedness -- confirm that is sufficient
+++}
author	Jan Rękorajski <baggins@pld-linux.org>
	Thu, 25 Jun 2009 10:02:24 +0000 (10:02 +0000)
committer	cvs2git <feedback@pld-linux.org>
	Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)