to-dict

   1 #!/bin/sh
   2 # A shell script for conversion of MOVA MuellerXX.koi dictionaries
   3 # into DICT format.
   4 # Written by Andrew Comech <comech@math.sunysb.edu>
   5 # GNU GPL (2000)
   6 # The latest version is available from
   7 # http://www.math.sunysb.edu/~comech/tools/to-dict
   8
   9 version="0.1"
  10 versiondate="November 11, 2000"
  11
  12 # We need the following binaries:
  13 DICTFMT=`which dictfmt`
  14 DICTZIP=`which dictzip`
  15
  16 INFO () {
  17   echo "
  18 to-dict, version $version ($versiondate).
  19 Conversion of MOVA MuellerXX.koi dictionaries into DICT format.
  20 Written by Andrew Comech <comech@math.sunysb.edu>. GNU GPL (2000)
  21
  22 The latest version is available from
  23 http://www.math.sunysb.edu/~comech/tools/to-dict
  24 "
  25 }
  26
  27 REQUIREMENTS () {
  28   echo "
  29 REQUIREMENTS: you need the binaries \`dictfmt' and \`dictzip'.
  30
  31 dictzip.c can be found in dictd-1.5.0.tar.gz (or later version) at
  32 ftp://ftp.cs.unc.edu/pub/users/faith/dict/
  33
  34 dictfmt.c can be found in Debian/GNU Linux package dict-elements at
  35 ftp://ftp.debian.org/debian/dists/potato/main/source/text/
  36
  37 Compiled binaries (dictfmt and dictzip) could be downloaded from
  38 http://www.wh9.tu-dresden.de/~heinrich/dict/dict_leo_ftp/static-binaries/
  39 or
  40 http://iris.ltas.ulg.ac.be/download/apps/dict/
  41 "
  42 }
  43
  44 USAGE () {
  45     echo "
  46 USAGE:
  47  -version: show version
  48  -h, --help, or no parameters: show this help
  49
  50 (*) To make DICT database from Mueller7GPL.koi available from
  51 http://www.chat.ru/~mueller_dic/Mueller7GPL.tgz
  52
  53 # Remove transcription:
  54 ./to-dict --no-trans Mueller7GPL.koi mueller7.notr
  55 # Convert <source> into <data> (a file with %h, %d-headers):
  56 ./to-dict --src-data mueller7.notr mueller7.data && rm -i mueller7.notr
  57 # Convert <data> into DICT-format (files <name>.dict.dz and <name>.index):
  58 ./to-dict --data-dict mueller7.data mueller7 && rm -i mueller7.data
  59 # Expand index file (to be able to access lines like \"A, a\" by \"A\" and \"a\"):
  60 ./to-dict --expand-index mueller7.index mueller7.index.exp
  61 # Install a new dictionary with expanded index (RUN AS ROOT).
  62 # The location of files may depend on your distribution!!!
  63 cp mueller7.dict.dz /usr/share/dictd/mueller7.dict.dz
  64 cp mueller7.index.exp /usr/share/dictd/mueller7.index
  65 dictdconfig -w && (killall dictd; dictd)
  66
  67 (*) To make DICT database from Mueller24.koi available from
  68 http://www.chat.ru/~mueller_dic/Mueller24.tgz (this one is preferred)
  69
  70 # Convert <source> into <data> (a file with %h, %d):
  71 ./to-dict --src-data Mueller24.koi mueller24.data
  72 # Convert <data> into DICT-format (files <name>.dict.dz and <name>.index):
  73 ./to-dict --data-dict mueller24.data mueller24 && rm -i mueller24.data
  74 # Install a new dictionary with expanded index (RUN AS ROOT).
  75 # The location of files may depend on your distribution!!!
  76 cp mueller24.dict.dz /usr/share/dictd/mueller24.dict.dz
  77 cp mueller24.index /usr/share/dictd/mueller24.index
  78 dictdconfig -w && (killall dictd; dictd)
  79
  80 (*) To re-convert <dict> into <data> (a file with %h, %d-headers):
  81
  82 ./to-dict --dict-data <dict> <data>
  83
  84  *************************************************************
  85     !!WARNING!!    !!WARNING!!    !!WARNING!!    !!WARNING!!
  86
  87  Temporary files created by this script occupy a lot of drive space!
  88  15 MB for Mueller7GPL.koi (have to strip off transcription first)
  89  12 MB for Mueller24.koi
  90  *************************************************************
  91 "
  92 }
  93
  94 # To remove the transcription except for [r] and [ju:] which found in the text.
  95 # This procedure should not change Mueller24.koi if applied to it.
  96 NO_TRANS () {
  97 sed 's/как\ \[juЫ\]/как\ "ju:"/; s/\[l\],/"l",/g; s/\[r\]/"r"/g; s/\], \[/A/g; s/\]\; _ам\. \[/A/g; s/\]\; _pl\. \[/A/g; s/\[[^]]*\]\ (полн.. форм.). \[[^]]*\] (редуцированн[^)]*)\ //g; s/\[[^]]*\]\ //g; s/\[[^]]*\],\ //; s/\ \[[^]]*\],/,/g; s/\ \[[^]]*\])/)/g; s/\ \[[^]]*\]:/:/g; s/\ \[[^]]*\];/;/g; s/\ \[[^]]*\]$//g; s/как\ "ju:"/как\ \[juЫ\]/g; s/"l"/\[l\]/g; s/"r"/\[r\]/g '
  98 }
  99
 100 # Strip the copyright/info
 101 STRIP () {
 102 sed -n '/^_[aA]/,$p'
 103 }
 104
# Format the file: filter (stdin -> stdout) that turns dictionary source
# lines into the "%h"/"%d" data layout.
#
# First sed (per input line; the replacements contain literal newlines, so
# the leading whitespace on the continuation lines is significant):
#   1. append a newline, so every entry is followed by an empty line;
#   2. prefix the text up to the last double space (and containing no "]")
#      with "%h" and start a new line with "%d" after it -- presumably
#      this splits headword from definition;
#   3. "_I ", "_II ", ... (underscore + I/V/X letters): new line, 1 space;
#   4. " 1. " ... " 9. ": new line, 2 spaces;
#   5. "<number>>": new line, 6 spaces;
#   6. "<Cyrillic letter>>" becomes "(<letter>)" (via a temporary ">>");
#   7. " _" + one of A/I/S/E + lowercase letters + ":": new line, 2 spaces;
#   8. every remaining ">" becomes ":".
# Second sed (per resulting line): a "%d" at end of line is left alone
# (parked as "%z" and restored); otherwise the first "%d" is followed by a
# line break and a 3-space indent; "%h" becomes "%h ".
# Finally `fmt -s -w 74` wraps to 74 columns (-s: split long lines only).
#
# NOTE(review): the Cyrillic literals in the bracket expression must be in
# the same encoding as the input (the ".koi" names suggest KOI8-R) --
# confirm before re-encoding this script.
MK_DATA () {
sed 's/$/\
/g; s/[^]]*\ \ /%h&\
%d/; s/_[IVX][IVX]* /\
 &/g; s/ [1-9]\. /\
  &/g; s/[1-9][0-9]*>/\
      &/g; s/[абвгдежзиклмнопрстуфчцчшхщъьэюя]>/(&>/g; s/>>/)/g; s/\ \_[AISE][a-z]*:/\
  &/g; s/>/:/g'\
|sed ' s/%d$/%z/; s/%d/%d\
   / ; s/%z/%d/; s/%h/%h / '  \
|fmt -s -w 74;}
 117
 118 ########################################################################
 119
 120 if [ "$1" = "-version" ]; then
 121     INFO
 122     exit 0
 123 fi
 124
 125 if [ "$#" = 0 -o "$1" = "-h" -o "$1" = "--help" -o "$1" = "-help" ]; then
 126     USAGE
 127     exit 0
 128 fi
 129
 130 if [ "$#" != 3 ]; then
 131     USAGE; exit 1;
 132 fi
 133
 134 ## Will not go further if there are no dictfmt and dictzip binaries:
 135 if [ "$DICTFMT" = "" -o "$DICTZIP" = "" ]; then
 136     REQUIREMENTS
 137     exit 1
 138 fi
 139 ##
 140
 141 if [ ! -f "$2" ]; then
 142     echo "No input file: $2"; USAGE; exit 1
 143 fi
 144
 145 case $1 in
 146     "--no-trans")
 147         echo  "Removing transcription ($2 -> $3)..";
 148         cat $2 | NO_TRANS > $3 || exit 1
 149         echo "."; exit 0
 150         ;;
 151     "--src-data")
 152         echo "Writing the header of $3.."
 153         echo -e "%h 00-database-info\n%d" > $3
 154         cat $2 | sed -n '1p' | sed 's/^/  /' | fmt -s -w 74 >> $3;
 155         cat $2 | sed -n '/^_/,/_яп.  Japan японский/p' | sed 's/^/  /' | fmt -s -w 74 >> $3;
 156         echo "" >> $3
 157         echo "Formatting data ($2 -> $3).."
 158         cat $2 | sed -n '/^_[aA]/,$p' | MK_DATA >> $3 || exit 1
 159         echo "."; exit 0
 160         ;;
 161     "--data-dict")
 162         TITLE="Mueller English-Russian Dictionary"
 163         echo "dictfmt: $2 -> $3.dict and $3.index.."
 164         dictfmt -p -u "http://www.chat.ru/~mueller_dic" \
 165             -s "$TITLE" $3 < $2 || exit 1
 166         echo "Compressing $3.dict.."; dictzip $3.dict || exit 1
 167 #       echo -n "Restarting daemons"; killall dictd; dictd
 168         echo "."; exit 0
 169         ;;
 170     "--expand-index")
 171 # So that the line
 172 # ``whisky, whiskey   a sort of spirit I like''
 173 # could be found not only by /usr/bin/dict "whisky, whiskey", but also by
 174 # /usr/bin/dict "whisky" and /usr/bin/dict "whiskey"
 175         cat $2 | sed 's/^[^,]*, [^,]*/%TAG1&\
 176 %REM2&\
 177 %TAG3&/; s/^%TAG1[^,]*, /&%REM1/; s/, %REM1[^'$'\t'']*//; s/%REM2[^,]*, //; s/%TAG[13]//g' > $3 || exit 1
 178         exit 0
 179         ;;
 180     "--dict-data")
 181         if [ "` file $2 | grep  gzip`" != "0" ]; then
 182             CAT=zcat;
 183         else
 184             CAT=cat;
 185         fi
 186         $CAT $2 | sed 's/^[^\ ].*/%h &\
 187 %d/; s/^[\ ][\ ]*/   /' >$3 || exit 1
 188         echo "."; exit 0
 189         ;;
 190     *) INFO; USAGE; exit 1
 191 esac
 192
 193 echo "You are not supposed to be here."
 194 exit 1