]>
Commit | Line | Data |
---|---|---|
2f658ed1 JR |
#!/bin/sh
# A shell script for conversion of MOVA MuellerXX.koi dictionaries
# into DICT format.
# Written by Andrew Comech <comech@math.sunysb.edu>
# GNU GPL (2000)
# The latest version is available from
# http://www.math.sunysb.edu/~comech/tools/to-dict

# Version string and release date shown by the INFO banner.
version="0.1"
versiondate="November 11, 2000"

# We need the following binaries.  Each variable holds the resolved path of
# the tool, or the empty string when the tool is not installed; the driver
# code tests them for emptiness before doing any work.
# `command -v' is used instead of `which': it is specified by POSIX, and it
# prints nothing on stdout when the command is missing (some `which'
# implementations print a "no dictfmt in ..." message on stdout, which would
# make the variable non-empty and defeat the emptiness test).
DICTFMT=$(command -v dictfmt 2>/dev/null)
DICTZIP=$(command -v dictzip 2>/dev/null)
15 | ||
# Print the program banner on stdout: name, version and release date (taken
# from the globals $version and $versiondate), purpose, author/licence and
# the download URL.  The banner is framed by one empty line above and below.
INFO () {
    cat <<EOF

to-dict, version $version ($versiondate).
Conversion of MOVA MuellerXX.koi dictionaries into DICT format.
Written by Andrew Comech <comech@math.sunysb.edu>. GNU GPL (2000)

The latest version is available from
http://www.math.sunysb.edu/~comech/tools/to-dict

EOF
}
26 | ||
# Print, on stdout, which external binaries the script depends on
# (dictfmt and dictzip) and where their sources or prebuilt binaries can be
# obtained.  The text is static; the here-document delimiter is quoted so
# nothing in it is expanded by the shell.
REQUIREMENTS () {
    cat <<'END_REQUIREMENTS'

REQUIREMENTS: you need the binaries `dictfmt' and `dictzip'.

dictzip.c can be found in dictd-1.5.0.tar.gz (or later version) at
ftp://ftp.cs.unc.edu/pub/users/faith/dict/

dictfmt.c can be found in Debian/GNU Linux package dict-elements at
ftp://ftp.debian.org/debian/dists/potato/main/source/text/

Compiled binaries (dictfmt and dictzip) could be downloaded from
http://www.wh9.tu-dresden.de/~heinrich/dict/dict_leo_ftp/static-binaries/
or
http://iris.ltas.ulg.ac.be/download/apps/dict/

END_REQUIREMENTS
}
43 | ||
# Print the usage/help text on stdout: the informational options, the
# step-by-step recipes for converting Mueller7GPL.koi and Mueller24.koi into
# DICT databases, the reverse (--dict-data) conversion, and a warning about
# the disk space taken by intermediate files.
# The text is static; the here-document delimiter is quoted so that nothing
# in it (quotes, parentheses, `&&', `%h') is interpreted by the shell.
USAGE () {
    cat <<'END_USAGE'

USAGE:
-version: show version
-h, --help, or no parameters: show this help

(*) To make DICT database from Mueller7GPL.koi available from
http://www.chat.ru/~mueller_dic/Mueller7GPL.tgz

# Remove transcription:
./to-dict --no-trans Mueller7GPL.koi mueller7.notr
# Convert <source> into <data> (a file with %h, %d-headers):
./to-dict --src-data mueller7.notr mueller7.data && rm -i mueller7.notr
# Convert <data> into DICT-format (files <name>.dict.dz and <name>.index):
./to-dict --data-dict mueller7.data mueller7 && rm -i mueller7.data
# Expand index file (to be able to access lines like "A, a" by "A" and "a"):
./to-dict --expand-index mueller7.index mueller7.index.exp
# Install a new dictionary with expanded index (RUN AS ROOT).
# The location of files may depend on your distribution!!!
cp mueller7.dict.dz /usr/share/dictd/mueller7.dict.dz
cp mueller7.index.exp /usr/share/dictd/mueller7.index
dictdconfig -w && (killall dictd; dictd)

(*) To make DICT database from Mueller24.koi available from
http://www.chat.ru/~mueller_dic/Mueller24.tgz (this one is preferred)

# Convert <source> into <data> (a file with %h, %d):
./to-dict --src-data Mueller24.koi mueller24.data
# Convert <data> into DICT-format (files <name>.dict.dz and <name>.index):
./to-dict --data-dict mueller24.data mueller24 && rm -i mueller24.data
# Install a new dictionary with expanded index (RUN AS ROOT).
# The location of files may depend on your distribution!!!
cp mueller24.dict.dz /usr/share/dictd/mueller24.dict.dz
cp mueller24.index /usr/share/dictd/mueller24.index
dictdconfig -w && (killall dictd; dictd)

(*) To re-convert <dict> into <data> (a file with %h, %d-headers):

./to-dict --dict-data <dict> <data>

*************************************************************
!!WARNING!! !!WARNING!! !!WARNING!! !!WARNING!!

Temporary files created by this script occupy a lot of drive space!
15 MB for Mueller7GPL.koi (have to strip off transcription first)
12 MB for Mueller24.koi
*************************************************************

END_USAGE
}
93 | ||
# Filter (stdin -> stdout) that removes the square-bracketed transcriptions
# from dictionary text, except for the few bracketed tokens that belong to
# the running text.  According to the original author this procedure should
# not change Mueller24.koi if applied to it.
#
# The sed commands run in this order on every line:
#   1. protect the bracketed tokens that must survive by rewriting them with
#      double quotes instead of brackets;
#   2. fuse neighbouring transcriptions into one bracket pair (the closing
#      and opening brackets between them are replaced by the letter A);
#   3. delete the bracketed groups in the contexts listed below;
#   4. turn the protected tokens back into their bracketed form.
NO_TRANS () {
    sed '
# 1. protect
s/как\ \[juЫ\]/как\ "ju:"/
s/\[l\],/"l",/g
s/\[r\]/"r"/g
# 2. fuse adjacent transcriptions
s/\], \[/A/g
s/\]\; _ам\. \[/A/g
s/\]\; _pl\. \[/A/g
# 3. delete
s/\[[^]]*\]\ (полн.. форм.). \[[^]]*\] (редуцированн[^)]*)\ //g
s/\[[^]]*\]\ //g
s/\[[^]]*\],\ //
s/\ \[[^]]*\],/,/g
s/\ \[[^]]*\])/)/g
s/\ \[[^]]*\]:/:/g
s/\ \[[^]]*\];/;/g
s/\ \[[^]]*\]$//g
# 4. restore
s/как\ "ju:"/как\ \[juЫ\]/g
s/"l"/\[l\]/g
s/"r"/\[r\]/g
'
}
99 | ||
# Strip the copyright/info preamble: copy stdin to stdout starting with the
# first line that begins with "_a" or "_A" and continuing to the end of the
# input.  Everything before that line is dropped; if no such line exists,
# nothing is printed.
STRIP () {
    # Delete every line that is NOT inside the range "first match .. EOF".
    sed '/^_[aA]/,$!d'
}
104 | ||
# Format the file: filter (stdin -> stdout) that turns dictionary source
# lines into the %h/%d "data" layout.
#
# Stage 1 (first sed), per input line:
#   - append a newline, so every entry is followed by an empty line;
#   - mark the text up to the first double space with %h and start the rest
#     on a new line marked %d;
#   - start a new line before "_I "-style roman-numeral markers, before
#     " 1. "-style numbers, before "1>"-style numbers and before
#     " _Abc:"-style labels;
#   - rewrite "<cyrillic letter>>" as "(<letter>)" and any remaining ">"
#     as ":".
# Stage 2 (second sed): put the %d marker on a line of its own (a line that
#   consists of %d only is left as it is) and insert a space after %h.
# Stage 3: fmt splits lines longer than 74 columns without refilling.
#
# NOTE: inside the sed programs a backslash at the end of a line inserts a
# literal newline into the replacement, so those continuation lines must
# stay unindented.
MK_DATA () {
    sed '
s/$/\
/g
s/[^]]*\ \ /%h&\
%d/
s/_[IVX][IVX]* /\
&/g
s/ [1-9]\. /\
&/g
s/[1-9][0-9]*>/\
&/g
s/[абвгдежзиклмнопрстуфчцчшхщъьэюя]>/(&>/g
s/>>/)/g
s/\ \_[AISE][a-z]*:/\
&/g
s/>/:/g
' |
    sed '
s/%d$/%z/
s/%d/%d\
/
s/%z/%d/
s/%h/%h /
' |
    fmt -s -w 74
}
117 | ||
########################################################################
# Command-line driver.
#
#   to-dict -version                    print the banner and exit 0
#   to-dict [-h|--help|-help]           print the usage text and exit 0
#   to-dict <mode> <input> <output>     run one conversion step, where
#       <mode> is one of --no-trans, --src-data, --data-dict,
#       --expand-index, --dict-data.
#
# Exit status: 0 on success; 1 on a wrong argument count, missing
# dictfmt/dictzip, missing input file, unknown mode or a failed conversion.
#
# All uses of $2/$3 are quoted so that file names containing blanks or
# glob characters are handled, and files are fed to the filters through
# redirection rather than `cat file |'.

if [ "$1" = "-version" ]; then
    INFO
    exit 0
fi

if [ "$#" = 0 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "-help" ]; then
    USAGE
    exit 0
fi

if [ "$#" != 3 ]; then
    USAGE
    exit 1
fi

## Will not go further if there are no dictfmt and dictzip binaries:
if [ -z "$DICTFMT" ] || [ -z "$DICTZIP" ]; then
    REQUIREMENTS
    exit 1
fi
##

if [ ! -f "$2" ]; then
    echo "No input file: $2"
    USAGE
    exit 1
fi

case $1 in
"--no-trans")
    echo "Removing transcription ($2 -> $3).."
    NO_TRANS < "$2" > "$3" || exit 1
    echo "."; exit 0
    ;;
"--src-data")
    echo "Writing the header of $3.."
    # printf instead of `echo -e': under /bin/sh implementations such as
    # dash, `echo -e' prints a literal "-e" into the output file.
    printf '%%h 00-database-info\n%%d\n' > "$3" || exit 1
    # The database-info entry consists of the first line of the source ...
    sed -n '1p' < "$2" | sed 's/^/ /' | fmt -s -w 74 >> "$3"
    # ... followed by the block that runs from the first line starting
    # with "_" to the line matching the pattern below.
    sed -n '/^_/,/_яп. Japan японский/p' < "$2" | sed 's/^/ /' | fmt -s -w 74 >> "$3"
    echo "" >> "$3"
    echo "Formatting data ($2 -> $3).."
    # STRIP drops everything before the first line starting with _a/_A.
    STRIP < "$2" | MK_DATA >> "$3" || exit 1
    echo "."; exit 0
    ;;
"--data-dict")
    TITLE="Mueller English-Russian Dictionary"
    echo "dictfmt: $2 -> $3.dict and $3.index.."
    dictfmt -p -u "http://www.chat.ru/~mueller_dic" \
        -s "$TITLE" "$3" < "$2" || exit 1
    echo "Compressing $3.dict.."; dictzip "$3.dict" || exit 1
    # echo -n "Restarting daemons"; killall dictd; dictd
    echo "."; exit 0
    ;;
"--expand-index")
    # So that the line
    # ``whisky, whiskey a sort of spirit I like''
    # could be found not only by /usr/bin/dict "whisky, whiskey", but also by
    # /usr/bin/dict "whisky" and /usr/bin/dict "whiskey"
    #
    # Each index line whose headword contains ", " is emitted three times:
    # once with only the first variant, once with only the second, and once
    # unchanged.  %TAG1/%REM1/%REM2/%TAG3 are temporary markers.
    # A literal tab is spliced into the bracket expression through $TAB;
    # the bash-only $'\t' quoting is not available in a plain /bin/sh.
    TAB=$(printf '\t')
    sed 's/^[^,]*, [^,]*/%TAG1&\
%REM2&\
%TAG3&/; s/^%TAG1[^,]*, /&%REM1/; s/, %REM1[^'"$TAB"']*//; s/%REM2[^,]*, //; s/%TAG[13]//g' < "$2" > "$3" || exit 1
    exit 0
    ;;
"--dict-data")
    # Decompress only when file(1) reports gzip data (dictzip output is
    # gzip-compatible).  The previous test compared grep's *output* with
    # the string "0", which is always true, so plain .dict files were sent
    # through zcat and produced an empty result.
    if file "$2" | grep gzip > /dev/null; then
        CAT="gzip -dc"
    else
        CAT=cat
    fi
    # Lines that do not start with a blank (or backslash) are headwords:
    # prefix them with %h and add a %d line; leading blanks of the other
    # lines are collapsed to a single space.
    $CAT < "$2" | sed 's/^[^\ ].*/%h &\
%d/; s/^[\ ][\ ]*/ /' > "$3" || exit 1
    echo "."; exit 0
    ;;
*)
    INFO; USAGE; exit 1
    ;;
esac

# Every branch above exits, so this point should be unreachable.
echo "You are not supposed to be here."
exit 1