]> git.pld-linux.org Git - packages/dict-mueller24.git/blame - to-dict
- converted tool and dictionary to utf-8
[packages/dict-mueller24.git] / to-dict
CommitLineData
2f658ed1
JR
#!/bin/sh
# A shell script for conversion of MOVA MuellerXX.koi dictionaries
# into DICT format.
# Written by Andrew Comech <comech@math.sunysb.edu>
# GNU GPL (2000)
# The latest version is available from
# http://www.math.sunysb.edu/~comech/tools/to-dict

# Version banner values, interpolated by INFO.
version="0.1"
versiondate="November 11, 2000"

# We need the following binaries.
# `command -v` is the POSIX lookup: it prints the resolved path and fails
# quietly when the program is missing.  `which` is not standardized; some
# implementations write a "no X in ..." message to stdout, which would make
# the emptiness check performed later on these variables pass by accident.
DICTFMT=$(command -v dictfmt 2>/dev/null) || DICTFMT=""
DICTZIP=$(command -v dictzip 2>/dev/null) || DICTZIP=""

# INFO: print the version banner to stdout.
# Globals:   version, versiondate (read)
# Arguments: none
# Outputs:   a blank line, the banner text, and a trailing blank line
INFO () {
  # Unquoted delimiter on purpose: $version and $versiondate must expand.
  cat <<EOF

to-dict, version $version ($versiondate).
Conversion of MOVA MuellerXX.koi dictionaries into DICT format.
Written by Andrew Comech <comech@math.sunysb.edu>. GNU GPL (2000)

The latest version is available from
http://www.math.sunysb.edu/~comech/tools/to-dict

EOF
}

# REQUIREMENTS: explain which external binaries the script needs and where
# they could be obtained.
# Arguments: none
# Outputs:   a blank line, the explanatory text, and a trailing blank line
REQUIREMENTS () {
  # Quoted delimiter: the text is emitted literally, so the backquotes
  # around the program names need no escaping.
  cat <<'EOF'

REQUIREMENTS: you need the binaries `dictfmt' and `dictzip'.

dictzip.c can be found in dictd-1.5.0.tar.gz (or later version) at
ftp://ftp.cs.unc.edu/pub/users/faith/dict/

dictfmt.c can be found in Debian/GNU Linux package dict-elements at
ftp://ftp.debian.org/debian/dists/potato/main/source/text/

Compiled binaries (dictfmt and dictzip) could be downloaded from
http://www.wh9.tu-dresden.de/~heinrich/dict/dict_leo_ftp/static-binaries/
or
http://iris.ltas.ulg.ac.be/download/apps/dict/

EOF
}

# USAGE: print the help text (options, worked conversion recipes for
# Mueller7GPL.koi and Mueller24.koi, and the disk-space warning) to stdout.
# Arguments: none
# Outputs:   a blank line, the help text, and a trailing blank line
USAGE () {
  # Quoted delimiter: the text is emitted literally, so double quotes need
  # no escaping and no backslash processing by `echo` is involved.
  cat <<'EOF'

USAGE:
 -version: show version
 -h, --help, or no parameters: show this help

(*) To make DICT database from Mueller7GPL.koi available from
http://www.chat.ru/~mueller_dic/Mueller7GPL.tgz

# Remove transcription:
./to-dict --no-trans Mueller7GPL.koi mueller7.notr
# Convert <source> into <data> (a file with %h, %d-headers):
./to-dict --src-data mueller7.notr mueller7.data && rm -i mueller7.notr
# Convert <data> into DICT-format (files <name>.dict.dz and <name>.index):
./to-dict --data-dict mueller7.data mueller7 && rm -i mueller7.data
# Expand index file (to be able to access lines like "A, a" by "A" and "a"):
./to-dict --expand-index mueller7.index mueller7.index.exp
# Install a new dictionary with expanded index (RUN AS ROOT).
# The location of files may depend on your distribution!!!
cp mueller7.dict.dz /usr/share/dictd/mueller7.dict.dz
cp mueller7.index.exp /usr/share/dictd/mueller7.index
dictdconfig -w && (killall dictd; dictd)

(*) To make DICT database from Mueller24.koi available from
http://www.chat.ru/~mueller_dic/Mueller24.tgz (this one is preferred)

# Convert <source> into <data> (a file with %h, %d):
./to-dict --src-data Mueller24.koi mueller24.data
# Convert <data> into DICT-format (files <name>.dict.dz and <name>.index):
./to-dict --data-dict mueller24.data mueller24 && rm -i mueller24.data
# Install a new dictionary with expanded index (RUN AS ROOT).
# The location of files may depend on your distribution!!!
cp mueller24.dict.dz /usr/share/dictd/mueller24.dict.dz
cp mueller24.index /usr/share/dictd/mueller24.index
dictdconfig -w && (killall dictd; dictd)

(*) To re-convert <dict> into <data> (a file with %h, %d-headers):

./to-dict --dict-data <dict> <data>

 *************************************************************
 !!WARNING!! !!WARNING!! !!WARNING!! !!WARNING!!

 Temporary files created by this script occupy a lot of drive space!
 15 MB for Mueller7GPL.koi (have to strip off transcription first)
 12 MB for Mueller24.koi
 *************************************************************

EOF
}

# To remove the transcription except for [r] and [ju:] which found in the text.
# This procedure should not change Mueller24.koi if applied to it.
#
# NO_TRANS: stdin -> stdout filter.
# The sed program works in three phases:
#   1. protect the bracketed items that must survive ("как [juЫ]", "[l],",
#      "[r]") by rewriting them with double quotes instead of brackets;
#   2. fuse neighbouring bracket groups ("], [", "]; _ам. [", "]; _pl. [")
#      into one group, then delete bracketed groups in the contexts listed
#      (followed by a space, a comma, ")", ":", ";" or end of line);
#   3. turn the protected items back into their bracketed form.
# The program text is kept exactly as found; only the wrapper is documented.
NO_TRANS () {
sed 's/как\ \[juЫ\]/как\ "ju:"/; s/\[l\],/"l",/g; s/\[r\]/"r"/g; s/\], \[/A/g; s/\]\; _ам\. \[/A/g; s/\]\; _pl\. \[/A/g; s/\[[^]]*\]\ (полн.. форм.). \[[^]]*\] (редуцированн[^)]*)\ //g; s/\[[^]]*\]\ //g; s/\[[^]]*\],\ //; s/\ \[[^]]*\],/,/g; s/\ \[[^]]*\])/)/g; s/\ \[[^]]*\]:/:/g; s/\ \[[^]]*\];/;/g; s/\ \[[^]]*\]$//g; s/как\ "ju:"/как\ \[juЫ\]/g; s/"l"/\[l\]/g; s/"r"/\[r\]/g '
}

# Strip the copyright/info
#
# STRIP: stdin -> stdout filter.  Drops everything before the first line
# that starts with "_a" or "_A" and prints from that line to end of input.
# Prints nothing when no such line exists.
STRIP () {
  sed -n '/^_[aA]/,$p'
}

# Format the file
#
# MK_DATA: stdin -> stdout filter producing "%h <headword>" / "%d" records.
# Stage 1 (first sed), per input line:
#   - appends an empty line after the entry;
#   - prefixes the text up to a double space with %h and starts a %d line
#     after it;
#   - starts a new line before roman-numeral markers (_I, _II, ...), before
#     " N. " items, before "N>" items and before " _Xxx:" labels;
#   - rewrites "<cyrillic letter>>" as "(<letter>)" and every remaining ">"
#     as ":".
# Stage 2 (second sed): puts a space after %h and moves the text that
#   follows %d onto its own line; a %d that already ends its line is left
#   alone (the %z placeholder shields it from the second substitution).
# Stage 3: fmt splits lines longer than 74 columns without refilling.
# Both sed programs are kept exactly as found; the embedded newlines are
# significant.
MK_DATA () {
sed 's/$/\
/g; s/[^]]*\ \ /%h&\
%d/; s/_[IVX][IVX]* /\
 &/g; s/ [1-9]\. /\
 &/g; s/[1-9][0-9]*>/\
 &/g; s/[абвгдежзиклмнопрстуфчцчшхщъьэюя]>/(&>/g; s/>>/)/g; s/\ \_[AISE][a-z]*:/\
 &/g; s/>/:/g'\
|sed ' s/%d$/%z/; s/%d/%d\
 / ; s/%z/%d/; s/%h/%h / ' \
|fmt -s -w 74;}

########################################################################

# Argument validation.  Expected invocation: <mode> <input> <output>.
# Each test is a separate `[ ]` joined with `||`: the `-o` operator is
# marked obsolescent by POSIX and becomes ambiguous when an operand looks
# like an operator (for example "(" or "!").

# -version: print the banner and stop.
if [ "$1" = "-version" ]; then
  INFO
  exit 0
fi

# No parameters or an explicit help request: show the help, exit status 0.
if [ "$#" = 0 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "-help" ]; then
  USAGE
  exit 0
fi

# Every conversion mode takes exactly three parameters.
if [ "$#" != 3 ]; then
  USAGE; exit 1;
fi

## Will not go further if there are no dictfmt and dictzip binaries:
if [ "$DICTFMT" = "" ] || [ "$DICTZIP" = "" ]; then
  REQUIREMENTS
  exit 1
fi
##

# The second parameter must name an existing regular file.
if [ ! -f "$2" ]; then
  echo "No input file: $2"; USAGE; exit 1
fi

# Mode dispatch: $1 selects the conversion, $2 is the input file and $3 the
# output file (for --data-dict, $3 is the base name of the .dict/.index
# pair).  Every arm exits on its own.  All expansions of $2/$3 are quoted
# so that file names containing spaces or glob characters work.
case "$1" in
  "--no-trans")
    echo "Removing transcription ($2 -> $3)..";
    NO_TRANS < "$2" > "$3" || exit 1
    echo "."; exit 0
    ;;
  "--src-data")
    echo "Writing the header of $3.."
    # printf instead of `echo -e`: under #!/bin/sh some shells (e.g. dash)
    # do not know -e and would write a literal "-e" into the header.
    printf '%%h 00-database-info\n%%d\n' > "$3"
    # First line of the source, indented, becomes the database info ...
    sed -n '1p' < "$2" | sed 's/^/ /' | fmt -s -w 74 >> "$3";
    # ... followed by the block from the first "_" line up to the line
    # matching "_яп. Japan японский".
    sed -n '/^_/,/_яп. Japan японский/p' < "$2" | sed 's/^/ /' | fmt -s -w 74 >> "$3";
    echo "" >> "$3"
    echo "Formatting data ($2 -> $3).."
    sed -n '/^_[aA]/,$p' < "$2" | MK_DATA >> "$3" || exit 1
    echo "."; exit 0
    ;;
  "--data-dict")
    TITLE="Mueller English-Russian Dictionary"
    echo "dictfmt: $2 -> $3.dict and $3.index.."
    dictfmt -p -u "http://www.chat.ru/~mueller_dic" \
      -s "$TITLE" "$3" < "$2" || exit 1
    echo "Compressing $3.dict.."; dictzip "$3.dict" || exit 1
#   echo -n "Restarting daemons"; killall dictd; dictd
    echo "."; exit 0
    ;;
  "--expand-index")
# So that the line
# ``whisky, whiskey a sort of spirit I like''
# could be found not only by /usr/bin/dict "whisky, whiskey", but also by
# /usr/bin/dict "whisky" and /usr/bin/dict "whiskey"
    # A literal tab is spliced into the bracket expression below.  The
    # former $'\t' spelling is a bash extension and is not understood by
    # every /bin/sh.
    tab=$(printf '\t')
    sed 's/^[^,]*, [^,]*/%TAG1&\
%REM2&\
%TAG3&/; s/^%TAG1[^,]*, /&%REM1/; s/, %REM1[^'"$tab"']*//; s/%REM2[^,]*, //; s/%TAG[13]//g' < "$2" > "$3" || exit 1
    exit 0
    ;;
  "--dict-data")
    # Choose the reader from what file(1) reports about the content.  The
    # previous test compared grep's *output* with "0", which is never
    # equal, so zcat was selected even for uncompressed input.  Reading
    # from stdin keeps the file name out of file's output.
    if file - < "$2" | grep -q gzip; then
      CAT=zcat;
    else
      CAT=cat;
    fi
    # Unindented lines become "%h <line>" followed by a "%d" line; leading
    # blanks on the other lines are squeezed to a single space.
    "$CAT" < "$2" | sed 's/^[^\ ].*/%h &\
%d/; s/^[\ ][\ ]*/ /' > "$3" || exit 1
    echo "."; exit 0
    ;;
  *) INFO; USAGE; exit 1
    ;;
esac

# Not reached: every arm above exits.
echo "You are not supposed to be here."
exit 1
This page took 0.165454 seconds and 4 git commands to generate.