From 78ddaac8fc4a3cd5335057d9c391f686cfcf68c7 Mon Sep 17 00:00:00 2001 From: Jakub Bogusz Date: Fri, 25 Dec 2015 09:34:54 +0100 Subject: [PATCH] - updated to 3.3.1 (note: soname changed) - removed outdated svn and soname patches --- lttoolbox-soname.patch | 11 - lttoolbox-svn20130412.patch | 3112 ----------------------------------- lttoolbox.spec | 26 +- 3 files changed, 11 insertions(+), 3138 deletions(-) delete mode 100644 lttoolbox-soname.patch delete mode 100644 lttoolbox-svn20130412.patch diff --git a/lttoolbox-soname.patch b/lttoolbox-soname.patch deleted file mode 100644 index 973caac..0000000 --- a/lttoolbox-soname.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- lttoolbox-3.2.0/configure.ac.orig 2013-06-26 16:15:39.881717927 +0200 -+++ lttoolbox-3.2.0/configure.ac 2013-06-26 16:23:06.398365855 +0200 -@@ -23,7 +23,7 @@ - AC_SUBST(GENERIC_MAJOR_VERSION) - - # Shared library versioning --GENERIC_LIBRARY_VERSION=0:0:0 -+GENERIC_LIBRARY_VERSION=1:0:0 - # | | | - # +------+ | +---+ - # | | | diff --git a/lttoolbox-svn20130412.patch b/lttoolbox-svn20130412.patch deleted file mode 100644 index 71dc646..0000000 --- a/lttoolbox-svn20130412.patch +++ /dev/null @@ -1,3112 +0,0 @@ -Index: lttoolbox/lt-proc.1 -=================================================================== ---- lttoolbox/lt-proc.1 (revision 21745) -+++ lttoolbox/lt-proc.1 (working copy) -@@ -12,7 +12,9 @@ - [ - .B \-a \fR| - .B \-b \fR| -+.B \-o \fR| - .B \-c \fR| -+.B \-d \fR| - .B \-e \fR| - .B \-g \fR| - .B \-n \fR| -@@ -29,7 +31,10 @@ - [ - .B \-\-analysis \fR| - .B \-\-bilingual \fR| -+.B \-\-surf-bilingual \fR| - .B \-\-case-sensitive \fR| -+.B \-\-debugged-gen \fR| -+.B \-\-decompose-nouns \fR| - .B \-\-generation \fR| - .B \-\-non-marked-gen \fR| - .B \-\-tagged-gen \fR| -@@ -98,9 +103,18 @@ - form in the source language. Works tipically with the output of - apertium-pretransfer. - .TP -+.B \-o, \-\-surf-bilingual -+As with \-b, but takes input from apertium\-tagger \-p , with -+surface forms, and if the lexical form is not found in the bilingual -+dictionary, it outputs the surface form of the word. -+.TP -+ - .B \-c, \-\-case-sensitive - Use the literal case of the incoming characters - .TP -+.B \-d, \-\-debugged-gen -+Morph. generation with all the stuff -+.TP - .B \-e, \-\-decompose-compounds - Try to treat unknown words as compounds, and decompose them. - .TP -@@ -154,5 +168,4 @@ - .SH BUGS - Lots of...lurking in the dark and waiting for you! - .SH AUTHOR --(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights --reserved. -+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. -Index: lttoolbox/fst_processor.cc -=================================================================== ---- lttoolbox/fst_processor.cc (revision 21745) -+++ lttoolbox/fst_processor.cc (working copy) -@@ -44,14 +44,17 @@ - - caseSensitive = false; - dictionaryCase = false; -- compoundDecomposition = false; -+ do_decomposition = false; - nullFlush = false; - nullFlushGeneration = false; -+ showControlSymbols = false; -+ biltransSurfaceForms = false; -+ compoundOnlyLSymbol = 0; -+ compoundRSymbol = 0; -+ compound_max_elements = 4; - -- pool = new Pool >(4, vector(50)); -- -- initial_state = new State(pool); -- current_state = new State(pool); -+ initial_state = new State(); -+ current_state = new State(); - } - - FSTProcessor::~FSTProcessor() -@@ -58,7 +61,6 @@ - { - delete current_state; - delete initial_state; -- delete pool; - } - - void -@@ -408,6 +410,100 @@ - return 0x7fffffff; - } - -+pair -+FSTProcessor::readBilingual(FILE *input, FILE *output) -+{ -+ wint_t val = fgetwc_unlocked(input); -+ wstring symbol = L""; -+ -+ if(feof(input)) -+ { -+ return pair(symbol, 0x7fffffff); -+ } -+ -+ if(outOfWord) -+ { -+ if(val == L'^') -+ { -+ val = fgetwc_unlocked(input); -+ if(feof(input)) -+ { -+ return pair(symbol, 0x7fffffff); -+ } -+ } -+ else if(val == L'\\') -+ { -+ fputwc_unlocked(val, output); -+ val = fgetwc_unlocked(input); -+ if(feof(input)) -+ { -+ return pair(symbol, 0x7fffffff); -+ } -+ fputwc_unlocked(val,output); -+ skipUntil(input, output, L'^'); -+ val = fgetwc_unlocked(input); -+ if(feof(input)) -+ { -+ return pair(symbol, 0x7fffffff); -+ } -+ } -+ else -+ { -+ fputwc_unlocked(val, output); -+ skipUntil(input, output, L'^'); -+ val = fgetwc_unlocked(input); -+ if(feof(input)) -+ { -+ return pair(symbol, 0x7fffffff); -+ } -+ } -+ outOfWord = false; -+ } -+ -+ if(val == L'\\') -+ { -+ val = fgetwc_unlocked(input); -+ return pair(symbol, val); -+ } -+ else if(val == L'$') -+ { -+ outOfWord = true; -+ return pair(symbol, static_cast(L'$')); -+ } -+ else if(val == L'<') -+ { -+ wstring cad = L""; -+ cad += static_cast(val); -+ while((val = fgetwc_unlocked(input)) != L'>') -+ { -+ if(feof(input)) -+ { -+ streamError(); -+ } -+ cad += static_cast(val); -+ } -+ cad += static_cast(val); -+ -+ int res = alphabet(cad); -+ -+ if (res == 0) { -+ symbol = cad; -+ } -+ return pair(symbol, res); -+ } -+ else if(val == L'[') -+ { -+ fputws_unlocked(readFullBlock(input, L'[', L']').c_str(), output); -+ return readBilingual(input, output); -+ } -+ else -+ { -+ return pair(symbol, val); -+ } -+ -+ return pair(symbol, 0x7fffffff); -+} -+ - void - FSTProcessor::flushBlanks(FILE *output) - { -@@ -494,6 +590,27 @@ - } - - void -+FSTProcessor::writeEscapedWithTags(wstring const &str, FILE *output) -+{ -+ for(unsigned int i = 0, limit = str.size(); i < limit; i++) -+ { -+ if(str[i] == L'<' && i >=1 && str[i-1] != L'\\') -+ { -+ fputws_unlocked(str.substr(i).c_str(), output); -+ return; -+ } -+ -+ if(escaped_chars.find(str[i]) != escaped_chars.end()) -+ { -+ fputwc_unlocked(L'\\', output); -+ } -+ fputwc_unlocked(str[i], output); -+ } -+} -+ -+ -+ -+void - FSTProcessor::printWord(wstring const &sf, wstring const &lf, FILE *output) - { - fputwc_unlocked(L'^', output); -@@ -642,7 +759,86 @@ - initGeneration(); - } - -+ - wstring -+FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupper) { -+ const int MAX_COMBINATIONS = 500; -+ //wcerr << L"compoundAnalysis(input_word = " << input_word << L")" << endl; -+ -+ State current_state = *initial_state; -+ -+ for(unsigned int i=0; i")) == 0 -+ && (compoundOnlyLSymbol=alphabet(L"<:compound:only-L>")) == 0 -+ && (compoundOnlyLSymbol=alphabet(L"<@co:only-L>")) == 0 -+ && (compoundOnlyLSymbol=alphabet(L"<@compound:only-L>")) == 0 -+ && (compoundOnlyLSymbol=alphabet(L"")) == 0) -+ { -+ wcerr << L"Warning: Decomposition symbol <:compound:only-L> not found" << endl; -+ } -+ else if (!showControlSymbols) -+ alphabet.setSymbol(compoundOnlyLSymbol, L""); -+ -+ if ((compoundRSymbol=alphabet(L"<:co:R>")) == 0 -+ && (compoundRSymbol=alphabet(L"<:compound:R>")) == 0 -+ && (compoundRSymbol=alphabet(L"<@co:R>")) == 0 -+ && (compoundRSymbol=alphabet(L"<@compound:R>")) == 0 -+ && (compoundRSymbol=alphabet(L"")) == 0) -+ { -+ wcerr << L"Warning: Decomposition symbol <:compound:R> not found" << endl; -+ } -+ else if (!showControlSymbols) -+ alphabet.setSymbol(compoundRSymbol, L""); -+} -+ -+ -+void -+FSTProcessor::initDecomposition() { -+ do_decomposition = true; -+ initAnalysis(); -+ initDecompositionSymbols(); -+} -+ -+/*wstring - FSTProcessor::decompose(wstring w) - { - State current_state = *initial_state; -@@ -807,7 +1003,7 @@ - } - //wcerr << L"+ decompose: " << lf << endl; - return lf; --} -+}*/ - - void - FSTProcessor::analysis(FILE *input, FILE *output) -@@ -839,6 +1035,10 @@ - uppercase = firstupper && iswupper(sf[sf.size()-1]); - } - -+ if(do_decomposition && compoundOnlyLSymbol != 0) -+ { -+ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol); -+ } - lf = current_state.filterFinals(all_finals, alphabet, - escaped_chars, - uppercase, firstupper); -@@ -853,6 +1053,10 @@ - uppercase = firstupper && iswupper(sf[sf.size()-1]); - } - -+ if(do_decomposition && compoundOnlyLSymbol != 0) -+ { -+ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol); -+ } - lf = current_state.filterFinals(all_finals, alphabet, - escaped_chars, - uppercase, firstupper); -@@ -867,6 +1071,10 @@ - uppercase = firstupper && iswupper(sf[sf.size()-1]); - } - -+ if(do_decomposition && compoundOnlyLSymbol != 0) -+ { -+ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol); -+ } - lf = current_state.filterFinals(all_finals, alphabet, - escaped_chars, - uppercase, firstupper); -@@ -881,6 +1089,10 @@ - uppercase = firstupper && iswupper(sf[sf.size()-1]); - } - -+ if(do_decomposition && compoundOnlyLSymbol != 0) -+ { -+ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol); -+ } - lf = current_state.filterFinals(all_finals, alphabet, - escaped_chars, - uppercase, firstupper); -@@ -969,16 +1181,22 @@ - if(limit == 0) - { - input_buffer.back(sf.size()); -- fputwc_unlocked(sf[0], output); -+ writeEscaped(sf.substr(0,1), output); - } - else - { - input_buffer.back(1+(size-limit)); - wstring unknown_word = sf.substr(0, limit); -- if(compoundDecomposition) -+ if(do_decomposition) - { -+ if(!dictionaryCase) -+ { -+ firstupper = iswupper(sf[0]); -+ uppercase = firstupper && iswupper(sf[sf.size()-1]); -+ } -+ - wstring compound = L""; -- compound = decompose(unknown_word); -+ compound = compoundAnalysis(unknown_word, uppercase, firstupper); - if(compound != L"") - { - printWord(unknown_word, compound, output); -@@ -1002,16 +1220,22 @@ - if(limit == 0) - { - input_buffer.back(sf.size()); -- fputwc_unlocked(sf[0], output); -+ writeEscaped(sf.substr(0,1), output); - } - else - { - input_buffer.back(1+(size-limit)); - wstring unknown_word = sf.substr(0, limit); -- if(compoundDecomposition) -+ if(do_decomposition) - { -+ if(!dictionaryCase) -+ { -+ firstupper = iswupper(sf[0]); -+ uppercase = firstupper && iswupper(sf[sf.size()-1]); -+ } -+ - wstring compound = L""; -- compound = decompose(unknown_word); -+ compound = compoundAnalysis(unknown_word, uppercase, firstupper); - if(compound != L"") - { - printWord(unknown_word, compound, output); -@@ -1296,19 +1520,27 @@ - fputwc(L'=', output); - val = readGeneration(input, output); - } -- -+ - if(val == L'$' && outOfWord) - { - if(sf[0] == L'*' || sf[0] == L'%') - { -- if(mode != gm_clean) -+ if(mode != gm_clean && mode != gm_tagged_nm) - { - writeEscaped(sf, output); - } -- else -+ else if (mode == gm_clean) - { - writeEscaped(sf.substr(1), output); - } -+ else if(mode == gm_tagged_nm) -+ { -+ fputwc_unlocked(L'^', output); -+ writeEscaped(removeTags(sf.substr(1)), output); -+ fputwc_unlocked(L'/', output); -+ writeEscapedWithTags(sf, output); -+ fputwc_unlocked(L'$', output); -+ } - } - else if(sf[0] == L'@') - { -@@ -1324,6 +1556,18 @@ - { - writeEscaped(removeTags(sf), output); - } -+ else if(mode == gm_tagged) -+ { -+ writeEscaped(removeTags(sf), output); -+ } -+ else if(mode == gm_tagged_nm) -+ { -+ fputwc_unlocked(L'^', output); -+ writeEscaped(removeTags(sf.substr(1)), output); -+ fputwc_unlocked(L'/', output); -+ writeEscapedWithTags(sf, output); -+ fputwc_unlocked(L'$', output); -+ } - } - else if(current_state.isFinal(all_finals)) - { -@@ -1330,7 +1574,7 @@ - bool uppercase = sf.size() > 1 && iswupper(sf[1]); - bool firstupper= iswupper(sf[0]); - -- if(mode == gm_tagged) -+ if(mode == gm_tagged || mode == gm_tagged_nm) - { - fputwc_unlocked(L'^', output); - } -@@ -1339,10 +1583,10 @@ - escaped_chars, - uppercase, firstupper).substr(1).c_str(), - output); -- if(mode == gm_tagged) -+ if(mode == gm_tagged || mode == gm_tagged_nm) - { - fputwc_unlocked(L'/', output); -- fputws_unlocked(sf.c_str(), output); -+ writeEscapedWithTags(sf, output); - fputwc_unlocked(L'$', output); - } - -@@ -1360,9 +1604,26 @@ - } - else if(mode == gm_unknown) - { -+ if(sf != L"") -+ { -+ fputwc_unlocked(L'#', output); -+ writeEscaped(removeTags(sf), output); -+ } -+ } -+ else if(mode == gm_tagged) -+ { - fputwc_unlocked(L'#', output); - writeEscaped(removeTags(sf), output); - } -+ else if(mode == gm_tagged_nm) -+ { -+ fputwc_unlocked(L'^', output); -+ writeEscaped(removeTags(sf), output); -+ fputwc_unlocked(L'/', output); -+ fputwc_unlocked(L'#', output); -+ writeEscapedWithTags(sf, output); -+ fputwc_unlocked(L'$', output); -+ } - } - - current_state = *initial_state; -@@ -2033,19 +2294,62 @@ - } - - State current_state = *initial_state; -- wstring sf = L""; -- wstring queue = L""; -- wstring result = L""; -+ wstring sf = L""; // source language analysis -+ wstring queue = L""; // symbols to be added to each target -+ wstring result = L""; // result of looking up analysis in bidix - - outOfWord = false; - - skipUntil(input, output, L'^'); -- int val; -+ pair tr; // readBilingual return value, containing: -+ int val; // the alphabet value of current symbol, and -+ wstring symbol = L""; // the current symbol as a string -+ bool seentags = false; // have we seen any tags at all in the analysis? - -- while((val = readGeneration(input, output)) != 0x7fffffff) -+ bool seensurface = false; -+ wstring surface = L""; -+ -+ while(true) // ie. while(val != 0x7fffffff) - { -+ tr = readBilingual(input, output); -+ symbol = tr.first; -+ val = tr.second; -+ -+ //fwprintf(stderr, L"> %S : %C : %d\n", tr.first.c_str(), tr.second, tr.second); -+ if(biltransSurfaceForms && !seensurface && !outOfWord) -+ { -+ while(val != L'/' && val != 0x7fffffff) -+ { -+ surface = surface + symbol; -+ alphabet.getSymbol(surface, val); -+ tr = readBilingual(input, output); -+ symbol = tr.first; -+ val = tr.second; -+ //fwprintf(stderr, L" == %S : %C : %d => %S\n", symbol.c_str(), val, val, surface.c_str()); -+ } -+ seensurface = true; -+ tr = readBilingual(input, output); -+ symbol = tr.first; -+ val = tr.second; -+ } -+ -+ if (val == 0x7fffffff) -+ { -+ break; -+ } -+ - if(val == L'$' && outOfWord) - { -+ if(!seentags) // if no tags: only return complete matches -+ { -+ bool uppercase = sf.size() > 1 && iswupper(sf[1]); -+ bool firstupper= iswupper(sf[0]); -+ -+ result = current_state.filterFinals(all_finals, alphabet, -+ escaped_chars, -+ uppercase, firstupper, 0); -+ } -+ - if(sf[0] == L'*') - { - printWordBilingual(sf, L"/"+sf, output); -@@ -2055,14 +2359,23 @@ - printWordBilingual(sf, compose(result, queue), output); - } - else -- { -- printWordBilingual(sf, L"/@"+sf, output); -+ { //xxx -+ if(biltransSurfaceForms) -+ { -+ printWordBilingual(surface, L"/@"+surface, output); -+ } -+ else -+ { -+ printWordBilingual(sf, L"/@"+sf, output); -+ } - } -- -+ seensurface = false; -+ surface = L""; - queue = L""; - result = L""; - current_state = *initial_state; - sf = L""; -+ seentags = false; - } - else if(iswspace(val) && sf.size() == 0) - { -@@ -2074,7 +2387,11 @@ - { - sf += L'\\'; - } -- alphabet.getSymbol(sf, val); -+ alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic -+ if(val == 0) // non-alphabetic, possibly unknown tag; add to sf -+ { -+ sf += symbol; -+ } - } - else - { -@@ -2082,7 +2399,15 @@ - { - sf += L'\\'; - } -- alphabet.getSymbol(sf,val); -+ alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic -+ if(val == 0) // non-alphabetic, possibly unknown tag; add to sf -+ { -+ sf += symbol; -+ } -+ if(alphabet.isTag(val) || val == 0) -+ { -+ seentags = true; -+ } - if(current_state.size() != 0) - { - if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive) -@@ -2105,12 +2430,21 @@ - } - if(current_state.size() == 0 && result != L"") - { -- if(alphabet.isTag(val)) -+ // We already have a result, but there is still more to read -+ // of the analysis; following tags are not consumed, but -+ // output as target language tags (added to result on -+ // end-of-word) -+ if(alphabet.isTag(val)) // known tag - { - alphabet.getSymbol(queue, val); - } -+ else if (val == 0) // non-alphabetic, possibly unknown tag -+ { -+ queue += symbol; -+ } - else - { -+ // There are no more alive transductions and the current symbol is not a tag -- unknown word! - result = L""; - } - } -@@ -2127,6 +2461,7 @@ - unsigned int end_point = input_word.size()-2; - wstring queue = L""; - bool mark = false; -+ bool seentags = false; // have we seen any tags at all in the analysis? - - if(with_delim == false) - { -@@ -2160,6 +2495,7 @@ - } - else if(input_word[i] == L'<') - { -+ seentags = true; - symbol = L'<'; - for(unsigned int j = i + 1; j <= end_point; j++) - { -@@ -2217,7 +2553,7 @@ - } - - if(current_state.size() == 0) -- { -+ { - if(symbol != L"" && result != L"") - { - queue.append(symbol); -@@ -2224,20 +2560,39 @@ - } - else - { -- // word is not present -+ // word is not present - if(with_delim) -- { -+ { - result = L"^@" + input_word.substr(1); -- } -+ } - else -- { -+ { - result = L"@" + input_word; -- } -+ } - return pair(result, 0); - } - } - } - -+ if (!seentags -+ && L"" == current_state.filterFinals(all_finals, alphabet, -+ escaped_chars, -+ uppercase, firstupper, 0)) -+ { -+ // word is not present -+ if(with_delim) -+ { -+ result = L"^@" + input_word.substr(1); -+ } -+ else -+ { -+ result = L"@" + input_word; -+ } -+ return pair(result, 0); -+ } -+ -+ -+ - // attach unmatched queue automatically - - if(queue != L"") -@@ -2661,10 +3016,11 @@ - return str; - } - -+ - void --FSTProcessor::setDecompoundingMode(bool const value) -+FSTProcessor::setBiltransSurfaceForms(bool const value) - { -- compoundDecomposition = value; -+ biltransSurfaceForms = value; - } - - void -@@ -2688,7 +3044,7 @@ - bool - FSTProcessor::getDecompoundingMode() - { -- return compoundDecomposition; -+ return do_decomposition; - } - - bool -Index: lttoolbox/lt_comp.cc -=================================================================== ---- lttoolbox/lt_comp.cc (revision 21745) -+++ lttoolbox/lt_comp.cc (working copy) -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - - using namespace std; - -@@ -31,7 +32,11 @@ - if(name != NULL) - { - cout << basename(name) << " v" << PACKAGE_VERSION <<": build a letter transducer from a dictionary" << endl; -- cout << "USAGE: " << basename(name) << " lr | rl dictionary_file output_file [acx_file]" << endl; -+ cout << "USAGE: " << basename(name) << " [-avh] lr | rl dictionary_file output_file [acx_file]" << endl; -+ cout << " -v: set language variant" << endl; -+ cout << " -a: set alternative (monodix)" << endl; -+ cout << " -l: set left language variant (bidix)" << endl; -+ cout << " -r: set right language variant (bidix)" << endl; - cout << "Modes:" << endl; - cout << " lr: left-to-right compilation" << endl; - cout << " rl: right-to-left compilation" << endl; -@@ -42,27 +47,113 @@ - - int main(int argc, char *argv[]) - { -- if(argc != 4 && argc != 5) -+ Compiler c; -+ c.setVerbose(false); -+ -+#if HAVE_GETOPT_LONG -+ int option_index=0; -+#endif -+ -+ string vl; -+ string vr; -+ -+ while (true) { -+#if HAVE_GETOPT_LONG -+ static struct option long_options[] = -+ { -+ {"alt", required_argument, 0, 'a'}, -+ {"var", required_argument, 0, 'v'}, -+ {"var-left", required_argument, 0, 'l'}, -+ {"var-right", required_argument, 0, 'r'}, -+ {"help", no_argument, 0, 'h'}, -+ {"verbose", no_argument, 0, 'V'}, -+ {0, 0, 0, 0} -+ }; -+ -+ int cnt=getopt_long(argc, argv, "a:v:l:r:hV", long_options, &option_index); -+#else -+ int cnt=getopt(argc, argv, "a:v:l:r:hV"); -+#endif -+ if (cnt==-1) -+ break; -+ -+ switch (cnt) -+ { -+ case 'a': -+ c.setAltValue(optarg); -+ break; -+ -+ case 'v': -+ c.setVariantValue(optarg); -+ break; -+ -+ case 'l': -+ vl = optarg; -+ c.setVariantLeftValue(vl); -+ break; -+ -+ case 'r': -+ vr = optarg; -+ c.setVariantRightValue(vr); -+ break; -+ -+ case 'V': -+ c.setVerbose(true); -+ break; -+ -+ case 'h': -+ default: -+ endProgram(argv[0]); -+ break; -+ } -+ } -+ -+ string opc; -+ string infile; -+ string outfile; -+ string acxfile; -+ -+ switch(argc - optind + 1) - { -- endProgram(argv[0]); -+ case 5: -+ opc = argv[argc-4]; -+ infile = argv[argc-3]; -+ outfile = argv[argc-2]; -+ acxfile = argv[argc-1]; -+ break; -+ -+ case 4: -+ opc = argv[argc-3]; -+ infile = argv[argc-2]; -+ outfile = argv[argc-1]; -+ break; -+ -+ default: -+ endProgram(argv[0]); -+ break; - } - -- string opc = argv[1]; -- -- Compiler c; -- -- - if(opc == "lr") - { -- if(argc == 5) -+ if(vr == "" && vl != "") - { -- c.parseACX(argv[4], Compiler::COMPILER_RESTRICTION_LR_VAL); -+ cout << "Error: -l specified, but mode is lr" << endl; -+ endProgram(argv[0]); - } -- c.parse(argv[2], Compiler::COMPILER_RESTRICTION_LR_VAL); -+ if(acxfile != "") -+ { -+ c.parseACX(acxfile, Compiler::COMPILER_RESTRICTION_LR_VAL); -+ } -+ c.parse(infile, Compiler::COMPILER_RESTRICTION_LR_VAL); - } - else if(opc == "rl") - { -- c.parse(argv[2], Compiler::COMPILER_RESTRICTION_RL_VAL); -+ if(vl == "" && vr != "") -+ { -+ cout << "Error: -r specified, but mode is rl" << endl; -+ endProgram(argv[0]); -+ } -+ c.parse(infile, Compiler::COMPILER_RESTRICTION_RL_VAL); - } - else - { -@@ -69,10 +160,10 @@ - endProgram(argv[0]); - } - -- FILE *output = fopen(argv[3], "wb"); -+ FILE *output = fopen(outfile.c_str(), "wb"); - if(!output) - { -- cerr << "Error: Cannot open file '" << argv[2] << "'." << endl; -+ cerr << "Error: Cannot open file '" << outfile << "'." << endl; - exit(EXIT_FAILURE); - } - c.write(output); -Index: lttoolbox/fst_processor.h -=================================================================== ---- lttoolbox/fst_processor.h (revision 21745) -+++ lttoolbox/fst_processor.h (working copy) -@@ -43,7 +43,8 @@ - gm_clean, // clear all - gm_unknown, // display unknown words, clear transfer and generation tags - gm_all, // display all -- gm_tagged // tagged generation -+ gm_tagged, // tagged generation -+ gm_tagged_nm // clean tagged generation - }; - - /** -@@ -57,8 +58,6 @@ - */ - map transducers; - -- Pool > *pool; -- - /** - * Current state of lexical analysis - */ -@@ -130,6 +129,12 @@ - bool outOfWord; - - /** -+ * true if we're automatically removing surface forms. -+ */ -+ bool biltransSurfaceForms; -+ -+ -+ /** - * if true, makes always difference between uppercase and lowercase - * characters - */ -@@ -154,9 +159,30 @@ - /** - * try analysing unknown words as compounds - */ -- bool compoundDecomposition; -+ bool do_decomposition; - - /** -+ * Symbol of CompoundOnlyL -+ */ -+ int compoundOnlyLSymbol; -+ -+ /** -+ * Symbol of CompoundR -+ */ -+ int compoundRSymbol; -+ -+ /** -+ * Show or not the controls symbols (as compoundRSymbol) -+ */ -+ bool showControlSymbols; -+ -+ /** -+ * Max compound elements -+ * Hard coded for now, but there might come a switch one day -+ */ -+ int compound_max_elements; -+ -+ /** - * Prints an error of input stream and exits - */ - void streamError(); -@@ -219,6 +245,13 @@ - int readGeneration(FILE *input, FILE *output); - - /** -+ * Read text from stream (biltrans version) -+ * @param input the stream to read -+ * @return the queue of 0-symbols, and the next symbol in the stream -+ */ -+ pair readBilingual(FILE *input, FILE *output); -+ -+ /** - * Read text from stream (SAO version) - * @param input the stream to read - * @return the next symbol in the stream -@@ -248,7 +281,17 @@ - */ - void writeEscaped(wstring const &str, FILE *output); - -+ - /** -+ * Write a string to an output stream, escaping all escapable characters -+ * but keeping symbols without escaping -+ * @param str the string to write, escaping characters -+ * @param output the stream to write in -+ */ -+ void writeEscapedWithTags(wstring const &str, FILE *output); -+ -+ -+ /** - * Checks if an string ends with a particular suffix - * @param str the string to test - * @param the searched suffix -@@ -287,6 +330,8 @@ - */ - void printUnknownWord(wstring const &sf, FILE *output); - -+ void initDecompositionSymbols(); -+ - vector numbers; - int readTMAnalysis(FILE *input); - -@@ -294,7 +339,7 @@ - void printSpace(wchar_t const val, FILE *output); - void skipUntil(FILE *input, FILE *output, wint_t const character); - static wstring removeTags(wstring const &str); -- wstring decompose(wstring str); -+ wstring compoundAnalysis(wstring str, bool uppercase, bool firstupper); - size_t firstNotAlpha(wstring const &sf); - - void analysis_wrapper_null_flush(FILE *input, FILE *output); -@@ -338,9 +383,9 @@ - - void setCaseSensitiveMode(bool const value); - void setDictionaryCaseMode(bool const value); -+ void setBiltransSurfaceForms(bool const value); - void setNullFlush(bool const value); - bool getNullFlush(); -- void setDecompoundingMode(bool const value); - bool getDecompoundingMode(); - }; - -Index: lttoolbox/lt_proc.cc -=================================================================== ---- lttoolbox/lt_proc.cc (revision 21745) -+++ lttoolbox/lt_proc.cc (working copy) -@@ -36,35 +36,42 @@ - void endProgram(char *name) - { - cout << basename(name) << ": process a stream with a letter transducer" << endl; -- cout << "USAGE: " << basename(name) << " [-c] [-a|-g|-n|-d|-p|-s|-t|-b] fst_file [input_file [output_file]]" << endl; -+ cout << "USAGE: " << basename(name) << " [ -a | -b | -c | -d | -e | -g | -n | -p | -s | -t | -v | -h -z -w ] fst_file [input_file [output_file]]" << endl; - cout << "Options:" << endl; - #if HAVE_GETOPT_LONG - cout << " -a, --analysis: morphological analysis (default behavior)" << endl; -- cout << " -b, --bilingual: lexical transference" << endl; -+ cout << " -b, --bilingual: lexical transfer" << endl; - cout << " -c, --case-sensitive: use the literal case of the incoming characters" << endl; -+ cout << " -d, --debugged-gen morph. generation with all the stuff" <attrib(Compiler::COMPILER_RESTRICTION_ATTR); - wstring entrname=this->attrib(Compiler::COMPILER_LEMMA_ATTR); -+ wstring altval = this->attrib(Compiler::COMPILER_ALT_ATTR); -+ wstring varval = this->attrib(Compiler::COMPILER_V_ATTR); -+ wstring varl = this->attrib(Compiler::COMPILER_VL_ATTR); -+ wstring varr = this->attrib(Compiler::COMPILER_VR_ATTR); - - wstring myname = L""; -- if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes") -+ if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes" -+ || altval != L"" && altval != alt -+ || (varval != L"" && varval != variant && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL) -+ || ((varl != L"" && varl != variant_left) && (varr != L"" && varr != variant_right)) -+ || (varl != L"" && varl != variant_left && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL) -+ || (varr != L"" && varr != variant_right && atributo == Compiler::COMPILER_RESTRICTION_LR_VAL)) - { - do - { -@@ -316,11 +325,14 @@ - } - - EntList items, items_lr, items_rl; -- if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL) -+ if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL -+ || (varval != L"" && varval != variant && atributo != Compiler::COMPILER_RESTRICTION_RL_VAL) -+ || varl != L"" && varl != variant_left) - { - items_lr.push_back(pair(L"", L"")); - } -- else if(atributo == Compiler::COMPILER_RESTRICTION_RL_VAL) -+ else if(atributo == Compiler::COMPILER_RESTRICTION_RL_VAL -+ || (varr != L"" && varr != variant_right)) - { - items_rl.push_back(pair(L"", L"")); - } -@@ -594,3 +606,27 @@ - it->second.append(endings.second); - } - } -+ -+void -+Expander::setAltValue(string const &a) -+{ -+ alt = XMLParseUtil::stows(a); -+} -+ -+void -+Expander::setVariantValue(string const &v) -+{ -+ variant = XMLParseUtil::stows(v); -+} -+ -+void -+Expander::setVariantLeftValue(string const &v) -+{ -+ variant_left = XMLParseUtil::stows(v); -+} -+ -+void -+Expander::setVariantRightValue(string const &v) -+{ -+ variant_right = XMLParseUtil::stows(v); -+} -Index: lttoolbox/lt-expand.1 -=================================================================== ---- lttoolbox/lt-expand.1 (revision 21745) -+++ lttoolbox/lt-expand.1 (working copy) -@@ -9,11 +9,28 @@ - architecture: \fBhttp://www.apertium.org\fR. - .SH SYNOPSIS - .B lt-expand -+[ -+.B \-a \fR| -+.B \-v \fR| -+.B \-l \fR| -+.B \-r \fR| -+.B \-h -+] - dictionary_file [output_file] - .PP -+.B lt-expand -+[ -+.B \-\-alt \fR| -+.B \-\-var \fR| -+.B \-\-var\-left \fR| -+.B \-\-var\-right \fR| -+.B \-\-help -+] -+dictionary_file [output_file] -+.PP - .SH DESCRIPTION - .BR lt-expand --Is the application responsible of expanding a dictionary into a -+Is the application responsible for expanding a dictionary into a - simple list of input string-output string pairs by eliminating - paradigms through substitution and unfolding. - .PP -@@ -20,6 +37,23 @@ - The output goes to \fIoutput_file\fR if it is present or to standard - output if it is missing. - .PP -+.SH OPTIONS -+.TP -+.B \-a, \-\-alt -+Sets the value of the \fIalt\fR attribute to use in expansion -+.TP -+.B \-v, \-\-var -+Sets the value of the \fIv\fR attribute to use in expansion of monodixes -+.TP -+.B \-l, \-\-var\-left -+Sets the value of the \fIvl\fR attribute to use in expansion of bidixes -+.TP -+.B \-r, \-\-var\-right -+Sets the value of the \fIvr\fR attribute to use in expansion of bidixes -+.TP -+.B \-h, \-\-help -+Prints a short help message -+.PP - .SH FILES - .B dictionary_file - The input dictionary to expand. -@@ -34,5 +68,4 @@ - .SH BUGS - Lots of...lurking in the dark and waiting for you! - .SH AUTHOR --(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights --reserved. -+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. -Index: lttoolbox/dix.dtd -=================================================================== ---- lttoolbox/dix.dtd (revision 21745) -+++ lttoolbox/dix.dtd (working copy) -@@ -1,4 +1,21 @@ - - - -@@ -66,6 +87,10 @@ - - - -+ -+ -+ -+ - - - attrib(COMPILER_RESTRICTION_ATTR); - wstring ignore = this->attrib(COMPILER_IGNORE_ATTR); -+ wstring altval = this->attrib(COMPILER_ALT_ATTR); -+ wstring varval = this->attrib(COMPILER_V_ATTR); -+ wstring varl = this->attrib(COMPILER_VL_ATTR); -+ wstring varr = this->attrib(COMPILER_VR_ATTR); - - // if entry is masked by a restriction of direction or an ignore mark -- if((atributo != L"" && atributo != direction) || ignore == COMPILER_IGNORE_YES_VAL) -+ if((atributo != L"" && atributo != direction) -+ || ignore == COMPILER_IGNORE_YES_VAL -+ || (altval != L"" && altval != alt) -+ || (direction == COMPILER_RESTRICTION_RL_VAL && varval != L"" && varval != variant) -+ || (direction == COMPILER_RESTRICTION_RL_VAL && varl != L"" && varl != variant_left) -+ || (direction == COMPILER_RESTRICTION_LR_VAL && varr != L"" && varr != variant_right)) - { - // parse to the end of the entry - wstring name = L""; -@@ -662,6 +696,11 @@ - wstring name = XMLParseUtil::towstring(xmlTextReaderConstName(reader)); - skipBlanks(name); - -+ if(current_paradigm == L"" && verbose) -+ { -+ first_element = true; -+ } -+ - int tipo = xmlTextReaderNodeType(reader); - if(name == COMPILER_PAIR_ELEM) - { -@@ -845,3 +884,33 @@ - it->second.write(output); - } - } -+ -+void -+Compiler::setAltValue(string const &a) -+{ -+ alt = XMLParseUtil::stows(a); -+} -+ -+void -+Compiler::setVariantValue(string const &v) -+{ -+ variant = XMLParseUtil::stows(v); -+} -+ -+void -+Compiler::setVariantLeftValue(string const &v) -+{ -+ variant_left = XMLParseUtil::stows(v); -+} -+ -+void -+Compiler::setVariantRightValue(string const &v) -+{ -+ variant_right = XMLParseUtil::stows(v); -+} -+ -+void -+Compiler::setVerbose(bool verbosity) -+{ -+ verbose = verbosity; -+} -Index: lttoolbox/transducer.h -=================================================================== ---- lttoolbox/transducer.h (revision 21745) -+++ lttoolbox/transducer.h (working copy) -@@ -146,6 +146,13 @@ - bool isFinal(int const state) const; - - /** -+ * Test if a pattern is recognised by the FST -+ * @param a widestring of the pattern to be recognised -+ * @return true if the pattern is recognised by the transducer -+ */ -+ bool recognise(wstring patro, Alphabet &a, FILE *err = stderr); -+ -+ /** - * Set the state as a final or not, yes by default - * @param state the state - * @param value if true, the state is set as final state -@@ -179,6 +186,12 @@ - void reverse(int const epsilon_tag = 0); - - /** -+ * Print all the transductions of a transducer in ATT format -+ * @param epsilon_tag the tag to take as epsilon -+ */ -+ void show(Alphabet &a, FILE *output = stdout, int const epsilon_tag = 0); -+ -+ /** - * Determinize the transducer - * @param epsilon_tag the tag to take as epsilon - */ -@@ -242,6 +255,12 @@ - bool isEmpty(int const state) const; - - /** -+ * Returns the number of transitions from a given state -+ * @return the number of transitions -+ */ -+ int getStateSize(int const state); -+ -+ /** - * Write method - * @param output the stream to write to - * @param decalage offset to sum to the tags -Index: lttoolbox/lt_expand.cc -=================================================================== ---- lttoolbox/lt_expand.cc (revision 21745) -+++ lttoolbox/lt_expand.cc (working copy) -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - - #ifdef _MSC_VER - #include -@@ -37,7 +38,7 @@ - if(name != NULL) - { - cout << basename(name) << " v" << PACKAGE_VERSION <<": expand the contents of a dictionary file" << endl; -- cout << "USAGE: " << basename(name) << " dictionary_file [output_file]" << endl; -+ cout << "USAGE: " << basename(name) << " [-avlrh] dictionary_file [output_file]" << endl; - } - exit(EXIT_FAILURE); - } -@@ -45,14 +46,67 @@ - int main(int argc, char *argv[]) - { - FILE *input = NULL, *output = NULL; -+ Expander e; - -- switch(argc) -+#if HAVE_GETOPT_LONG -+ int option_index=0; -+#endif -+ -+ while (true) { -+#if HAVE_GETOPT_LONG -+ static struct option long_options[] = -+ { -+ {"alt", required_argument, 0, 'a'}, -+ {"var", required_argument, 0, 'v'}, -+ {"var-left", required_argument, 0, 'l'}, -+ {"var-right", required_argument, 0, 'r'}, -+ {"help", no_argument, 0, 'h'}, -+ {0, 0, 0, 0} -+ }; -+ -+ int cnt=getopt_long(argc, argv, "a:v:l:r:h", long_options, &option_index); -+#else -+ int cnt=getopt(argc, argv, "a:v:l:r:h"); -+#endif -+ if (cnt==-1) -+ break; -+ -+ switch (cnt) -+ { -+ case 'a': -+ e.setAltValue(optarg); -+ break; -+ -+ case 'v': -+ e.setVariantValue(optarg); -+ break; -+ -+ case 'l': -+ e.setVariantLeftValue(optarg); -+ break; -+ -+ case 'r': -+ e.setVariantRightValue(optarg); -+ break; -+ -+ case 'h': -+ default: -+ endProgram(argv[0]); -+ break; -+ } -+ } -+ -+ string infile; -+ string outfile; -+ -+ switch(argc - optind + 1) - { - case 2: -- input = fopen(argv[1], "rb"); -+ infile = argv[argc-1]; -+ input = fopen(infile.c_str(), "rb"); - if(input == NULL) - { -- cerr << "Error: Cannot open file '" << argv[1] << "'." << endl; -+ cerr << "Error: Cannot open file '" << infile << "'." << endl; - exit(EXIT_FAILURE); - } - fclose(input); -@@ -60,18 +114,20 @@ - break; - - case 3: -- input = fopen(argv[1], "rb"); -+ infile = argv[argc-2]; -+ input = fopen(infile.c_str(), "rb"); - if(input == NULL) - { -- cerr << "Error: Cannot open file '" << argv[1] << "'." << endl; -+ cerr << "Error: Cannot open file '" << infile << "'." << endl; - exit(EXIT_FAILURE); - } - fclose(input); - -- output = fopen(argv[2], "wb"); -+ outfile = argv[argc-1]; -+ output = fopen(argv[argc-1], "wb"); - if(output == NULL) - { -- cerr << "Error: Cannot open file '" << argv[2] << "'." << endl; -+ cerr << "Error: Cannot open file '" << outfile << "'." << endl; - exit(EXIT_FAILURE); - } - break; -@@ -85,8 +141,7 @@ - _setmode(_fileno(output), _O_U8TEXT); - #endif - -- Expander e; -- e.expand(argv[1], output); -+ e.expand(infile, output); - fclose(output); - - return EXIT_SUCCESS; -Index: lttoolbox/state.cc -=================================================================== ---- lttoolbox/state.cc (revision 21745) -+++ lttoolbox/state.cc (working copy) -@@ -20,10 +20,15 @@ - - #include - #include -+#include - --State::State(Pool > *p) -+//debug// -+//#include -+//using namespace std; -+//debug// -+ -+State::State() - { -- pool = p; - } - - State::~State() -@@ -51,10 +56,9 @@ - void - State::destroy() - { -- // release references - for(size_t i = 0, limit = state.size(); i != limit; i++) - { -- pool->release(state[i].sequence); -+ delete state[i].sequence; - } - - state.clear(); -@@ -66,15 +70,14 @@ - // release references - for(size_t i = 0, limit = state.size(); i != limit; i++) - { -- pool->release(state[i].sequence); -+ delete state[i].sequence; - } - - state = s.state; -- pool = s.pool; - - for(size_t i = 0, limit = state.size(); i != limit; i++) - { -- vector *tmp = pool->get(); -+ vector *tmp = new vector(); - *tmp = *(state[i].sequence); - state[i].sequence = tmp; - } -@@ -90,7 +93,7 @@ - State::init(Node *initial) - { - state.clear(); -- state.push_back(TNodeState(initial,pool->get(),false)); -+ state.push_back(TNodeState(initial, new vector(), false)); - state[0].sequence->clear(); - epsilonClosure(); - } -@@ -113,7 +116,7 @@ - { - for(int j = 0; j != it->second.size; j++) - { -- vector *new_v = pool->get(); -+ vector *new_v = new vector(); - *new_v = *(state[i].sequence); - if(it->first != 0) - { -@@ -122,7 +125,7 @@ - new_state.push_back(TNodeState(it->second.dest[j], new_v, state[i].dirty||false)); - } - } -- pool->release(state[i].sequence); -+ delete state[i].sequence; - } - - state = new_state; -@@ -147,8 +150,8 @@ - { - for(int j = 0; j != it->second.size; j++) - { -- vector *new_v = pool->get(); -- *new_v = *(state[i].sequence); -+ vector *new_v = new vector(); -+ *new_v = *(state[i].sequence); - if(it->first != 0) - { - new_v->push_back(it->second.out_tag[j]); -@@ -161,7 +164,7 @@ - { - for(int j = 0; j != it->second.size; j++) - { -- vector *new_v = pool->get(); -+ vector *new_v = new vector(); - *new_v = *(state[i].sequence); - if(it->first != 0) - { -@@ -170,7 +173,7 @@ - new_state.push_back(TNodeState(it->second.dest[j], new_v, true)); - } - } -- pool->release(state[i].sequence); -+ delete state[i].sequence; - } - - state = new_state; -@@ -187,7 +190,7 @@ - { - for(int j = 0 ; j != it2->second.size; j++) - { -- vector *tmp = pool->get(); -+ vector *tmp = new vector(); - *tmp = *(state[i].sequence); - if(it2->second.out_tag[j] != 0) - { -@@ -199,6 +202,69 @@ - } - } - -+void -+State::apply(int const input, int const alt1, int const alt2) -+{ -+ vector new_state; -+ if(input == 0 || alt1 == 0 || alt2 == 0) -+ { -+ state = new_state; -+ return; -+ } -+ -+ for(size_t i = 0, limit = state.size(); i != limit; i++) -+ { -+ map::const_iterator it; -+ it = state[i].where->transitions.find(input); -+ if(it != state[i].where->transitions.end()) -+ { -+ for(int j = 0; j != it->second.size; j++) -+ { -+ vector *new_v = new vector(); -+ *new_v = *(state[i].sequence); -+ if(it->first != 0) -+ { -+ new_v->push_back(it->second.out_tag[j]); -+ } -+ new_state.push_back(TNodeState(it->second.dest[j], new_v, state[i].dirty||false)); -+ } -+ } -+ it = state[i].where->transitions.find(alt1); -+ if(it != state[i].where->transitions.end()) -+ { -+ for(int j = 0; j != it->second.size; j++) -+ { -+ vector *new_v = new vector(); -+ *new_v = *(state[i].sequence); -+ if(it->first != 0) -+ { -+ new_v->push_back(it->second.out_tag[j]); -+ } -+ new_state.push_back(TNodeState(it->second.dest[j], new_v, true)); -+ } -+ } -+ it = state[i].where->transitions.find(alt2); -+ if(it != state[i].where->transitions.end()) -+ { -+ for(int j = 0; j != it->second.size; j++) -+ { -+ vector *new_v = new vector(); -+ *new_v = *(state[i].sequence); -+ if(it->first != 0) -+ { -+ new_v->push_back(it->second.out_tag[j]); -+ } -+ new_state.push_back(TNodeState(it->second.dest[j], new_v, true)); -+ } -+ } -+ -+ delete state[i].sequence; -+ } -+ -+ state = new_state; -+} -+ -+ - void - State::step(int const input) - { -@@ -213,6 +279,37 @@ - epsilonClosure(); - } - -+void -+State::step(int const input, int const alt1, int const alt2) -+{ -+ apply(input, alt1, alt2); -+ epsilonClosure(); -+} -+ -+void -+State::step_case(wchar_t val, wchar_t val2, bool caseSensitive) -+{ -+ if (!iswupper(val) || caseSensitive) { -+ step(val, val2); -+ } else if(val != towlower(val)) { -+ step(val, towlower(val), val2); -+ } else { -+ step(val, val2); -+ } -+} -+ -+ -+void -+State::step_case(wchar_t val, bool caseSensitive) -+{ -+ if (!iswupper(val) || caseSensitive) { -+ step(val); -+ } else { -+ step(val, towlower(val)); -+ } -+} -+ -+ - bool - State::isFinal(set const &finals) const - { -@@ -282,6 +379,60 @@ - return result; - } - -+ -+set > > -+State::filterFinalsLRX(set const &finals, -+ Alphabet const &alphabet, -+ set const &escaped_chars, -+ bool uppercase, bool firstupper, int firstchar) const -+{ -+ set > > results; -+ -+ vector current_result; -+ wstring rule_id = L""; -+ -+ // /<$>station<$><6> -+ -+ // if <$> current_result.push_back(current_word) -+ // if / results.insert(current_result) -+ -+ for(size_t i = 0, limit = state.size(); i != limit; i++) -+ { -+ if(finals.find(state[i].where) != finals.end()) -+ { -+ current_result.clear(); -+ rule_id = L""; -+ wstring current_word = L""; -+ for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++) -+ { -+ if(escaped_chars.find((*(state[i].sequence))[j]) != escaped_chars.end()) -+ { -+ current_word += L'\\'; -+ } -+ wstring sym = L""; -+ alphabet.getSymbol(sym, (*(state[i].sequence))[j], uppercase); -+ if(sym == L"<$>") -+ { -+ if(current_word != L"") -+ { -+ current_result.push_back(current_word); -+ } -+ current_word = L""; -+ } -+ else -+ { -+ current_word += sym; -+ } -+ } -+ rule_id = current_word; -+ results.insert(make_pair(rule_id, current_result)); -+ } -+ } -+ -+ return results; -+} -+ -+ - wstring - State::filterFinalsSAO(set const &finals, - Alphabet const &alphabet, -@@ -438,3 +589,149 @@ - - return result; - } -+ -+ -+ -+void -+State::pruneCompounds(int requiredSymbol, int separationSymbol, int compound_max_elements) -+{ -+ int minNoOfCompoundElements = compound_max_elements; -+ int *noOfCompoundElements = new int[state.size()]; -+ -+ //wcerr << L"pruneCompounds..." << endl; -+ -+ for (unsigned int i = 0; i seq = *state.at(i).sequence; -+ -+ if (lastPartHasRequiredSymbol(seq, requiredSymbol, separationSymbol)) { -+ int this_noOfCompoundElements = 0; -+ for (int j = seq.size()-2; j>0; j--) if (seq.at(j)==separationSymbol) this_noOfCompoundElements++; -+ noOfCompoundElements[i] = this_noOfCompoundElements; -+ minNoOfCompoundElements = (minNoOfCompoundElements < this_noOfCompoundElements) ? -+ minNoOfCompoundElements : this_noOfCompoundElements; -+ } -+ else { -+ noOfCompoundElements[i] = INT_MAX; -+ //wcerr << L"Prune - No requiered symbol in state number " << i << endl; -+ } -+ } -+ -+ // remove states with more than minimum number of compounds (or without the requiered symbol in the last part) -+ vector::iterator it = state.begin(); -+ int i=0; -+ while(it != state.end()) { -+ if (noOfCompoundElements[i] > minNoOfCompoundElements) { -+ delete (*it).sequence; -+ it = state.erase(it); -+ //wcerr << L"Prune - State number " << i << L" removed!" << endl; -+ } -+ else it++; -+ i++; -+ } -+ -+ delete[] noOfCompoundElements; -+} -+ -+ -+ -+void -+State::pruneStatesWithForbiddenSymbol(int forbiddenSymbol) -+{ -+ vector::iterator it = state.begin(); -+ while(it != state.end()) { -+ vector *seq = (*it).sequence; -+ bool found = false; -+ for(int i = seq->size()-1; i>=0; i--) { -+ if(seq->at(i) == forbiddenSymbol) { -+ i=-1; -+ delete (*it).sequence; -+ it = state.erase(it); -+ found = true; -+ } -+ } -+ if (!found) it++; -+ } -+} -+ -+ -+ -+bool -+State::lastPartHasRequiredSymbol(const vector &seq, int requiredSymbol, int separationSymbol) -+{ -+ // state is final - it should be restarted it with all elements in stateset restart_state, with old symbols conserved -+ bool restart=false; -+ for (int n=seq.size()-1; n>=0; n--) { -+ int symbol=seq.at(n); -+ if (symbol==requiredSymbol) { -+ restart=true; -+ break; -+ } -+ if (symbol==separationSymbol) { -+ break; -+ } -+ } -+ return restart; -+} -+ -+ -+void -+State::restartFinals(const set &finals, int requiredSymbol, State *restart_state, int separationSymbol) -+{ -+ -+ for (unsigned int i=0; i 0) { -+ bool restart = lastPartHasRequiredSymbol(*(state_i.sequence), requiredSymbol, separationSymbol); -+ if (restart) { -+ if (restart_state != NULL) { -+ for (unsigned int j=0; jstate.size(); j++) { -+ TNodeState initst = restart_state->state.at(j); -+ vector *tnvec = new vector; -+ -+ for(unsigned int k=0; k < state_i.sequence->size(); k++) tnvec->push_back(state_i.sequence->at(k)); -+ TNodeState tn(initst.where, tnvec, state_i.dirty); -+ tn.sequence->push_back(separationSymbol); -+ state.push_back(tn); -+ } -+ } -+ } -+ } -+ } -+} -+ -+ -+ -+wstring -+State::getReadableString(const Alphabet &a) -+{ -+ wstring retval = L"["; -+ -+ for(unsigned int i=0; i* seq = state.at(i).sequence; -+ if(seq != NULL) for (unsigned int j=0; jsize(); j++) { -+ wstring ws = L""; -+ a.getSymbol(ws, seq->at(j)); -+ //if(ws == L"") ws = L"?"; -+ retval.append(ws); -+ } -+ -+ /*Node *where = state.at(i).where; -+ if(where == NULL) retval.append(L"→@null"); -+ else { -+ retval.append(L"→"); -+ map::iterator it; -+ wstring ws; -+ for (it = where->transitions.begin(); it != where->transitions.end(); it++) { -+ int symbol = (*it).first; -+ a.getSymbol(ws, symbol); -+ retval.append(ws); -+ } -+ }*/ -+ if (i+1 < state.size()) retval.append(L", "); -+ } -+ retval.append(L"]"); -+ return retval; -+} -+ -Index: lttoolbox/alphabet.cc -=================================================================== ---- lttoolbox/alphabet.cc (revision 21745) -+++ lttoolbox/alphabet.cc (working copy) -@@ -221,3 +221,9 @@ - { - return spairinv[code]; - } -+ -+ -+void Alphabet::setSymbol(int symbol, wstring newSymbolString) { -+ //Should be a special character! -+ if (symbol < 0) slexicinv[-symbol-1] = newSymbolString; -+} -Index: lttoolbox/lt-tmxproc.1 -=================================================================== ---- lttoolbox/lt-tmxproc.1 (revision 21745) -+++ lttoolbox/lt-tmxproc.1 (working copy) -@@ -30,5 +30,4 @@ - .SH BUGS - Lots of...lurking in the dark and waiting for you! - .SH AUTHOR --(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights --reserved. -+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. -Index: lttoolbox/lt-comp.1 -=================================================================== ---- lttoolbox/lt-comp.1 (revision 21745) -+++ lttoolbox/lt-comp.1 (working copy) -@@ -10,10 +10,30 @@ - .SH SYNOPSIS - .B lt-comp - [ -+.B \-a \fR| -+.B \-v \fR| -+.B \-l \fR| -+.B \-r \fR| -+.B \-h -+] -+[ - .B lr \fR| - .B rl - ] dictionary_file output_file - .PP -+.B lt-comp -+[ -+.B \-\-alt \fR| -+.B \-\-var \fR| -+.B \-\-var\-left \fR| -+.B \-\-var\-right \fR| -+.B \-\-help -+] -+[ -+.B lr \fR| -+.B rl -+] dictionary_file output_file -+.PP - .SH DESCRIPTION - .BR lt-comp - Is the application responsible of compiling dictionaries used by -@@ -23,6 +43,32 @@ - .PP - .SH OPTIONS - .TP -+.B \-a, \-\-alt -+Sets the value of the \fIalt\fR attribute to use in compilation. -+ -+Note that if no value is set, all entries containing an \fIalt\fR -+attribute are omitted. -+.TP -+.B \-v, \-\-var -+Sets the value of the \fIv\fR attribute to use in compilation. -+This should only be used with monodixes; for bidixes, see \-l and \-r. -+ -+Note that if no value is set, all entries containing a \fIv\fR -+attribute are considered to be \fIleft-to-right\fR. -+.TP -+.B \-l, \-\-var\-left -+Sets the value of the \fIvl\fR attribute for use in compilation of bidixes. -+"Left" here refers to the side of the dictionary, so this option is only valid -+in \fIrl\fR mode. -+.TP -+.B \-r, \-\-var\-right -+Sets the value of the \fIvr\fR attribute for use in compilation of bidixes. -+"Right" here refers to the side of the dictionary, so this option is only valid -+in \fIlr\fR mode. -+.TP -+.B \-h, \-\-help -+Prints a short help message -+.TP - .B lr - The resulting transducer will process dictionary entries - \fIleft-to-right\fR. -@@ -45,5 +91,4 @@ - .SH BUGS - Lots of...lurking in the dark and waiting for you! - .SH AUTHOR --(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights --reserved. -+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. -Index: lttoolbox/lt_locale.h -=================================================================== ---- lttoolbox/lt_locale.h (revision 21745) -+++ lttoolbox/lt_locale.h (working copy) -@@ -16,6 +16,7 @@ - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. - */ -+ - #ifndef _MYLOCALE_ - #define _MYLOCALE_ - -Index: lttoolbox/expander.h -=================================================================== ---- lttoolbox/expander.h (revision 21745) -+++ lttoolbox/expander.h (working copy) -@@ -42,6 +42,26 @@ - xmlTextReaderPtr reader; - - /** -+ * The alt value -+ */ -+ wstring alt; -+ -+ /** -+ * The variant value (monodix) -+ */ -+ wstring variant; -+ -+ /** -+ * The variant value (left side of bidix) -+ */ -+ wstring variant_left; -+ -+ /** -+ * The variant value (right side of bidix) -+ */ -+ wstring variant_right; -+ -+ /** - * The paradigm being compiled - */ - wstring current_paradigm; -@@ -186,6 +206,29 @@ - * Compile dictionary to letter transducers - */ - void expand(string const &fichero, FILE *output); -+ /** -+ * Set the alt value to use in compilation -+ * @param a the value -+ */ -+ void setAltValue(string const &a); -+ -+ /** -+ * Set the variant value to use in expansion -+ * @param v the value -+ */ -+ void setVariantValue(string const &v); -+ -+ /** -+ * Set the variant_left value to use in expansion -+ * @param v the value -+ */ -+ void setVariantLeftValue(string const &v); -+ -+ /** -+ * Set the variant_right value to use in expansion -+ * @param v the value -+ */ -+ void setVariantRightValue(string const &v); - }; - - -Index: lttoolbox/transducer.cc -=================================================================== ---- lttoolbox/transducer.cc (revision 21745) -+++ lttoolbox/transducer.cc (working copy) -@@ -18,6 +18,7 @@ - */ - #include - #include -+#include - #include - #include - -@@ -187,6 +188,13 @@ - void - Transducer::setFinal(int const state, bool valor) - { -+ int initial_copy = getInitial(); -+/* -+ if(state == initial_copy) -+ { -+ wcerr << L"Setting initial state to final" << endl; -+ } -+*/ - if(valor) - { - finals.insert(state); -@@ -609,3 +617,119 @@ - finals.clear(); - finals.insert(tmp); - } -+ -+void -+Transducer::show(Alphabet &alphabet, FILE *output, int const epsilon_tag) -+{ -+ joinFinals(epsilon_tag); -+ -+ map > temporal; -+ -+ for(map >::iterator it = transitions.begin(); it != transitions.end(); it++) -+ { -+ multimap aux = it->second; -+ -+ for(multimap::iterator it2 = aux.begin(); it2 != aux.end(); it2++) -+ { -+ pair t = alphabet.decode(it2->first); -+ fwprintf(output, L"%d\t", it->first); -+ fwprintf(output, L"%d\t", it2->second); -+ wstring l = L""; -+ alphabet.getSymbol(l, t.first); -+ if(l == L"") // If we find an epsilon -+ { -+ fwprintf(output, L"ε\t", l.c_str()); -+ } -+ else -+ { -+ fwprintf(output, L"%S\t", l.c_str()); -+ } -+ wstring r = L""; -+ alphabet.getSymbol(r, t.second); -+ if(r == L"") // If we find an epsilon -+ { -+ fwprintf(output, L"ε\t", r.c_str()); -+ } -+ else -+ { -+ fwprintf(output, L"%S\t", r.c_str()); -+ } -+ fwprintf(output, L"\n"); -+ } -+ } -+ -+ for(set::iterator it3 = finals.begin(); it3 != finals.end(); it3++) -+ { -+ fwprintf(output, L"%d\n", *it3); -+ } -+} -+ -+int -+Transducer::getStateSize(int const state) -+{ -+ set states; -+ set myclosure1 = closure(state, 0); -+ states.insert(myclosure1.begin(), myclosure1.end()); -+ int num_transitions = 0; -+ -+ for(set::iterator it2 = states.begin(); it2 != states.end(); it2++) -+ { -+ num_transitions += transitions[*it2].size(); -+ } -+ -+ return num_transitions; -+} -+ -+bool -+Transducer::recognise(wstring patro, Alphabet &a, FILE *err) -+{ -+ bool accepted = false; -+ set states ; -+ -+ set myclosure1 = closure(getInitial(), 0); -+ states.insert(myclosure1.begin(), myclosure1.end()); -+ // For each of the characters in the input string -+ for(wstring::iterator it = patro.begin(); it != patro.end(); it++) -+ { -+ set new_state; //Transducer::closure(int const state, int const epsilon_tag) -+ int sym = *it; -+ // For each of the current alive states -+ //fwprintf(err, L"step: %S %C (%d)\n", patro.c_str(), *it, sym); -+ for(set::iterator it2 = states.begin(); it2 != states.end(); it2++) -+ { -+ multimap p = transitions[*it2]; -+ // For each of the transitions in the state -+ -+ for(multimap::iterator it3 = p.begin(); it3 != p.end(); it3++) -+ { -+ -+ pair t = a.decode(it3->first); -+ wstring l = L""; -+ a.getSymbol(l, t.first); -+ //wstring r = L""; -+ //a.getSymbol(r, t.second); -+ -+ //fwprintf(err, L" -> state: %d, trans: %S:%S, targ: %d\n", *it2, (l == L"") ? L"ε" : l.c_str(), (r == L"") ? L"ε" : r.c_str(), it3->second); -+ //if(l.find(*it) != wstring::npos || l == L"" ) -+ if(l.find(*it) != wstring::npos) -+ { -+ set myclosure = closure(it3->second, 0); -+ //wcerr << L"Before closure alives: " <::iterator it4 = states.begin(); it4 != states.end(); it4++) -+ { -+ if(isFinal(*it4)) -+ { -+ accepted = true; -+ } -+ } -+ -+ return accepted; -+} -+ -Index: lttoolbox/pool.h -=================================================================== ---- lttoolbox/pool.h (revision 21745) -+++ lttoolbox/pool.h (working copy) -@@ -1,175 +0,0 @@ --/* -- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante -- * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU General Public License as -- * published by the Free Software Foundation; either version 2 of the -- * License, or (at your option) any later version. -- * -- * This program is distributed in the hope that it will be useful, but -- * WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- * General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with this program; if not, write to the Free Software -- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -- * 02111-1307, USA. -- */ --#ifndef _GENERIC_POOL_ --#define _GENERIC_POOL_ -- --#include -- --using namespace std; -- --/** -- * Pool of T objects -- */ --template --class Pool --{ --private: -- /** -- * Free pointers to objects -- */ -- list free; -- -- /** -- * Currently created objects -- */ -- list created; -- -- /** -- * copy method -- * @param other pool object -- */ -- void copy(Pool const &p) -- { -- created = p.created; -- -- // all new members are available -- for(typename list::iterator it = created.begin(), limit = created.end(); -- it != limit; it++) -- { -- free.push_back(&(*it)); -- } -- } -- -- /** -- * destroy method -- */ -- void destroy() -- { -- // do nothing -- } -- -- /** -- * Allocate a pool of nelems size -- * @param nelems initial size of the pool -- */ -- void init(unsigned int const nelems) -- { -- created.clear(); -- free.clear(); -- T tmp; -- for(unsigned int i = 0; i != nelems; i++) -- { -- created.push_front(tmp); -- free.push_front(&(*(created.begin()))); -- } -- } -- -- /** -- * Allocate a pool of nelems size with objects equal to 'object' -- * @param nelems initial size of the pool -- * @param object initial value of the objects in the pool -- */ -- void init(unsigned int const nelems, T const &object) -- { -- created.clear(); -- free.clear(); -- for(unsigned int i = 0; i != nelems; i++) -- { -- created.push_front(object); -- free.push_front(&(*(created.begin()))); -- } -- } -- -- --public: -- -- /** -- * Constructor -- */ -- Pool() -- { -- init(1); -- } -- -- /** -- * Parametrized constructor -- * @param nelems initial size of the pool -- * @param object initial value of the objects in the pool -- */ -- Pool(unsigned int const nelems, T const &object) -- { -- init(nelems, object); -- } -- -- /** -- * Parametrized constructor -- * @param nelems initial size of the pool -- */ -- Pool(unsigned int const nelems) -- { -- init(nelems); -- } -- -- /** -- * Destructor -- */ -- ~Pool() -- { -- destroy(); -- } -- -- /** -- * Copy constructor -- */ -- Pool(Pool const &p) -- { -- copy(p); -- } -- -- /** -- * Allocate a pointer to a free 'new' object. -- * @return pointer to the object -- */ -- T * get() -- { -- if(free.size() != 0) -- { -- T *result = *(free.begin()); -- free.erase(free.begin()); -- return result; -- } -- else -- { -- T tmp; -- created.push_front(tmp); -- return &(*(created.begin())); -- } -- } -- -- /** -- * Release a no more needed instance of a pooled object -- * @param item the no more needed instance of the object -- */ -- void release(T *item) -- { -- free.push_front(item); -- } --}; -- --#endif -Index: lttoolbox/compiler.h -=================================================================== ---- lttoolbox/compiler.h (revision 21745) -+++ lttoolbox/compiler.h (working copy) -@@ -44,6 +44,26 @@ - xmlTextReaderPtr reader; - - /** -+ * The alt value -+ */ -+ wstring alt; -+ -+ /** -+ * The variant value (monodix) -+ */ -+ wstring variant; -+ -+ /** -+ * The variant value (left side of bidix) -+ */ -+ wstring variant_left; -+ -+ /** -+ * The variant value (right side of bidix) -+ */ -+ wstring variant_right; -+ -+ /** - * The paradigm being compiled - */ - wstring current_paradigm; -@@ -65,6 +85,16 @@ - wstring letters; - - /** -+ * Set verbose mode: warnings which may or may not be correct -+ */ -+ bool verbose; -+ -+ /** -+ * First element (of an entry) -+ */ -+ bool first_element; -+ -+ /** - * Identifier of all the symbols during the compilation - */ - Alphabet alphabet; -@@ -264,10 +294,14 @@ - static wstring const COMPILER_LEMMA_ATTR; - static wstring const COMPILER_IGNORE_ATTR; - static wstring const COMPILER_IGNORE_YES_VAL; -+ static wstring const COMPILER_ALT_ATTR; -+ static wstring const COMPILER_V_ATTR; -+ static wstring const COMPILER_VL_ATTR; -+ static wstring const COMPILER_VR_ATTR; - - - /** -- * Copnstructor -+ * Constructor - */ - Compiler(); - -@@ -292,6 +326,35 @@ - * @param fd the stream where write the result - */ - void write(FILE *fd); -+ -+ /** -+ * Set verbose output -+ */ -+ void setVerbose(bool verbosity = false); -+ -+ /** -+ * Set the alt value to use in compilation -+ * @param a the value -+ */ -+ void setAltValue(string const &a); -+ -+ /** -+ * Set the variant value to use in compilation -+ * @param v the value -+ */ -+ void setVariantValue(string const &v); -+ -+ /** -+ * Set the variant_left value to use in compilation -+ * @param v the value -+ */ -+ void setVariantLeftValue(string const &v); -+ -+ /** -+ * Set the variant_right value to use in compilation -+ * @param v the value -+ */ -+ void setVariantRightValue(string const &v); - }; - - -Index: lttoolbox/lt-tmxcomp.1 -=================================================================== ---- lttoolbox/lt-tmxcomp.1 (revision 21745) -+++ lttoolbox/lt-tmxcomp.1 (working copy) -@@ -38,5 +38,4 @@ - .SH BUGS - Lots of...lurking in the dark and waiting for you! - .SH AUTHOR --(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights --reserved. -+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. -Index: lttoolbox/alphabet.h -=================================================================== ---- lttoolbox/alphabet.h (revision 21745) -+++ lttoolbox/alphabet.h (working copy) -@@ -145,6 +145,13 @@ - */ - bool isTag(int const symbol) const; - -+ /** -+ * Sets an already existing symbol to represent a new value -+ * @param symbol the code of the symbol to set -+ * @param newSymbolString the new string for this symbol -+ */ -+ void setSymbol(int symbol, wstring newSymbolString); -+ - pair const & decode(int const code) const; - - }; -Index: lttoolbox/state.h -=================================================================== ---- lttoolbox/state.h (revision 21745) -+++ lttoolbox/state.h (working copy) -@@ -19,6 +19,7 @@ - #ifndef _STATE_ - #define _STATE_ - -+#include - #include - #include - #include -@@ -26,7 +27,9 @@ - - #include - #include --#include -+#include -+#include -+#include - - using namespace std; - -@@ -43,7 +46,7 @@ - { - Node *where; - vector *sequence; -- bool dirty; -+ bool dirty; // What does "dirty" mean ? - - TNodeState(Node * const &w, vector * const &s, bool const &d): where(w), sequence(s), dirty(d){} - TNodeState & operator=(TNodeState const &other) -@@ -58,17 +61,6 @@ - vector state; - - /** -- * Pool of wchar_t vectors, for efficience (static class) -- */ -- Pool > *pool; -- -- /** -- * Copy function -- * @param s the state to be copied -- */ -- void copy(State const &s); -- -- /** - * Destroy function - */ - void destroy(); -@@ -86,6 +78,8 @@ - */ - void apply(int const input, int const alt); - -+ void apply(int const input, int const alt1, int const alt2); -+ - /** - * Calculate the epsilon closure over the current state, replacing - * its content. -@@ -92,11 +86,21 @@ - */ - void epsilonClosure(); - -+ bool lastPartHasRequiredSymbol(const vector &seq, int requiredSymbol, int separationSymbol); -+ - public: -+ - /** -+ * Copy function -+ * @param s the state to be copied -+ */ -+ void copy(State const &s); -+ -+ -+ /** - * Constructor - */ -- State(Pool > *); -+ State(); - - /** - * Destructor -@@ -135,6 +139,13 @@ - */ - void step(int const input, int const alt); - -+ void step(int const input, int const alt1, int const alt2); -+ -+ void step_case(wchar_t val, bool caseSensitive); -+ -+ void step_case(wchar_t val, wchar_t val2, bool caseSensitive); -+ -+ - /** - * Init the state with the initial node and empty output - * @param initial the initial node of the transducer -@@ -142,6 +153,21 @@ - void init(Node *initial); - - /** -+ * Remove states not containing a specific symbol in their last 'part', and states -+ * with more than a number of 'parts' -+ * @param requieredSymbol the symbol requiered in the last part -+ * @param separationSymbol the symbol that represent the separation between two parts -+ * @param compound_max_elements the maximum part number allowed -+ */ -+ void pruneCompounds(int requiredSymbol, int separationSymbol, int compound_max_elements); -+ -+ /** -+ * Remove states containing a forbidden symbol -+ * @param forbiddenSymbol the symbol forbidden -+ */ -+ void pruneStatesWithForbiddenSymbol(int forbiddenSymbol); -+ -+ /** - * Print all outputs of current parsing, preceded by a bar '/', - * from the final nodes of the state - * @param finals the set of final nodes -@@ -156,8 +182,8 @@ - wstring filterFinals(set const &finals, Alphabet const &a, - set const &escaped_chars, - bool uppercase = false, -- bool firstupper = false, -- int firstchar = 0) const; -+ bool firstupper = false, -+ int firstchar = 0) const; - - /** - * Same as previous one, but the output is adapted to the SAO system -@@ -173,11 +199,44 @@ - wstring filterFinalsSAO(set const &finals, Alphabet const &a, - set const &escaped_chars, - bool uppercase = false, -- bool firstupper = false, -- int firstchar = 0) const; -+ bool firstupper = false, -+ int firstchar = 0) const; - - - /** -+ * Same as previous one, but the output is adapted to the LRX system -+ * @param finals the set of final nodes -+ * @param a the alphabet to decode strings -+ * @param escaped_chars the set of chars to be preceded with one -+ * backslash -+ * @param uppercase true if the word is uppercase -+ * @param firstupper true if the first letter of a word is uppercase -+ * @param firstchar first character of the word -+ * @return the result of the transduction -+ */ -+ -+ set > > filterFinalsLRX(set const &finals, Alphabet const &a, -+ set const &escaped_chars, -+ bool uppercase = false, -+ bool firstupper = false, -+ int firstchar = 0) const; -+ -+ -+ -+ -+ -+ /** -+ * Find final states, remove those that not has a requiredSymbol and 'restart' each of them as the -+ * set of initial states, but remembering the sequence and adding a separationSymbol -+ * @param finals -+ * @param requiredSymbol -+ * @param restart_state -+ * @param separationSymbol -+ */ -+ void restartFinals(const set &finals, int requiredSymbol, State *restart_state, int separationSymbol); -+ -+ -+ /** - * Returns true if at least one record of the state references a - * final node of the set - * @param finals set of final nodes @return -@@ -185,6 +244,11 @@ - */ - bool isFinal(set const &finals) const; - -+ /** -+ * Return the full states string (to allow debuging...) using a Java ArrayList.toString style -+ */ -+ wstring getReadableString(const Alphabet &a); -+ - wstring filterFinalsTM(set const &finals, - Alphabet const &alphabet, - set const &escaped_chars, -Index: lttoolbox/Makefile.am -=================================================================== ---- lttoolbox/Makefile.am (revision 21745) -+++ lttoolbox/Makefile.am (working copy) -@@ -2,7 +2,7 @@ - h_sources = alphabet.h buffer.h compiler.h compression.h \ - entry_token.h expander.h fst_processor.h lt_locale.h ltstr.h \ - match_exe.h match_node.h match_state.h my_stdio.h node.h \ -- pattern_list.h pool.h regexp_compiler.h sorted_vector.h state.h \ -+ pattern_list.h regexp_compiler.h sorted_vector.h state.h \ - transducer.h trans_exe.h xml_parse_util.h exception.h tmx_compiler.h - cc_sources = alphabet.cc compiler.cc compression.cc entry_token.cc \ - expander.cc fst_processor.cc lt_locale.cc match_exe.cc \ -@@ -13,7 +13,7 @@ - library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)-$(GENERIC_API_VERSION)/$(GENERIC_LIBRARY_NAME) - library_include_HEADERS = $(h_sources) - --bin_PROGRAMS = lt-comp lt-proc lt-expand lt-tmxcomp lt-tmxproc -+bin_PROGRAMS = lt-comp lt-proc lt-expand lt-tmxcomp lt-tmxproc lt-print - instdir = lttoolbox - - lib_LTLIBRARIES= liblttoolbox3.la -@@ -26,6 +26,10 @@ - - lttoolbox_DATA = dix.dtd - -+lt_print_SOURCES = lt_print.cc -+lt_print_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la -+lt_print_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS) -+ - lt_comp_SOURCES = lt_comp.cc - lt_comp_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la - lt_comp_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS) -@@ -46,8 +50,18 @@ - lt_tmxproc_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la - lt_tmxproc_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS) - --man_MANS = lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1 -+#lt-validate-dictionary: Makefile.am validate-header.sh -+# @echo "Creating lt-validate-dictionary script" -+# @echo "#!$(BASH)" > $@ -+# @cat validate-header.sh >> $@ -+# @echo "$(XMLLINT) --dtdvalid $(apertiumdir)/dix.dtd --noout \$$FILE1 && exit 0;" >> $@ -+# @echo "exit 1;" >> $@ -+# @chmod a+x $@ - -+ -+ -+man_MANS = lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1 lt-print.1 -+ - INCLUDES = -I$(top_srcdir) $(LTTOOLBOX_CFLAGS) - CLEANFILES = *~ - -Index: lttoolbox/lt-print.1 -=================================================================== ---- lttoolbox/lt-print.1 (revision 0) -+++ lttoolbox/lt-print.1 (revision 44914) -@@ -0,0 +1,34 @@ -+.TH lt-print 1 2006-03-08 "" "" -+.SH NAME -+lt-print \- This application is part of the lexical processing modules -+and tools ( -+.B lttoolbox -+) -+.PP -+This tool is part of the apertium machine translation -+architecture: \fBhttp://www.apertium.org\fR. -+.SH SYNOPSIS -+.B lt-print -+ bin_file -+.PP -+.SH DESCRIPTION -+.BR lt-print -+Is the application responsible for printing compiled dictionaries in -+ATT format. -+.PP -+.B bin_file -+The compiled input file . -+.PP -+.B output_file -+The transducer in ATT format . -+ -+.SH SEE ALSO -+.I lt-comp\fR(1), -+.I lt-proc\fR(1), -+.I lt-expand\fR(1), -+.I apertium-tagger\fR(1), -+.I apertium\fR(1). -+.SH BUGS -+Lots of...lurking in the dark and waiting for you! -+.SH AUTHOR -+(c) 2005--2012 Universitat d'Alacant / Universidad de Alicante. -Index: lttoolbox/lt_print.cc -=================================================================== ---- lttoolbox/lt_print.cc (revision 0) -+++ lttoolbox/lt_print.cc (revision 44914) -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License as -+ * published by the Free Software Foundation; either version 2 of the -+ * License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, but -+ * WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -+ * 02111-1307, USA. -+ */ -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+using namespace std; -+ -+void endProgram(char *name) -+{ -+ if(name != NULL) -+ { -+ cout << basename(name) << " v" << PACKAGE_VERSION <<": dump a transducer to text in ATT format" << endl; -+ cout << "USAGE: " << basename(name) << " bin_file " << endl; -+ } -+ exit(EXIT_FAILURE); -+} -+ -+ -+int main(int argc, char *argv[]) -+{ -+ if(argc != 2) -+ { -+ endProgram(argv[0]); -+ } -+ -+ LtLocale::tryToSetLocale(); -+ -+ -+ FILE *input = fopen(argv[1], "r"); -+ -+ Alphabet new_alphabet; -+ set alphabetic_chars; -+ -+ map transducers; -+ -+ // letters -+ int len = Compression::multibyte_read(input); -+ while(len > 0) -+ { -+ alphabetic_chars.insert(static_cast(Compression::multibyte_read(input))); -+ len--; -+ } -+ -+ // symbols -+ new_alphabet.read(input); -+ -+ len = Compression::multibyte_read(input); -+ -+ while(len > 0) -+ { -+ int len2 = Compression::multibyte_read(input); -+ wstring name = L""; -+ while(len2 > 0) -+ { -+ name += static_cast(Compression::multibyte_read(input)); -+ len2--; -+ } -+ transducers[name].read(input); -+ -+ len--; -+ } -+ -+ ///////////////////// -+ -+ FILE *output = stdout; -+ map::iterator penum = transducers.end(); -+ penum--; -+ for(map::iterator it = transducers.begin(); it != transducers.end(); it++) -+ { -+ //it->second.minimize(); -+ it->second.show(new_alphabet, output); -+ if(it != penum) -+ { -+ fwprintf(output, L"--\n", it->first.c_str()); -+ } -+ } -+ -+ fclose(input); -+ -+ return 0; -+} diff --git a/lttoolbox.spec b/lttoolbox.spec index 72b4a69..0a7eae4 100644 --- a/lttoolbox.spec +++ b/lttoolbox.spec @@ -1,17 +1,13 @@ Summary: Augmented letter transducer tools for natural language processing Summary(pl.UTF-8): Narzędzia do przetwarzania słów w językach naturalnych Name: lttoolbox -Version: 3.2.0 -%define subver svn20130412 -%define rel 1 -Release: 2.%{subver}.1 +Version: 3.3.1 +Release: 1 License: GPL v2+ Group: Applications/Text Source0: http://downloads.sourceforge.net/apertium/%{name}-%{version}.tar.gz -# Source0-md5: 708e7de837ed363f7103035ef2849fe4 -Patch0: %{name}-svn20130412.patch -Patch1: %{name}-soname.patch -Patch2: %{name}-opt.patch +# Source0-md5: d50479b2376a4839b7acac352505623e +Patch0: %{name}-opt.patch URL: http://wiki.apertium.org/wiki/Lttoolbox BuildRequires: autoconf >= 2.52 BuildRequires: automake @@ -62,9 +58,7 @@ Statyczna biblioteka lttoolbox. %prep %setup -q -%patch0 -p0 -%patch1 -p1 -%patch2 -p1 +%patch0 -p1 %build %{__libtoolize} @@ -97,8 +91,9 @@ rm -rf $RPM_BUILD_ROOT %attr(755,root,root) %{_bindir}/lt-proc %attr(755,root,root) %{_bindir}/lt-tmxcomp %attr(755,root,root) %{_bindir}/lt-tmxproc -%attr(755,root,root) %{_libdir}/liblttoolbox3-3.2.so.*.*.* -%attr(755,root,root) %ghost %{_libdir}/liblttoolbox3-3.2.so.1 +%attr(755,root,root) %{_bindir}/lt-trim +%attr(755,root,root) %{_libdir}/liblttoolbox3-3.3.so.*.*.* +%attr(755,root,root) %ghost %{_libdir}/liblttoolbox3-3.3.so.0 %{_datadir}/lttoolbox %{_mandir}/man1/lt-comp.1* %{_mandir}/man1/lt-expand.1* @@ -106,13 +101,14 @@ rm -rf $RPM_BUILD_ROOT %{_mandir}/man1/lt-proc.1* %{_mandir}/man1/lt-tmxcomp.1* %{_mandir}/man1/lt-tmxproc.1* +%{_mandir}/man1/lt-trim.1* %files devel %defattr(644,root,root,755) %attr(755,root,root) %{_libdir}/liblttoolbox3.so %{_libdir}/liblttoolbox3.la -%{_includedir}/lttoolbox-3.2 -%{_pkgconfigdir}/lttoolbox-3.2.pc +%{_includedir}/lttoolbox-3.3 +%{_pkgconfigdir}/lttoolbox.pc %files static %defattr(644,root,root,755) -- 2.44.0