FreeLing
4.0
|
//
// FreeLing - Open Source Language Analyzers
//
// Copyright (C) 2014 TALP Research Center
//                    Universitat Politecnica de Catalunya
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Affero General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// contact: Lluis Padro (padro@lsi.upc.es)
//          TALP Research Center
//          despatx C6.212 - Campus Nord UPC
//          08034 Barcelona.
SPAIN 00026 // 00028 00029 #ifndef _DICTIONARY 00030 #define _DICTIONARY 00031 00032 #include <map> 00033 00034 #include "freeling/windll.h" 00035 #include "freeling/morfo/language.h" 00036 #include "freeling/morfo/processor.h" 00037 #include "freeling/morfo/database.h" 00038 #include "freeling/morfo/suffixes.h" 00039 #ifndef NO_LIBFOMA 00040 #include "freeling/morfo/compounds.h" 00041 #endif 00042 00043 namespace freeling { 00044 00051 00052 class WINDLL dictionary : public processor { 00053 00054 private: 00056 bool InverseDict; 00057 00059 bool RetokenizeContractions; 00060 bool AffixAnalysis; 00061 bool CompoundAnalysis; 00062 00064 affixes *suf; 00065 00066 #ifndef NO_LIBFOMA 00067 00068 compounds *comp; 00069 #endif 00070 00072 database *morfodb; 00073 database *inverdb; 00074 00077 bool check_contracted(const std::wstring &, std::wstring, 00078 std::wstring, std::list<word> &) const; 00079 00081 std::list<std::wstring> tag_combinations(std::list<std::wstring>::const_iterator, std::list<std::wstring>::const_iterator) 00082 const; 00084 bool parse_dict_entry(const std::wstring &, std::list<std::pair<std::wstring,std::list<std::wstring> > >&) const; 00086 std::wstring compact_data(const std::list<std::pair<std::wstring,std::list<std::wstring> > > &) const; 00087 00088 public: 00090 dictionary(const std::wstring &Lang, const std::wstring &dicFile, 00091 const std::wstring &sufFile, const std::wstring &compFile, 00092 bool invDic=false, bool retok=true); 00094 ~dictionary(); 00095 00097 void add_analysis(const std::wstring &, const analysis &); 00099 void remove_entry(const std::wstring &); 00100 00102 void set_retokenize_contractions(bool); 00103 void set_affix_analysis(bool); 00104 void set_compound_analysis(bool); 00105 00107 bool has_affixes() const; 00109 bool has_compounds() const; 00110 00112 void search_form(const std::wstring &, std::list<analysis> &) const; 00116 bool annotate_word(word &, std::list<word> &, bool override=false) const; 00120 void 
annotate_word(word &) const; 00122 std::list<std::wstring> get_forms(const std::wstring &, const std::wstring &) const; 00123 00125 void dump_dictionary(std::wostream &, bool keysonly=false) const; 00126 00128 void analyze(sentence &) const; 00129 00131 using processor::analyze; 00132 }; 00133 00134 } // namespace 00135 00136 #endif