FreeLing
4.0
|
00001 00003 // 00004 // FreeLing - Open Source Language Analyzers 00005 // 00006 // Copyright (C) 2014 TALP Research Center 00007 // Universitat Politecnica de Catalunya 00008 // 00009 // This library is free software; you can redistribute it and/or 00010 // modify it under the terms of the GNU Affero General Public 00011 // License as published by the Free Software Foundation; either 00012 // version 3 of the License, or (at your option) any later version. 00013 // 00014 // This library is distributed in the hope that it will be useful, 00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 // Affero General Public License for more details. 00018 // 00019 // You should have received a copy of the GNU Affero General Public 00020 // License along with this library; if not, write to the Free Software 00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00022 // 00023 // contact: Lluis Padro (padro@lsi.upc.es) 00024 // TALP Research Center 00025 // despatx C6.212 - Campus Nord UPC 00026 // 08034 Barcelona. SPAIN 00027 // 00029 00030 #ifndef _PROBABILITIES 00031 #define _PROBABILITIES 00032 00033 #include <map> 00034 00035 #include "freeling/windll.h" 00036 #include "freeling/morfo/language.h" 00037 #include "freeling/morfo/processor.h" 00038 #include "freeling/morfo/tagset.h" 00039 00040 namespace freeling { 00041 00042 const std::wstring RE_FZ=L"^[FZ]"; 00043 00048 00049 class WINDLL probabilities : public processor { 00050 private: 00052 freeling::regexp RE_PunctNum; 00053 00055 double ProbabilityThreshold; 00057 const tagset *Tags; 00058 00061 double BiassSuffixes; 00062 00064 double LidstoneLambdaLexical; 00065 double LidstoneLambdaClass; 00066 00068 bool activate_guesser; 00069 00071 std::map<std::wstring,double> single_tags; 00073 std::map<std::wstring,std::map<std::wstring,double> > class_tags; 00075 std::map<std::wstring,std::map<std::wstring,double> > lexical_tags; 00077 std::map<std::wstring,double> unk_tags; 00079 std::map<std::wstring,std::map<std::wstring,double> > unk_suffs; 00081 double theeta; 00083 std::wstring::size_type long_suff; 00084 00085 // list of default preferences of lemma/pos to sort analysis 00086 // in case tagger leaves residual ambiguity 00087 std::map<std::wstring,std::wstring> lemma_prefs; 00088 std::map<std::wstring,std::wstring> pos_prefs; 00089 00091 void smoothing(word &) const; 00093 double compute_probability(const std::wstring &, double, const std::wstring &) const; 00095 double guesser(word &, double) const; 00097 bool less(const analysis &a1, const analysis &a2) const; 00099 void sort_list(std::list<analysis> &ls) const; 00100 00101 public: 00103 probabilities(const std::wstring &, double); 00105 ~probabilities(); 00106 00108 void annotate_word(word &) const; 00109 00111 void set_activate_guesser(bool); 00112 00114 void analyze(sentence &) const; 00115 00117 using processor::analyze; 00118 }; 00119 00120 } // namespace 00121 00122 #endif