FreeLing
4.0
|
00001 00002 // 00003 // FreeLing - Open Source Language Analyzers 00004 // 00005 // Copyright (C) 2014 TALP Research Center 00006 // Universitat Politecnica de Catalunya 00007 // 00008 // This library is free software; you can redistribute it and/or 00009 // modify it under the terms of the GNU Affero General Public 00010 // License as published by the Free Software Foundation; either 00011 // version 3 of the License, or (at your option) any later version. 00012 // 00013 // This library is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // Affero General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU Affero General Public 00019 // License along with this library; if not, write to the Free Software 00020 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00021 // 00022 // contact: Lluis Padro (padro@lsi.upc.es) 00023 // TALP Research Center 00024 // despatx C6.212 - Campus Nord UPC 00025 // 08034 Barcelona. SPAIN 00026 // 00028 00029 #ifndef _NUMBERS_MOD 00030 #define _NUMBERS_MOD 00031 00032 #include <map> 00033 00034 #include "freeling/regexp.h" 00035 #include "freeling/morfo/language.h" 00036 #include "freeling/morfo/automat.h" 00037 00038 namespace freeling { 00039 00040 #define RE_NUM L"^(\\d{1,3}(\\"+MACO_Thousand+L"\\d{3})*|\\d+)(\\"+MACO_Decimal+L"\\d+)?$" 00041 #define RE_NUM_NEG L"^([\\-]?)(\\d{1,3}(\\"+MACO_Thousand+L"\\d{3})*|\\d+)(\\"+MACO_Decimal+L"\\d+)?$" 00042 #define RE_CODE L"^.*[0-9].*$" 00043 00044 // Auxiliary, kind of code (normal CODE, e.g. "X-23-12A"; ORDinal number, e.g. "4th") 00045 #define CODE 1 00046 #define ORD 2 00047 00048 00052 00053 class numbers_status : public automat_status { 00054 public: 00056 long double bilion,milion,units; 00057 int block; 00058 int iscode; 00059 00060 // These are used only in numbers_it. !! unify process with other languages !! 00061 long double hundreds; // this is additional. 00062 long double thousands; // this is additional. 00063 long double floatUnits; // "e tre quarto". Count of how many "halfs", "quartrs" we have 00064 }; 00065 00066 00071 00072 class numbers_module : public automat<numbers_status> { 00073 00074 protected: 00075 // configuration options 00076 std::wstring MACO_Decimal, MACO_Thousand; 00077 00079 std::map<std::wstring,float> value; 00081 std::map<std::wstring,int> tok; 00083 std::map<int,long double> power; 00084 00085 freeling::regexp RE_code; 00086 freeling::regexp RE_number; 00087 freeling::regexp RE_number_neg; 00088 00089 // reset accumulators 00090 virtual void ResetActions(numbers_status *) const; 00091 00092 public: 00094 numbers_module(const std::wstring &, const std::wstring &); 00095 }; 00096 00101 00102 class numbers_default : public numbers_module { 00103 00104 private: 00105 int ComputeToken(int,sentence::iterator&, sentence &) const; 00106 void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const; 00107 void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const; 00108 00109 public: 00111 numbers_default(const std::wstring &, const std::wstring &); 00112 }; 00113 00114 00119 00120 class numbers_es : public numbers_module { 00121 00122 private: 00123 int ComputeToken(int,sentence::iterator&, sentence &) const; 00124 void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const; 00125 void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const; 00126 00127 public: 00129 numbers_es(const std::wstring &, const std::wstring &); 00130 }; 00131 00132 00137 00138 class numbers_ca : public numbers_module { 00139 00140 private: 00141 int ComputeToken(int,sentence::iterator&, sentence &) const; 00142 void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const; 00143 void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const; 00144 00145 public: 00147 numbers_ca(const std::wstring &, const std::wstring &); 00148 }; 00149 00154 00155 class numbers_gl : public numbers_module { 00156 00157 private: 00158 int ComputeToken(int,sentence::iterator&, sentence &) const; 00159 void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const; 00160 void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const; 00161 00162 public: 00164 numbers_gl(const std::wstring &, const std::wstring &); 00165 }; 00166 00171 00172 class numbers_pt : public numbers_module { 00173 00174 private: 00175 int ComputeToken(int,sentence::iterator&, sentence &) const; 00176 void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const; 00177 void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const; 00178 00179 public: 00181 numbers_pt(const std::wstring &, const std::wstring &); 00182 }; 00183 00184 00189 00190 class numbers_it : public numbers_module { 00191 00192 private: 00193 int ComputeToken(int,sentence::iterator&, sentence &) const; 00194 void ResetActions(numbers_status *) const; 00195 void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const; 00196 void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const; 00197 00198 public: 00200 numbers_it(const std::wstring &, const std::wstring &); 00201 }; 00202 00203 00208 00209 class numbers_en : public numbers_module { 00210 00211 private: 00212 int ComputeToken(int,sentence::iterator&, sentence &) const; 00213 void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const; 00214 void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const; 00215 00216 public: 00218 numbers_en(const std::wstring &, const std::wstring &); 00219 }; 00220 00225 00226 class numbers_ru : public numbers_module 00227 { 00228 private: 00229 int ComputeToken(int,sentence::iterator&, sentence &) const; 00230 void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const; 00231 void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const; 00232 00233 public: 00234 numbers_ru(const std::wstring &, const std::wstring &); 00235 }; 00236 00241 00242 class numbers_cs : public numbers_module 00243 { 00244 private: 00245 int ComputeToken(int,sentence::iterator&, sentence &) const; 00246 void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const; 00247 void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const; 00248 00249 public: 00250 numbers_cs(const std::wstring &, const std::wstring &); 00251 }; 00252 00253 00258 #define DEDEBUG 00259 class numbers_de : public numbers_module 00260 { 00261 private: 00262 int ComputeToken(int,sentence::iterator&, sentence &) const; 00263 void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const; 00264 void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const; 00265 00266 #ifdef DEDEBUG 00267 std::map<int, std::wstring> stateNames; 00268 std::map<int, std::wstring> tokenNames; 00269 std::wstring tokenName(const int token) const; 00270 std::wstring stateName(const int state) const; 00271 #endif 00272 00273 // mutable int lastValue; 00274 public: 00275 numbers_de(const std::wstring &, const std::wstring &); 00276 }; 00277 00278 } // namespace 00279 00280 #endif 00281