FreeLing  4.0
numbers_modules.h
Go to the documentation of this file.
00001 
00002 //
00003 //    FreeLing - Open Source Language Analyzers
00004 //
00005 //    Copyright (C) 2014   TALP Research Center
00006 //                         Universitat Politecnica de Catalunya
00007 //
00008 //    This library is free software; you can redistribute it and/or
00009 //    modify it under the terms of the GNU Affero General Public
00010 //    License as published by the Free Software Foundation; either
00011 //    version 3 of the License, or (at your option) any later version.
00012 //
00013 //    This library is distributed in the hope that it will be useful,
00014 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016 //    Affero General Public License for more details.
00017 //
00018 //    You should have received a copy of the GNU Affero General Public
00019 //    License along with this library; if not, write to the Free Software
00020 //    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00021 //
00022 //    contact: Lluis Padro (padro@lsi.upc.es)
00023 //             TALP Research Center
00024 //             despatx C6.212 - Campus Nord UPC
00025 //             08034 Barcelona.  SPAIN
00026 //
00028 
00029 #ifndef _NUMBERS_MOD
00030 #define _NUMBERS_MOD
00031 
00032 #include <map>
00033 
00034 #include "freeling/regexp.h"
00035 #include "freeling/morfo/language.h"
00036 #include "freeling/morfo/automat.h"
00037 
00038 namespace freeling {
00039 
00040 #define RE_NUM L"^(\\d{1,3}(\\"+MACO_Thousand+L"\\d{3})*|\\d+)(\\"+MACO_Decimal+L"\\d+)?$"
      // RE_NUM: whole-string pattern for an unsigned number written in digits.
      // It accepts either 1-3 digits followed by any number of three-digit groups,
      // each preceded by MACO_Thousand, or a plain run of digits; an optional
      // fractional part introduced by MACO_Decimal may follow.
      // The macro expands to a wide-string concatenation that names MACO_Thousand
      // and MACO_Decimal, so it is only usable where those identifiers are in
      // scope. Each separator is emitted after a regex escape character.
      // NOTE(review): that escaping presumably assumes one-character separators;
      // confirm against the configuration values actually passed in.
00041 #define RE_NUM_NEG L"^([\\-]?)(\\d{1,3}(\\"+MACO_Thousand+L"\\d{3})*|\\d+)(\\"+MACO_Decimal+L"\\d+)?$"
      // RE_NUM_NEG: same pattern as RE_NUM, with an extra leading capture group
      // that holds an optional '-' sign.
00042 #define RE_CODE L"^.*[0-9].*$"
      // RE_CODE: matches text that contains at least one digit 0-9.
00043 
00044   // Auxiliary, kind of code (normal CODE, e.g. "X-23-12A";  ORDinal number, e.g. "4th")
      // These are preprocessor macros, so they are not scoped by the enclosing
      // namespace: every file that includes this header sees CODE and ORD.
00045 #define CODE 1
00046 #define ORD 2
00047 
00048 
00052 
00053   class numbers_status : public automat_status {
          // State object for the numbers automaton: extends automat_status with
          // the accumulators that numbers_module::ResetActions() receives.
          // All fields are public and none has an in-class initializer.
00054   public:
          // NOTE(review): presumably the partial values collected so far for the
          // billions, millions and units parts of the number being recognized;
          // confirm in the StateActions implementations.
00056     long double bilion,milion,units;
          // NOTE(review): meaning not visible in this header; check the
          // implementation before relying on it.
00057     int block;
          // NOTE(review): presumably holds one of the CODE / ORD macro values
          // defined in this header; confirm against the implementation.
00058     int iscode;
00059 
00060     // These are used only in numbers_it. !! unify process with other languages !! 
00061     long double hundreds;   // this is additional.
00062     long double thousands;  // this is additional.
00063     long double floatUnits; // "e tre quarto". Count of how many "halves", "quarters" we have
00064   };
00065 
00066 
00071 
00072   class numbers_module : public automat<numbers_status> {
          // Common base of the language-specific number recognizers declared in
          // this header. It derives from automat<numbers_status> and holds the
          // configuration strings, lookup tables and regular expressions that the
          // derived classes can reach through protected access.
00073 
00074   protected:
00075     // configuration options
          // These two names are the ones the RE_NUM / RE_NUM_NEG macros splice
          // into their patterns.
00076     std::wstring MACO_Decimal, MACO_Thousand;
00077 
          // NOTE(review): presumably maps a number word to its numeric value.
          // The mapped type is float, while the numbers_status accumulators are
          // long double.
00079     std::map<std::wstring,float> value;
          // NOTE(review): presumably maps a word to the automaton token code.
00081     std::map<std::wstring,int> tok;
          // NOTE(review): presumably maps a token code to the magnitude it
          // stands for; confirm in the constructors that fill it.
00083     std::map<int,long double> power;
00084 
          // NOTE(review): presumably compiled from the RE_CODE, RE_NUM and
          // RE_NUM_NEG macros respectively; the construction is not visible here.
00085     freeling::regexp RE_code;
00086     freeling::regexp RE_number;
00087     freeling::regexp RE_number_neg;
00088 
00089     // reset accumulators
          // Virtual and const; takes the status object by pointer. numbers_it
          // redeclares it in this header.
00090     virtual void ResetActions(numbers_status *) const;
00091  
00092   public:
          // Constructor taking two wide strings.
          // NOTE(review): presumably the decimal and thousands separators stored
          // in MACO_Decimal / MACO_Thousand; the argument order is not visible
          // in this header, so confirm it before calling.
00094     numbers_module(const std::wstring &, const std::wstring &);
00095   };
00096 
00101 
00102   class numbers_default : public numbers_module {
          // Number recognizer derived from numbers_module.
          // NOTE(review): the name suggests it is the fallback for languages
          // without a dedicated class; confirm where it is instantiated.
00103 
00104   private: 
          // Three const member functions kept private.
          // NOTE(review): presumably they implement hooks declared by
          // automat<numbers_status> (freeling/morfo/automat.h), which is not
          // visible from this header.
00105     int ComputeToken(int,sentence::iterator&, sentence &) const;
00106     void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const;
00107     void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const;
00108 
00109   public:
          // Takes the same two wide-string arguments as numbers_module's
          // constructor.
00111     numbers_default(const std::wstring &, const std::wstring &);
00112   };
00113 
00114 
00119 
00120   class numbers_es : public numbers_module {
          // Language-specific number recognizer derived from numbers_module.
          // NOTE(review): the "es" suffix presumably denotes Spanish.
00121 
00122   private:
          // Three const member functions kept private.
          // NOTE(review): presumably they implement hooks declared by
          // automat<numbers_status> (freeling/morfo/automat.h), which is not
          // visible from this header.
00123     int ComputeToken(int,sentence::iterator&, sentence &) const;
00124     void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const;
00125     void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const;
00126 
00127   public:
          // Takes the same two wide-string arguments as numbers_module's
          // constructor.
00129     numbers_es(const std::wstring &, const std::wstring &);
00130   };
00131 
00132 
00137 
00138   class numbers_ca : public numbers_module {
          // Language-specific number recognizer derived from numbers_module.
          // NOTE(review): the "ca" suffix presumably denotes Catalan.
00139 
00140   private:
          // Three const member functions kept private.
          // NOTE(review): presumably they implement hooks declared by
          // automat<numbers_status> (freeling/morfo/automat.h), which is not
          // visible from this header.
00141     int ComputeToken(int,sentence::iterator&, sentence &) const;
00142     void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const;
00143     void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const;
00144 
00145   public:
          // Takes the same two wide-string arguments as numbers_module's
          // constructor.
00147     numbers_ca(const std::wstring &, const std::wstring &);
00148   };
00149 
00154 
00155   class numbers_gl : public numbers_module {
          // Language-specific number recognizer derived from numbers_module.
          // NOTE(review): the "gl" suffix presumably denotes Galician.
00156 
00157   private:
          // Three const member functions kept private.
          // NOTE(review): presumably they implement hooks declared by
          // automat<numbers_status> (freeling/morfo/automat.h), which is not
          // visible from this header.
00158     int ComputeToken(int,sentence::iterator&, sentence &) const;
00159     void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const;
00160     void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const;
00161 
00162   public:
          // Takes the same two wide-string arguments as numbers_module's
          // constructor.
00164     numbers_gl(const std::wstring &, const std::wstring &);
00165   };
00166 
00171 
00172   class numbers_pt : public numbers_module {
          // Language-specific number recognizer derived from numbers_module.
          // NOTE(review): the "pt" suffix presumably denotes Portuguese.
00173 
00174   private:
          // Three const member functions kept private.
          // NOTE(review): presumably they implement hooks declared by
          // automat<numbers_status> (freeling/morfo/automat.h), which is not
          // visible from this header.
00175     int ComputeToken(int,sentence::iterator&, sentence &) const;
00176     void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const;
00177     void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const;
00178 
00179   public:
          // Takes the same two wide-string arguments as numbers_module's
          // constructor.
00181     numbers_pt(const std::wstring &, const std::wstring &);
00182   };
00183 
00184 
00189 
00190   class numbers_it : public numbers_module {
          // Language-specific number recognizer derived from numbers_module.
          // NOTE(review): the "it" suffix presumably denotes Italian.
          // Of the recognizers declared in this header, this is the only one
          // that redeclares ResetActions.
00191 
00192   private:
          // NOTE(review): ComputeToken, StateActions and SetMultiwordAnalysis
          // presumably implement hooks declared by automat<numbers_status>
          // (freeling/morfo/automat.h), which is not visible from this header.
00193     int ComputeToken(int,sentence::iterator&, sentence &) const;
          // Same signature as the virtual numbers_module::ResetActions, so this
          // declaration overrides it.
          // NOTE(review): presumably it also clears the numbers_status fields
          // that are commented as used only by numbers_it; confirm in the
          // implementation.
00194     void ResetActions(numbers_status *) const;
00195     void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const;
00196     void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const;
00197 
00198   public:
          // Takes the same two wide-string arguments as numbers_module's
          // constructor.
00200     numbers_it(const std::wstring &, const std::wstring &);
00201   };
00202 
00203 
00208 
00209   class numbers_en : public numbers_module {
          // Language-specific number recognizer derived from numbers_module.
          // NOTE(review): the "en" suffix presumably denotes English.
00210 
00211   private: 
          // Three const member functions kept private.
          // NOTE(review): presumably they implement hooks declared by
          // automat<numbers_status> (freeling/morfo/automat.h), which is not
          // visible from this header.
00212     int ComputeToken(int,sentence::iterator&, sentence &) const;
00213     void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const;
00214     void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const;
00215 
00216   public:
          // Takes the same two wide-string arguments as numbers_module's
          // constructor.
00218     numbers_en(const std::wstring &, const std::wstring &);
00219   };
00220 
00225 
00226   class numbers_ru : public numbers_module 
00227   {
          // Language-specific number recognizer derived from numbers_module.
          // NOTE(review): the "ru" suffix presumably denotes Russian.
00228   private: 
          // Three const member functions kept private.
          // NOTE(review): presumably they implement hooks declared by
          // automat<numbers_status> (freeling/morfo/automat.h), which is not
          // visible from this header.
00229     int ComputeToken(int,sentence::iterator&, sentence &) const;
00230     void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const;
00231     void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const;
00232 
00233   public:
          // Takes the same two wide-string arguments as numbers_module's
          // constructor.
00234     numbers_ru(const std::wstring &, const std::wstring &);
00235   };
00236 
00241 
00242   class numbers_cs : public numbers_module 
00243   {
          // Language-specific number recognizer derived from numbers_module.
          // NOTE(review): the "cs" suffix presumably denotes Czech.
00244   private: 
          // Three const member functions kept private.
          // NOTE(review): presumably they implement hooks declared by
          // automat<numbers_status> (freeling/morfo/automat.h), which is not
          // visible from this header.
00245     int ComputeToken(int,sentence::iterator&, sentence &) const;
00246     void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const;
00247     void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const;
00248 
00249   public:
          // Takes the same two wide-string arguments as numbers_module's
          // constructor.
00250     numbers_cs(const std::wstring &, const std::wstring &);
00251   };
00252 
00253 
00258 #define DEDEBUG
      // NOTE(review): DEDEBUG is defined unconditionally in this header, so the
      // #ifdef section inside numbers_de is always compiled in, and the macro is
      // visible to every file that includes this header. Confirm whether this
      // was meant to ship enabled.
00259   class numbers_de : public numbers_module 
00260   {
          // Language-specific number recognizer derived from numbers_module.
          // NOTE(review): the "de" suffix presumably denotes German.
00261   private: 
          // NOTE(review): the three functions below presumably implement hooks
          // declared by automat<numbers_status> (freeling/morfo/automat.h),
          // which is not visible from this header.
00262     int ComputeToken(int,sentence::iterator&, sentence &) const;
00263     void StateActions(int, int, int, sentence::const_iterator, numbers_status *) const;
00264     void SetMultiwordAnalysis(sentence::iterator, int, const numbers_status *) const;
00265 
00266 #ifdef DEDEBUG
         // Debug-only members: two int-to-wstring tables and two const lookup
         // functions that return a wstring for a token or state code.
         // NOTE(review): presumably used to print readable names in traces;
         // confirm in the implementation.
00267    std::map<int, std::wstring> stateNames;
00268    std::map<int, std::wstring> tokenNames;
00269    std::wstring tokenName(const int token) const;
00270    std::wstring stateName(const int state) const;
00271 #endif
00272 
00273    // mutable int lastValue;
00274   public:
          // Takes the same two wide-string arguments as numbers_module's
          // constructor.
00275     numbers_de(const std::wstring &, const std::wstring &);
00276   };
00277 
00278 } // namespace
00279 
00280 #endif
00281