// FreeLing 4.0
00001 00002 // 00003 // FreeLing - Open Source Language Analyzers 00004 // 00005 // Copyright (C) 2014 TALP Research Center 00006 // Universitat Politecnica de Catalunya 00007 // 00008 // This library is free software; you can redistribute it and/or 00009 // modify it under the terms of the GNU Affero General Public 00010 // License as published by the Free Software Foundation; either 00011 // version 3 of the License, or (at your option) any later version. 00012 // 00013 // This library is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // Affero General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU Affero General Public 00019 // License along with this library; if not, write to the Free Software 00020 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00021 // 00022 // contact: Lluis Padro (padro@lsi.upc.es) 00023 // TALP Research Center 00024 // despatx C6.212 - Campus Nord UPC 00025 // 08034 Barcelona. SPAIN 00026 // 00028 00030 // 00031 // Author: Stanilovsky Evgeny, stanilovsky@gmail.com 00032 // 00033 // 00034 // This class is just a wrapper to a regular expression engine. 00035 // All Freeling modules access regexps via this class. 00036 // 00037 // Currently, the engine is boost::xpressive, but can be changed 00038 // just writting a new version of this class (with the same API), 00039 // with no need to alter any other freeling module. 00040 // 00042 00043 #ifndef _FL_REGEXP_H_ 00044 #define _FL_REGEXP_H_ 00045 00046 #if defined USE_XPRESSIVE_REGEX && defined USE_BOOST_LOCALE 00047 #error boost::xpressive does not support boost::locale. USE_XPRESSIVE_REGEX and USE_BOOST_LOCALE can not be defined simultaneously. 
00048 #endif 00049 00050 #if defined USE_XPRESSIVE_REGEX 00051 #include <boost/xpressive/xpressive.hpp> 00052 #else 00053 #include <boost/regex/icu.hpp> 00054 #endif 00055 00056 #include <string> 00057 #include <vector> 00058 00059 namespace freeling { 00060 00061 class regexp { 00062 00063 private: 00064 #if defined USE_XPRESSIVE_REGEX 00065 typedef boost::xpressive::wsregex regex_type; 00066 typedef boost::xpressive::wsmatch match_type; 00067 #else 00068 typedef boost::u32regex regex_type; 00069 typedef boost::wsmatch match_type; 00070 #endif 00071 00072 // internal regular expression 00073 regex_type re; 00074 00075 // private function: convert internal match list to vector<string> 00076 void extract_matches(const match_type &, std::vector<std::wstring> &) const; 00077 // private function: convert internal match list to vector<string> and positions to vector<int> 00078 void extract_matches(const match_type &, std::vector<std::wstring> &, std::vector<int> &) const; 00079 00080 public: 00081 regexp (const regexp&); 00082 regexp (const std::wstring &expr, bool icase=false); 00083 ~regexp (); 00085 bool search (const std::wstring &in, bool continuous=false) const; 00087 bool search (const std::wstring &in, std::vector<std::wstring> &out, bool continuous=false) const; 00089 bool search (const std::wstring &in, std::vector<std::wstring> &out, 00090 std::vector<int> &pos, bool continuous=false) const; 00092 bool search (std::wstring::const_iterator i1, std::wstring::const_iterator i2, 00093 std::vector<std::wstring> &out, bool continuous=false) const; 00095 bool search (std::wstring::const_iterator i1, std::wstring::const_iterator i2, 00096 std::vector<std::wstring> &out, std::vector<int> &pos, bool continuous=false) const; 00098 bool match (const std::wstring &in) const; 00100 bool match (const std::wstring &in, std::vector<std::wstring> &out) const; 00101 }; 00102 } 00103 00104 #endif