// FreeLing 4.0
00001 00002 // 00003 // FreeLing - Open Source Language Analyzers 00004 // 00005 // Copyright (C) 2014 TALP Research Center 00006 // Universitat Politecnica de Catalunya 00007 // 00008 // This library is free software; you can redistribute it and/or 00009 // modify it under the terms of the GNU Affero General Public 00010 // License as published by the Free Software Foundation; either 00011 // version 3 of the License, or (at your option) any later version. 00012 // 00013 // This library is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // Affero General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU Affero General Public 00019 // License along with this library; if not, write to the Free Software 00020 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00021 // 00022 // contact: Lluis Padro (padro@lsi.upc.es) 00023 // TALP Research Center 00024 // despatx C6.212 - Campus Nord UPC 00025 // 08034 Barcelona. 
SPAIN 00026 // 00028 00029 #ifndef _CONLL_HANDLER 00030 #define _CONLL_HANDLER 00031 00032 #include <iostream> 00033 #include <vector> 00034 #include "freeling/morfo/language.h" 00035 00036 namespace freeling { 00037 00038 namespace io { 00039 00040 class conll_sentence : public std::vector<std::vector<std::wstring> > { 00041 00042 public: 00044 conll_sentence(); 00045 00046 // destructor 00047 ~conll_sentence(); 00048 00049 void clear (); 00050 00051 void print_conll_sentence(std::wostream &sout) const; 00052 00053 void add_token (const std::vector<std::wstring> &token); 00054 size_t get_n_columns () const; 00055 std::wstring get_value(size_t i, size_t col) const; 00056 void set_value(size_t i, size_t col, const std::wstring &val); 00057 00058 }; 00059 00060 00061 // Abstract class with common infrastructure for conll_input and conll_output 00062 00063 class conll_handler { 00064 00065 protected: 00066 typedef enum {ID, SPAN_BEGIN, SPAN_END, FORM, LEMMA, TAG, SHORT_TAG, MSD, 00067 NEC, SENSE, ALL_SENSES, SYNTAX, DEPHEAD, DEPREL, COREF, SRL, 00068 NO_FIELD} ConllColumns; 00069 00070 // given a position, find out which field is stored there 00071 std::vector<std::wstring> FieldName; 00072 // given a field name, find out in which position it is 00073 std::map<std::wstring,size_t> FieldPos; 00074 00075 // constructor (with default fields) 00076 conll_handler(); 00077 // constructor (from config file) 00078 conll_handler(const std::wstring &cfgFile); 00079 // destructor 00080 ~conll_handler(); 00081 00082 void init_default(); 00083 static ConllColumns field_code(const std::wstring &field); 00084 00085 static const std::wstring UserPrefix; 00086 static const std::map<std::wstring,ConllColumns> ValidFields; 00087 }; 00088 } 00089 } 00090 #endif