FreeLing
4.0
|
00001 00002 // 00003 // FreeLing - Open Source Language Analyzers 00004 // 00005 // Copyright (C) 2014 TALP Research Center 00006 // Universitat Politecnica de Catalunya 00007 // 00008 // This library is free software; you can redistribute it and/or 00009 // modify it under the terms of the GNU Affero General Public 00010 // License as published by the Free Software Foundation; either 00011 // version 3 of the License, or (at your option) any later version. 00012 // 00013 // This library is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // Affero General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU Affero General Public 00019 // License along with this library; if not, write to the Free Software 00020 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00021 // 00022 // contact: Lluis Padro (padro@lsi.upc.es) 00023 // TALP Research Center 00024 // despatx C6.212 - Campus Nord UPC 00025 // 08034 Barcelona. SPAIN 00026 // 00028 00029 #ifndef _LEXCHAIN_RELATION 00030 #define _LEXCHAIN_RELATION 00031 00032 #include <set> 00033 #include <string> 00034 #include <vector> 00035 #include <unordered_map> 00036 00037 #include "freeling/morfo/semdb.h" 00038 #include "freeling/morfo/language.h" 00039 00040 namespace freeling { 00041 00045 00046 struct word_pos { 00047 const freeling::word &w; 00048 const freeling::sentence &s; 00049 int n_paragraph; 00050 int n_sentence; 00051 int position; 00052 00053 word_pos(const freeling::word &w_p, const freeling::sentence &s_p, 00054 int n_paragraph, int n_sentence, int position); 00055 00057 bool operator==(word_pos other) const; 00058 00060 bool operator<(word_pos other) const; 00061 00063 bool operator>(word_pos other) const; 00064 00066 std::wstring toString() const; 00067 }; 00068 00072 00073 struct related_words { 00074 const word_pos &w1; 00075 const word_pos &w2; 00077 double relatedness; 00078 00079 related_words(const word_pos &w_p1, const word_pos &w_p2, double relatedness); 00080 00082 std::wstring toString() const; 00083 }; 00084 00091 00092 class relation { 00093 public: 00094 00096 const std::wstring label; 00098 static int max_distance; 00099 00101 relation(const std::wstring s, const std::wstring t); 00102 00104 ~relation(); 00105 00107 bool is_compatible(const freeling::word &w) const; 00108 00109 virtual bool compute_word (const freeling::word &w, const freeling::sentence &s, 00110 const freeling::document &doc, int n_paragraph, 00111 int n_sentence, int position, std::list<word_pos> &words, 00112 std::list<related_words> &relations, std::unordered_map<std::wstring, 00113 std::pair<int, word_pos*> > &unique_words) const = 0; 00114 00115 virtual double get_homogeneity_index(const std::list<word_pos> &words, 00116 const std::list<related_words> &relations, 00117 const std::unordered_map<std::wstring, 00118 std::pair<int, word_pos*> > &unique_words) const = 0; 00119 00120 virtual std::list<word_pos> order_words_by_weight(const std::unordered_map<std::wstring, 00121 std::pair<int, word_pos*> > &unique_words) const = 0; 00122 00123 00124 protected: 00125 00128 const freeling::regexp compatible_tag; 00129 }; 00130 00135 00136 class same_word : public relation { 00137 00138 public: 00139 00141 same_word(std::wstring expr); 00142 00145 double get_homogeneity_index(const std::list<word_pos> &words, 00146 const std::list<related_words> &relations, 00147 const std::unordered_map<std::wstring, 00148 std::pair<int, word_pos*> > &unique_words) const; 00149 00153 bool compute_word (const freeling::word &w, const freeling::sentence &s, 00154 const freeling::document &doc, int n_paragraph, 00155 int n_sentence, int position, std::list<word_pos> &words, 00156 std::list<related_words> &relations, std::unordered_map<std::wstring, 00157 std::pair<int, word_pos*> > &unique_words) const; 00158 00161 std::list<word_pos> order_words_by_weight(const std::unordered_map<std::wstring, 00162 std::pair<int, word_pos*> > &unique_words) const; 00163 }; 00164 00170 00171 class hypernymy : public relation { 00172 00173 public: 00174 00176 hypernymy(int k, double alpha, const std::wstring &semfile, std::wstring expr); 00177 00180 double get_homogeneity_index(const std::list<word_pos> &words, 00181 const std::list<related_words> &relations, 00182 const std::unordered_map<std::wstring, 00183 std::pair<int, word_pos*> > &unique_words) const; 00184 00188 bool compute_word (const freeling::word &w, const freeling::sentence &s, 00189 const freeling::document &doc, int n_paragraph, int n_sentence, 00190 int position, std::list<word_pos> &words, std::list<related_words> &relations, 00191 std::unordered_map<std::wstring, std::pair<int, word_pos*> > &unique_words) const; 00192 00194 std::list<word_pos> order_words_by_weight(const std::unordered_map<std::wstring, 00195 std::pair<int, word_pos*> > &unique_words) const; 00196 00197 private: 00198 00200 static freeling::semanticDB * semdb; 00202 static int depth; 00204 static double alpha; 00205 00207 int hypernymyAux(std::wstring s1, std::wstring s2, int k) const; 00208 00210 const word_pos &count_relations(int n, const std::list<related_words> &relations) const; 00211 }; 00212 00218 00219 class same_coref_group : public relation { 00220 00221 public: 00222 00224 same_coref_group(std::wstring expr); 00225 00228 double get_homogeneity_index(const std::list<word_pos> &words, 00229 const std::list<related_words> &relations, 00230 const std::unordered_map<std::wstring, 00231 std::pair<int, word_pos*> > &unique_words) const; 00232 00236 bool compute_word (const freeling::word &w, const freeling::sentence &s, 00237 const freeling::document &doc, int n_paragraph, int n_sentence, 00238 int position, std::list<word_pos> &words, std::list<related_words> &relations, 00239 std::unordered_map<std::wstring, std::pair<int, word_pos*> > &unique_words) const; 00240 00242 std::list<word_pos> order_words_by_weight(const std::unordered_map<std::wstring, 00243 std::pair<int, word_pos*> > &unique_words) const; 00244 }; 00245 00246 } 00247 00248 #endif 00249