FreeLing
4.0
|
Class for the feature extractor. More...
#include <relaxcor_fex.h>
Public Types | |
typedef std::map< std::wstring, relaxcor_model::Tfeatures > | Mfeatures |
Public Member Functions | |
relaxcor_fex (const std::wstring &, relaxcor_model *, const std::wstring &lang=L"") | |
~relaxcor_fex () | |
void | extract (std::vector< mention > &, Mfeatures &) |
Static Public Member Functions | |
static void | print (relaxcor_fex::Mfeatures &, unsigned int) |
Just for debugging!!! | |
Private Types | |
enum | mentionFeature { IN_QUOTES, HEAD_TERM, IS_ACRONYM, POSSESSIVE, NUMBER, GENDER, SEM_CLASS, THIRD_PERSON, REFLEXIVE, DEF_NP, INDEF_NP, DEM_NP, MAXIMAL_NP, EMBEDDED_NOUN } |
enum | mentionWsFeature { ARGUMENTS, ROLES } |
Private Member Functions | |
void | set_feature (int, mentionFeature, unsigned int) |
auxiliary functions for feature extraction | |
void | set_feature (int, mentionWsFeature, const std::vector< std::wstring > &) |
void | clean_features () |
unsigned int | get_feature (int, mentionFeature) const |
const std::vector< std::wstring > & | get_feature (int, mentionWsFeature) const |
bool | computed_feature (int, mentionFeature) const |
bool | computed_feature (int, mentionWsFeature) const |
std::wstring | subvector2wstring (const std::vector< std::wstring > &, unsigned int, unsigned int, const std::wstring &) |
void | get_structural (const mention &, const mention &, relaxcor_model::Tfeatures &) |
group feature functions | |
void | get_lexical (const mention &, const mention &, relaxcor_model::Tfeatures &) |
void | get_morphological (const mention &, const mention &, relaxcor_model::Tfeatures &, std::vector< mention > &) |
void | get_syntactic (const mention &, const mention &, relaxcor_model::Tfeatures &, std::vector< mention > &) |
void | get_semantic (const mention &, const mention &, relaxcor_model::Tfeatures &, std::vector< mention > &) |
void | get_discourse (const mention &, const mention &, relaxcor_model::Tfeatures &) |
void | get_group_features (std::vector< mention > &, relaxcor_model::Tfeatures &) |
unsigned int | dist_in_phrases (const mention &, const mention &) |
feature functions | |
unsigned int | in_quotes (const mention &) |
bool | appositive (const mention &, const mention &) |
bool | nested (const mention &, const mention &) |
bool | intersected (const mention &, const mention &) |
bool | string_match (const mention &, const mention &) |
bool | pronoun_string_match (const mention &, const mention &, bool) |
bool | proper_noun_string_match (const mention &, const mention &, bool) |
bool | no_pronoun_string_match (const mention &, const mention &, bool) |
unsigned int | head_is_term (const mention &) |
unsigned int | alias (const mention &, const mention &) |
unsigned int | is_possessive (const mention &) |
unsigned int | same_number (const mention &, const mention &) |
unsigned int | same_gender (const mention &, const mention &) |
unsigned int | is_3rd_person (const mention &) |
unsigned int | agreement (const mention &, const mention &) |
unsigned int | closest_agreement (const mention &, const mention &, std::vector< mention > &) |
unsigned int | is_reflexive (const mention &) |
unsigned int | is_def_NP (const mention &) |
unsigned int | is_dem_NP (const mention &) |
bool | share_maximal_NP (const mention &, const mention &, std::vector< mention > &) |
unsigned int | is_maximal_NP (const mention &, std::vector< mention > &) |
unsigned int | is_indef_NP (const mention &) |
unsigned int | is_embedded_noun (const mention &, std::vector< mention > &) |
bool | binding_pos (const mention &, const mention &, bool) |
bool | binding_neg (const mention &, const mention &, bool) |
void | get_arguments (const mention &, std::wstring &, std::wstring &) |
bool | same_preds (bool, const std::wstring &, const std::wstring &) |
bool | same_args (bool, const std::wstring &, const std::wstring &, relaxcor_model::Tfeatures &) |
bool | separated_by_verb_is (const mention &, const mention &, std::vector< mention > &) |
bool | sem_class_match (const mention &, const mention &) |
bool | is_semantic_type (const mention &, const std::wstring &) |
bool | animacy (const mention &, const mention &) |
bool | incompatible (const mention &, const mention &) |
void | get_roles (const mention &, std::vector< std::wstring > &) |
bool | same_roles (const std::vector< std::wstring > &, const std::vector< std::wstring > &) |
void | read_countries_capitals (const std::wstring &) |
auxiliary functions | |
void | read_gpe_regexps (const std::wstring &) |
void | read_pairs (const std::wstring &, std::map< std::wstring, std::wstring > &) |
void | read_same_names (const std::wstring &, std::map< std::wstring, std::vector< unsigned int > > &) |
std::wstring | drop_det (const mention &) |
std::wstring | compute_term (const mention &) |
unsigned int | geo_match (const mention &, const mention &) |
std::wstring | string_merge (const mention &, bool) |
std::vector< std::wstring > | split_words (const std::wstring &) |
bool | is_acronym (const std::wstring &) |
unsigned int | acronym_of (const std::vector< std::wstring > &, const std::vector< std::wstring > &) |
unsigned int | initials_match (const std::vector< std::wstring > &, const std::vector< std::wstring > &) |
double | lex_dist (const std::wstring &, const std::wstring &) |
unsigned int | nick_name_match (const std::wstring &, const std::wstring &) |
unsigned int | forenames_match (const std::vector< std::wstring > &, const std::vector< std::wstring > &) |
unsigned int | first_name_match (const std::wstring &, const std::vector< std::wstring > &) |
double | levenshtein (const std::wstring &, const std::wstring &) |
unsigned int | get_number (const mention &) |
unsigned int | get_gender (const mention &) |
std::wstring | extract_msd_feature (const std::wstring &tag, const std::wstring &feature) const |
mention::SEMmentionType | extract_semclass (const mention &) |
void | isa (const std::wstring &, std::vector< bool > &) |
int | get_maximal_NP (const mention &, std::vector< mention > &) |
const std::wstring & | get_argument (sentence::predicates::const_iterator, dep_tree::const_iterator, paragraph::const_iterator) |
bool | verb_is_between (const mention &, const mention &) |
void | extract_pair (mention &, mention &, relaxcor_model::Tfeatures &, std::vector< mention > &) |
Private Attributes | |
std::wstring | _Language |
Language (from parameters). | |
relaxcor_model * | model |
coreference model including feature model | |
semanticDB * | _Semdb |
semantic database to check for semantic properties | |
unsigned int | _Active_features |
active features | |
tagset * | _POS_tagset |
Tagset. | |
std::map< std::wstring, freeling::regexp > | _Labels |
Regexp for syntactic labels useful to compute some features. | |
std::set< std::wstring > | _Det_words |
determiners to be dropped from mentions before computing string matching | |
std::map< std::wstring, std::map< std::wstring, std::wstring > > | _Prons_feat |
words being pronouns and their features | |
std::map< std::wstring, std::pair< std::wstring, freeling::regexp > > | _Sem_classes |
semantic classes of common nouns | |
std::map< std::wstring, std::wstring > | _Capitals |
capitals, countries and nationalities (with GPE regexps also) | |
std::map< std::wstring, std::wstring > | _Nationalities |
std::multimap< std::wstring, std::wstring > | _Countries |
std::vector< freeling::regexp > | _GPE_regexps |
std::map< std::wstring, std::vector< unsigned int > > | _Forenames |
forename aliases | |
std::map< std::wstring, std::vector< unsigned int > > | _Nicks |
nick names | |
std::map< std::wstring, std::wstring > | _Person_Names |
person names with gender | |
std::map< std::wstring, std::wstring > | _Titles |
titles with gender | |
std::map< std::wstring, freeling::regexp > | _AcroTerms |
acronym terms (infix and suffix) | |
std::map< int, std::map < mentionFeature, unsigned int > > | features |
auxiliary maps of some feature values for individual mentions | |
std::map< int, std::map < mentionWsFeature, std::vector< std::wstring > > > | wsfeatures |
Static Private Attributes | |
static const freeling::regexp | acronym_re1 |
static const freeling::regexp | acronym_re2 |
static const freeling::regexp | en_reflexive_re |
static const freeling::regexp | en_demostrative_re |
static const freeling::regexp | en_indefinite_re |
static const freeling::regexp | initial_letter_re1 |
static const freeling::regexp | initial_letter_re2 |
static const freeling::regexp | en_det_singular_re |
static const freeling::regexp | en_det_plural_re |
static const freeling::regexp | cat_verb_be_re1 |
static const freeling::regexp | cat_verb_be_re2 |
static const freeling::regexp | en_verb_be_re |
static const freeling::regexp | es_verb_be_re |
static const freeling::regexp | arg_re |
static const freeling::regexp | role_re |
Class for the feature extractor.
typedef std::map<std::wstring, relaxcor_model::Tfeatures > freeling::relaxcor_fex::Mfeatures |
enum freeling::relaxcor_fex::mentionFeature [private] |
enum freeling::relaxcor_fex::mentionWsFeature [private] |
freeling::relaxcor_fex::relaxcor_fex | ( | const std::wstring & | , |
relaxcor_model * | , | ||
const std::wstring & | lang = L"" |
||
) |
unsigned int freeling::relaxcor_fex::acronym_of | ( | const std::vector< std::wstring > & | , |
const std::vector< std::wstring > & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::agreement | ( | const mention & | , |
const mention & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::alias | ( | const mention & | , |
const mention & | |||
) | [private] |
bool freeling::relaxcor_fex::animacy | ( | const mention & | , |
const mention & | |||
) | [private] |
bool freeling::relaxcor_fex::appositive | ( | const mention & | , |
const mention & | |||
) | [private] |
bool freeling::relaxcor_fex::binding_neg | ( | const mention & | , |
const mention & | , | ||
bool | |||
) | [private] |
bool freeling::relaxcor_fex::binding_pos | ( | const mention & | , |
const mention & | , | ||
bool | |||
) | [private] |
void freeling::relaxcor_fex::clean_features | ( | ) | [private] |
unsigned int freeling::relaxcor_fex::closest_agreement | ( | const mention & | , |
const mention & | , | ||
std::vector< mention > & | |||
) | [private] |
std::wstring freeling::relaxcor_fex::compute_term | ( | const mention & | ) | [private] |
bool freeling::relaxcor_fex::computed_feature | ( | int | , |
mentionFeature | |||
) | const [private] |
bool freeling::relaxcor_fex::computed_feature | ( | int | , |
mentionWsFeature | |||
) | const [private] |
unsigned int freeling::relaxcor_fex::dist_in_phrases | ( | const mention & | , |
const mention & | |||
) | [private] |
feature functions
std::wstring freeling::relaxcor_fex::drop_det | ( | const mention & | ) | [private] |
void freeling::relaxcor_fex::extract | ( | std::vector< mention > & | , |
Mfeatures & | |||
) |
std::wstring freeling::relaxcor_fex::extract_msd_feature | ( | const std::wstring & | tag, |
const std::wstring & | feature | ||
) | const [private] |
void freeling::relaxcor_fex::extract_pair | ( | mention & | , |
mention & | , | ||
relaxcor_model::Tfeatures & | , | ||
std::vector< mention > & | |||
) | [private] |
mention::SEMmentionType freeling::relaxcor_fex::extract_semclass | ( | const mention & | ) | [private] |
unsigned int freeling::relaxcor_fex::first_name_match | ( | const std::wstring & | , |
const std::vector< std::wstring > & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::forenames_match | ( | const std::vector< std::wstring > & | , |
const std::vector< std::wstring > & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::geo_match | ( | const mention & | , |
const mention & | |||
) | [private] |
const std::wstring& freeling::relaxcor_fex::get_argument | ( | sentence::predicates::const_iterator | , |
dep_tree::const_iterator | , | ||
paragraph::const_iterator | |||
) | [private] |
void freeling::relaxcor_fex::get_arguments | ( | const mention & | , |
std::wstring & | , | ||
std::wstring & | |||
) | [private] |
void freeling::relaxcor_fex::get_discourse | ( | const mention & | , |
const mention & | , | ||
relaxcor_model::Tfeatures & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::get_feature | ( | int | , |
mentionFeature | |||
) | const [private] |
const std::vector<std::wstring>& freeling::relaxcor_fex::get_feature | ( | int | , |
mentionWsFeature | |||
) | const [private] |
unsigned int freeling::relaxcor_fex::get_gender | ( | const mention & | ) | [private] |
void freeling::relaxcor_fex::get_group_features | ( | std::vector< mention > & | , |
relaxcor_model::Tfeatures & | |||
) | [private] |
void freeling::relaxcor_fex::get_lexical | ( | const mention & | , |
const mention & | , | ||
relaxcor_model::Tfeatures & | |||
) | [private] |
int freeling::relaxcor_fex::get_maximal_NP | ( | const mention & | , |
std::vector< mention > & | |||
) | [private] |
void freeling::relaxcor_fex::get_morphological | ( | const mention & | , |
const mention & | , | ||
relaxcor_model::Tfeatures & | , | ||
std::vector< mention > & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::get_number | ( | const mention & | ) | [private] |
void freeling::relaxcor_fex::get_roles | ( | const mention & | , |
std::vector< std::wstring > & | |||
) | [private] |
void freeling::relaxcor_fex::get_semantic | ( | const mention & | , |
const mention & | , | ||
relaxcor_model::Tfeatures & | , | ||
std::vector< mention > & | |||
) | [private] |
void freeling::relaxcor_fex::get_structural | ( | const mention & | , |
const mention & | , | ||
relaxcor_model::Tfeatures & | |||
) | [private] |
group feature functions
void freeling::relaxcor_fex::get_syntactic | ( | const mention & | , |
const mention & | , | ||
relaxcor_model::Tfeatures & | , | ||
std::vector< mention > & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::head_is_term | ( | const mention & | ) | [private] |
unsigned int freeling::relaxcor_fex::in_quotes | ( | const mention & | ) | [private] |
bool freeling::relaxcor_fex::incompatible | ( | const mention & | , |
const mention & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::initials_match | ( | const std::vector< std::wstring > & | , |
const std::vector< std::wstring > & | |||
) | [private] |
bool freeling::relaxcor_fex::intersected | ( | const mention & | , |
const mention & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::is_3rd_person | ( | const mention & | ) | [private] |
bool freeling::relaxcor_fex::is_acronym | ( | const std::wstring & | ) | [private] |
unsigned int freeling::relaxcor_fex::is_def_NP | ( | const mention & | ) | [private] |
unsigned int freeling::relaxcor_fex::is_dem_NP | ( | const mention & | ) | [private] |
unsigned int freeling::relaxcor_fex::is_embedded_noun | ( | const mention & | , |
std::vector< mention > & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::is_indef_NP | ( | const mention & | ) | [private] |
unsigned int freeling::relaxcor_fex::is_maximal_NP | ( | const mention & | , |
std::vector< mention > & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::is_possessive | ( | const mention & | ) | [private] |
unsigned int freeling::relaxcor_fex::is_reflexive | ( | const mention & | ) | [private] |
bool freeling::relaxcor_fex::is_semantic_type | ( | const mention & | , |
const std::wstring & | |||
) | [private] |
void freeling::relaxcor_fex::isa | ( | const std::wstring & | , |
std::vector< bool > & | |||
) | [private] |
double freeling::relaxcor_fex::levenshtein | ( | const std::wstring & | , |
const std::wstring & | |||
) | [private] |
double freeling::relaxcor_fex::lex_dist | ( | const std::wstring & | , |
const std::wstring & | |||
) | [private] |
bool freeling::relaxcor_fex::nested | ( | const mention & | , |
const mention & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::nick_name_match | ( | const std::wstring & | , |
const std::wstring & | |||
) | [private] |
bool freeling::relaxcor_fex::no_pronoun_string_match | ( | const mention & | , |
const mention & | , | ||
bool | |||
) | [private] |
static void freeling::relaxcor_fex::print | ( | relaxcor_fex::Mfeatures & | , |
unsigned int | |||
) | [static] |
Just for debugging!!!
bool freeling::relaxcor_fex::pronoun_string_match | ( | const mention & | , |
const mention & | , | ||
bool | |||
) | [private] |
bool freeling::relaxcor_fex::proper_noun_string_match | ( | const mention & | , |
const mention & | , | ||
bool | |||
) | [private] |
void freeling::relaxcor_fex::read_countries_capitals | ( | const std::wstring & | ) | [private] |
auxiliary functions
void freeling::relaxcor_fex::read_gpe_regexps | ( | const std::wstring & | ) | [private] |
void freeling::relaxcor_fex::read_pairs | ( | const std::wstring & | , |
std::map< std::wstring, std::wstring > & | |||
) | [private] |
void freeling::relaxcor_fex::read_same_names | ( | const std::wstring & | , |
std::map< std::wstring, std::vector< unsigned int > > & | |||
) | [private] |
bool freeling::relaxcor_fex::same_args | ( | bool | , |
const std::wstring & | , | ||
const std::wstring & | , | ||
relaxcor_model::Tfeatures & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::same_gender | ( | const mention & | , |
const mention & | |||
) | [private] |
unsigned int freeling::relaxcor_fex::same_number | ( | const mention & | , |
const mention & | |||
) | [private] |
bool freeling::relaxcor_fex::same_preds | ( | bool | , |
const std::wstring & | , | ||
const std::wstring & | |||
) | [private] |
bool freeling::relaxcor_fex::same_roles | ( | const std::vector< std::wstring > & | , |
const std::vector< std::wstring > & | |||
) | [private] |
bool freeling::relaxcor_fex::sem_class_match | ( | const mention & | , |
const mention & | |||
) | [private] |
bool freeling::relaxcor_fex::separated_by_verb_is | ( | const mention & | , |
const mention & | , | ||
std::vector< mention > & | |||
) | [private] |
void freeling::relaxcor_fex::set_feature | ( | int | , |
mentionFeature | , | ||
unsigned int | |||
) | [private] |
auxiliary functions for feature extraction
void freeling::relaxcor_fex::set_feature | ( | int | , |
mentionWsFeature | , | ||
const std::vector< std::wstring > & | |||
) | [private] |
bool freeling::relaxcor_fex::share_maximal_NP | ( | const mention & | , |
const mention & | , | ||
std::vector< mention > & | |||
) | [private] |
std::vector<std::wstring> freeling::relaxcor_fex::split_words | ( | const std::wstring & | ) | [private] |
bool freeling::relaxcor_fex::string_match | ( | const mention & | , |
const mention & | |||
) | [private] |
std::wstring freeling::relaxcor_fex::string_merge | ( | const mention & | , |
bool | |||
) | [private] |
std::wstring freeling::relaxcor_fex::subvector2wstring | ( | const std::vector< std::wstring > & | , |
unsigned int | , | ||
unsigned int | , | ||
const std::wstring & | |||
) | [private] |
bool freeling::relaxcor_fex::verb_is_between | ( | const mention & | , |
const mention & | |||
) | [private] |
std::map<std::wstring, freeling::regexp> freeling::relaxcor_fex::_AcroTerms [private] |
acronym terms (infix and suffix)
unsigned int freeling::relaxcor_fex::_Active_features [private] |
active features
std::map<std::wstring,std::wstring> freeling::relaxcor_fex::_Capitals [private] |
capitals, countries and nationalities (with GPE regexps also)
std::multimap<std::wstring,std::wstring> freeling::relaxcor_fex::_Countries [private] |
std::set<std::wstring> freeling::relaxcor_fex::_Det_words [private] |
determiners to be dropped from mentions before computing string matching
std::map<std::wstring, std::vector<unsigned int> > freeling::relaxcor_fex::_Forenames [private] |
forename aliases
std::vector<freeling::regexp> freeling::relaxcor_fex::_GPE_regexps [private] |
std::map<std::wstring, freeling::regexp> freeling::relaxcor_fex::_Labels [private] |
Regexp for syntactic labels useful to compute some features.
std::wstring freeling::relaxcor_fex::_Language [private] |
Language (from parameters).
std::map<std::wstring,std::wstring> freeling::relaxcor_fex::_Nationalities [private] |
std::map<std::wstring, std::vector<unsigned int> > freeling::relaxcor_fex::_Nicks [private] |
nick names
std::map<std::wstring, std::wstring> freeling::relaxcor_fex::_Person_Names [private] |
person names with gender
tagset* freeling::relaxcor_fex::_POS_tagset [private] |
Tagset.
std::map<std::wstring, std::map<std::wstring, std::wstring> > freeling::relaxcor_fex::_Prons_feat [private] |
words being pronouns and their features
std::map<std::wstring,std::pair<std::wstring, freeling::regexp> > freeling::relaxcor_fex::_Sem_classes [private] |
semantic classes of common nouns
semanticDB* freeling::relaxcor_fex::_Semdb [private] |
semantic database to check for semantic properties
std::map<std::wstring, std::wstring> freeling::relaxcor_fex::_Titles [private] |
titles with gender
const freeling::regexp freeling::relaxcor_fex::acronym_re1 [static, private] |
const freeling::regexp freeling::relaxcor_fex::acronym_re2 [static, private] |
const freeling::regexp freeling::relaxcor_fex::arg_re [static, private] |
const freeling::regexp freeling::relaxcor_fex::cat_verb_be_re1 [static, private] |
const freeling::regexp freeling::relaxcor_fex::cat_verb_be_re2 [static, private] |
const freeling::regexp freeling::relaxcor_fex::en_demostrative_re [static, private] |
const freeling::regexp freeling::relaxcor_fex::en_det_plural_re [static, private] |
const freeling::regexp freeling::relaxcor_fex::en_det_singular_re [static, private] |
const freeling::regexp freeling::relaxcor_fex::en_indefinite_re [static, private] |
const freeling::regexp freeling::relaxcor_fex::en_reflexive_re [static, private] |
const freeling::regexp freeling::relaxcor_fex::en_verb_be_re [static, private] |
const freeling::regexp freeling::relaxcor_fex::es_verb_be_re [static, private] |
std::map<int, std::map<mentionFeature, unsigned int> > freeling::relaxcor_fex::features [private] |
auxiliary maps of some feature values for individual mentions
const freeling::regexp freeling::relaxcor_fex::initial_letter_re1 [static, private] |
const freeling::regexp freeling::relaxcor_fex::initial_letter_re2 [static, private] |
relaxcor_model* freeling::relaxcor_fex::model [private] |
coreference model including feature model
const freeling::regexp freeling::relaxcor_fex::role_re [static, private] |
std::map<int, std::map<mentionWsFeature, std::vector<std::wstring> > > freeling::relaxcor_fex::wsfeatures [private] |