FreeLing
4.0
|
Class affixes implements affixation (suffix/prefix) rules and dictionary search for affixed word forms. More...
#include <suffixes.h>
Public Member Functions | |
affixes (const std::wstring &, const std::wstring &, const dictionary &) | |
Constructor. | |
void | look_for_affixes (word &) const |
look up possible roots of a suffixed/prefixed form | |
Private Member Functions | |
void | look_for_affixes_in_list (int, const std::multimap< std::wstring, sufrule > &, word &) const |
find all applicable affix rules for a word | |
void | look_for_combined_affixes (const std::multimap< std::wstring, sufrule > &, const std::multimap< std::wstring, sufrule > &, word &) const |
find all applicable prefix+suffix rule combinations for a word | |
std::set< std::wstring > | GenerateRoots (int, const sufrule &, const std::wstring &) const |
generate roots according to rules. | |
void | SearchRootsList (std::set< std::wstring > &, const std::wstring &, const sufrule &, word &) const |
find roots in dictionary and apply matching rules | |
void | ApplyRule (const std::wstring &, const std::list< analysis > &, const std::wstring &, const sufrule &, word &) const |
actually apply an affix rule | |
void | CheckRetokenizable (const sufrule &, const std::wstring &, const std::wstring &, const std::wstring &, std::list< word > &, int) const |
auxiliary method to deal with retokenization | |
Private Attributes | |
accents | accen |
Language-specific accent handler. | |
const dictionary & | dic |
std::multimap< std::wstring, sufrule > | affix [2] |
all suffixation/prefixation rules | |
std::multimap< std::wstring, sufrule > | affix_always [2] |
suffixation/prefixation rules applied unconditionally | |
std::set< unsigned int > | ExistingLength [2] |
index of existing suffix/prefix lengths. | |
unsigned int | Longest [2] |
Length of longest suffix/prefix. |
Class affixes implements affixation (suffix/prefix) rules and dictionary search for affixed word forms.
freeling::affixes::affixes | ( | const std::wstring & | Lang, |
const std::wstring & | sufFile, | ||
const dictionary & | d | ||
) |
Constructor.
Create a suffixed words analyzer.
References freeling::sufrule::acc, affix, affix_always, freeling::sufrule::always, freeling::sufrule::enc, ERROR_CRASH, ExistingLength, int2wstring, freeling::sufrule::lema, Longest, freeling::sufrule::nomore, freeling::util::open_utf8_file(), freeling::sufrule::output, PREF, freeling::sufrule::retok, SUF, freeling::sufrule::term, and TRACE.
void freeling::affixes::ApplyRule | ( | const std::wstring & | , |
const std::list< analysis > & | , | ||
const std::wstring & | , | ||
const sufrule & | , | ||
word & | |||
) | const [private] |
actually apply an affix rule
Actually apply a rule.
if (not suf.cond.Search(pos->get_tag()) ) {
References freeling::word::add_analysis(), freeling::util::capitalization(), CheckRetokenizable(), freeling::sufrule::cond, freeling::sufrule::expression, freeling::word::get_form(), freeling::word::get_lc_form(), freeling::analysis::init(), freeling::sufrule::lema, freeling::sufrule::nomore, freeling::sufrule::output, freeling::regexp::search(), freeling::word::set_found_in_dict(), freeling::analysis::set_retokenizable(), TRACE, and wstring2list.
Referenced by look_for_combined_affixes(), and SearchRootsList().
void freeling::affixes::CheckRetokenizable | ( | const sufrule & | suf, |
const std::wstring & | form, | ||
const std::wstring & | lem, | ||
const std::wstring & | tag, | ||
std::list< word > & | rtk, | ||
int | caps | ||
) | const [private] |
auxiliary method to deal with retokenization
Check whether the suffix carries retokenization information, and create alternative word list if necessary.
References freeling::word::add_analysis(), freeling::util::capitalize(), dic, ERROR_CRASH, freeling::word::get_form(), freeling::word::get_lemma(), freeling::word::get_tag(), freeling::analysis::init(), freeling::sufrule::retok, freeling::dictionary::search_form(), freeling::word::set_form(), TRACE, and wstring2list.
Referenced by ApplyRule().
set< wstring > freeling::affixes::GenerateRoots | ( | int | kind, |
const sufrule & | suf, | ||
const std::wstring & | rt | ||
) | const [private] |
generate roots according to rules.
Generate all possible forms expanding root rt with all possible terminations according to the given suffix rule.
References PREF, SUF, freeling::sufrule::term, and TRACE.
Referenced by look_for_affixes_in_list(), and look_for_combined_affixes().
void freeling::affixes::look_for_affixes | ( | word & | w | ) | const |
look up possible roots of a suffixed/prefixed form
Look up possible roots of a suffixed form.
Words that already have an analysis only get the "always"-marked suffix rules applied. So-far unrecognized words get all the suffix rules applied.
References affix, affix_always, freeling::word::get_form(), freeling::word::get_n_analysis(), int2wstring, look_for_affixes_in_list(), look_for_combined_affixes(), PREF, SUF, and TRACE.
void freeling::affixes::look_for_affixes_in_list | ( | int | kind, |
const std::multimap< std::wstring, sufrule > & | suff, | ||
word & | w | ||
) | const [private] |
find all applicable affix rules for a word
References accen, ExistingLength, freeling::accents::fix_accentuation(), GenerateRoots(), freeling::word::get_lc_form(), int2wstring, Longest, PREF, SearchRootsList(), SUF, and TRACE.
Referenced by look_for_affixes().
void freeling::affixes::look_for_combined_affixes | ( | const std::multimap< std::wstring, sufrule > & | suff, |
const std::multimap< std::wstring, sufrule > & | pref, | ||
word & | w | ||
) | const [private] |
find all applicable prefix+suffix rule combinations for a word
References accen, ApplyRule(), ExistingLength, freeling::accents::fix_accentuation(), freeling::word::found_in_dict(), GenerateRoots(), freeling::word::get_lc_form(), int2wstring, Longest, PREF, SearchRootsList(), freeling::word::set_found_in_dict(), SUF, and TRACE.
Referenced by look_for_affixes().
void freeling::affixes::SearchRootsList | ( | std::set< std::wstring > & | , |
const std::wstring & | , | ||
const sufrule & | , | ||
word & | |||
) | const [private] |
find roots in dictionary and apply matching rules
Search candidate forms in dictionary, discarding invalid forms and annotating the valid ones.
References ApplyRule(), dic, int2wstring, freeling::dictionary::search_form(), and TRACE.
Referenced by look_for_affixes_in_list(), and look_for_combined_affixes().
accents freeling::affixes::accen [private] |
Language-specific accent handler.
Referenced by look_for_affixes_in_list(), and look_for_combined_affixes().
std::multimap<std::wstring,sufrule> freeling::affixes::affix[2] [private] |
all suffixation/prefixation rules
Referenced by affixes(), and look_for_affixes().
std::multimap<std::wstring,sufrule> freeling::affixes::affix_always[2] [private] |
suffixation/prefixation rules applied unconditionally
Referenced by affixes(), and look_for_affixes().
const dictionary& freeling::affixes::dic [private] |
Referenced by CheckRetokenizable(), and SearchRootsList().
std::set<unsigned int> freeling::affixes::ExistingLength[2] [private] |
index of existing suffix/prefix lengths.
Referenced by affixes(), look_for_affixes_in_list(), and look_for_combined_affixes().
unsigned int freeling::affixes::Longest[2] [private] |
Length of longest suffix/prefix.
Referenced by affixes(), look_for_affixes_in_list(), and look_for_combined_affixes().