FreeLing
4.0
|
The class dictionary implements dictionary search and suffix analysis for word forms. More...
#include <dictionary.h>
Public Member Functions | |
dictionary (const std::wstring &Lang, const std::wstring &dicFile, const std::wstring &sufFile, const std::wstring &compFile, bool invDic=false, bool retok=true) | |
Constructor. | |
~dictionary () | |
Destructor. | |
void | add_analysis (const std::wstring &, const analysis &) |
add analysis to dictionary entry (create entry if not there) | |
void | remove_entry (const std::wstring &) |
remove entry from dictionary | |
void | set_retokenize_contractions (bool) |
customize behaviour of dictionary for further analysis | |
void | set_affix_analysis (bool) |
customize behaviour of dictionary for further analysis | |
void | set_compound_analysis (bool) |
customize behaviour of dictionary for further analysis | |
bool | has_affixes () const |
find out whether the dictionary has loaded an affix module | |
bool | has_compounds () const |
find out whether the dictionary has loaded a compounds module | |
void | search_form (const std::wstring &, std::list< analysis > &) const |
Get dictionary entry for a given form, add to given list. | |
bool | annotate_word (word &, std::list< word > &, bool override=false) const |
Fills the analysis list of a word, checking for suffixes and contractions. | |
void | annotate_word (word &) const |
Fills the analysis list of a word, checking for suffixes and contractions. | |
std::list< std::wstring > | get_forms (const std::wstring &, const std::wstring &) const |
Get possible forms for a lemma+pos. | |
void | dump_dictionary (std::wostream &, bool keysonly=false) const |
dump dictionary to a buffer. Either full entries or keys only | |
void | analyze (sentence &) const |
analyze given sentence | |
Private Member Functions | |
bool | check_contracted (const std::wstring &, std::wstring, std::wstring, std::list< word > &) const |
check whether the word is a contraction, and if so, fill the list with the contracted words | |
std::list< std::wstring > | tag_combinations (std::list< std::wstring >::const_iterator, std::list< std::wstring >::const_iterator) const |
Generate valid tag combinations for an ambiguous contraction. | |
bool | parse_dict_entry (const std::wstring &, std::list< std::pair< std::wstring, std::list< std::wstring > > > &) const |
parse data string into a map lemma->list of tags | |
std::wstring | compact_data (const std::list< std::pair< std::wstring, std::list< std::wstring > > > &) const |
compact data in format lemma1 pos1a|pos1b|pos1c lemma2 pos2a|pos2b to save memory | |
Private Attributes | |
bool | InverseDict |
configuration options | |
bool | RetokenizeContractions |
Analysis settings. | |
bool | AffixAnalysis |
bool | CompoundAnalysis |
affixes * | suf |
suffix analyzer | |
compounds * | comp |
compounds analyzer | |
database * | morfodb |
key-value file or hash | |
database * | inverdb |
The class dictionary implements dictionary search and suffix analysis for word forms.
freeling::dictionary::dictionary | ( | const std::wstring & | Lang, |
const std::wstring & | dicFile, | ||
const std::wstring & | sufFile, | ||
const std::wstring & | compFile, | ||
bool | invDic = false , |
||
bool | retok = true |
||
) |
Constructor.
Create a dictionary module, open database.
References freeling::config_file::add_section(), freeling::config_file::close(), DB_MAP, DB_PREFTREE, ERROR_CRASH, freeling::config_file::get_content_line(), freeling::config_file::get_section(), freeling::config_file::open(), and TRACE.
Destructor.
Destroy dictionary module, close database.
void freeling::dictionary::add_analysis | ( | const std::wstring & | form, |
const analysis & | newan | ||
) |
add analysis to dictionary entry (create entry if not there)
References freeling::analysis::get_lemma(), freeling::analysis::get_tag(), freeling::LEMMA_DIVIDER, list2wstring, freeling::TAG_DIVIDER, and wstring2list.
void freeling::dictionary::analyze | ( | sentence & | se | ) | const [virtual] |
analyze given sentence
Dictionary search and affix analysis for all words in a sentence, using given options.
Implements freeling::processor.
References int2wstring, freeling::sentence::rebuild_word_index(), TRACE, and TRACE_SENTENCE.
bool freeling::dictionary::annotate_word | ( | word & | , |
std::list< word > & | , | ||
bool | override = false |
||
) | const |
Fills the analysis list of a word, checking for suffixes and contractions.
Returns true iff the form is a contraction, and returns the contraction components in the given list.
Referenced by freeling::compounds::check_compound().
void freeling::dictionary::annotate_word | ( | word & | w | ) | const |
Fills the analysis list of a word, checking for suffixes and contractions.
Search form in the dictionary.
Never retokenizes contractions, nor returns a component list. It is just a convenience equivalent to "annotate_word(w,dummy,true)".
Add the analyses found to the given word. Do not retokenize contractions, nor return a component list.
bool freeling::dictionary::check_contracted | ( | const std::wstring & | , |
std::wstring | , | ||
std::wstring | , | ||
std::list< word > & | |||
) | const [private] |
check whether the word is a contraction, and if so, fill the list with the contracted words
Check whether the given word is a contraction; if so, obtain its composing words (and store them into lw).
References freeling::word::add_analysis(), ERROR_CRASH, freeling::word::get_n_analysis(), list2wstring, TRACE, and wstring2list.
wstring freeling::dictionary::compact_data | ( | const std::list< std::pair< std::wstring, std::list< std::wstring > > > & | ) | const [private] |
compact data in format lemma1 pos1a|pos1b|pos1c lemma2 pos2a|pos2b to save memory
References freeling::LEMMA_DIVIDER, list2wstring, and freeling::TAG_DIVIDER.
void freeling::dictionary::dump_dictionary | ( | std::wostream & | buff, |
bool | keysonly = false |
||
) | const |
dump dictionary to a buffer. Either full entries or keys only
dump dictionary to a buffer.
Either full entries or keys only
Referenced by freeling::compounds::compounds().
list< wstring > freeling::dictionary::get_forms | ( | const std::wstring & | , |
const std::wstring & | |||
) | const |
Get possible forms for a lemma+pos.
References WARNING, and wstring2list.
bool freeling::dictionary::has_affixes | ( | ) | const |
find out whether the dictionary has loaded an affix module
bool freeling::dictionary::has_compounds | ( | ) | const |
find out whether the dictionary has loaded a compounds module
bool freeling::dictionary::parse_dict_entry | ( | const std::wstring & | , |
std::list< std::pair< std::wstring, std::list< std::wstring > > > & | |||
) | const [private] |
parse data string into a map lemma->list of tags
References wstring2list.
void freeling::dictionary::remove_entry | ( | const std::wstring & | form | ) |
remove entry from dictionary
References list2wstring, and wstring2list.
void freeling::dictionary::search_form | ( | const std::wstring & | s, |
std::list< analysis > & | la | ||
) | const |
Get dictionary entry for a given form, add to given list.
Search form in the dictionary, according to given options. Add the analyses found to the given list.
References freeling::analysis::init(), int2wstring, freeling::LEMMA_DIVIDER, list2wstring, freeling::TAG_DIVIDER, TRACE, and wstring2list.
Referenced by freeling::affixes::CheckRetokenizable(), and freeling::affixes::SearchRootsList().
void freeling::dictionary::set_affix_analysis | ( | bool | aff | ) |
customize behaviour of dictionary for further analysis
void freeling::dictionary::set_compound_analysis | ( | bool | cmp | ) |
customize behaviour of dictionary for further analysis
void freeling::dictionary::set_retokenize_contractions | ( | bool | rtk | ) |
customize behaviour of dictionary for further analysis
list< wstring > freeling::dictionary::tag_combinations | ( | std::list< std::wstring >::const_iterator | , |
std::list< std::wstring >::const_iterator | |||
) | const [private] |
Generate valid tag combinations for an ambiguous contraction.
References wstring2list.
bool freeling::dictionary::AffixAnalysis [private] |
compounds* freeling::dictionary::comp [private] |
compounds analyzer
bool freeling::dictionary::CompoundAnalysis [private] |
database* freeling::dictionary::inverdb [private] |
bool freeling::dictionary::InverseDict [private] |
configuration options
database* freeling::dictionary::morfodb [private] |
key-value file or hash
bool freeling::dictionary::RetokenizeContractions [private] |
Analysis settings.
affixes* freeling::dictionary::suf [private] |
suffix analyzer