FreeLing
4.0
|
Class word stores all info related to a word: form, list of analysis, list of tokens (if multiword). More...
#include <language.h>
Classes | |
class | const_iterator |
const_iterator over word analysis (either all, only selected, only unselected) More... | |
class | iterator |
iterator over word analysis (either all, only selected, only unselected) More... | |
Public Member Functions | |
word () | |
constructor | |
word (const std::wstring &) | |
constructor | |
word (const std::wstring &, const std::list< word > &) | |
constructor | |
word (const std::wstring &, const std::list< analysis > &, const std::list< word > &) | |
constructor | |
word (const word &) | |
Copy constructor. | |
word & | operator= (const word &) |
assignment | |
void | copy_analysis (const word &) |
copy analysis from another word | |
int | get_n_selected (int k=0) const |
Get the number of selected analysis. | |
int | get_n_unselected (int k=0) const |
get the number of unselected analysis | |
bool | is_multiword () const |
true iff the word is a multiword compound | |
bool | is_ambiguous_mw () const |
true iff the word is a multiword marked as ambiguous | |
void | set_ambiguous_mw (bool) |
set mw ambiguity status | |
int | get_n_words_mw () const |
get number of words in compound | |
const std::list< word > & | get_words_mw () const |
get word objects that make up the multiword | |
const std::wstring & | get_form () const |
get word form | |
const std::wstring & | get_lc_form () const |
Get word form, lowercased. | |
const std::wstring & | get_ph_form () const |
Get word phonetic form. | |
word::iterator | selected_begin (int k=0) |
Get an iterator to the first selected analysis. | |
word::const_iterator | selected_begin (int k=0) const |
Get an iterator to the first selected analysis. | |
word::iterator | selected_end (int k=0) |
Get an iterator to the end of selected analysis list. | |
word::const_iterator | selected_end (int k=0) const |
Get an iterator to the end of selected analysis list. | |
word::iterator | unselected_begin (int k=0) |
Get an iterator to the first unselected analysis. | |
word::const_iterator | unselected_begin (int k=0) const |
Get an iterator to the first unselected analysis. | |
word::iterator | unselected_end (int k=0) |
Get an iterator to the end of unselected analysis list. | |
word::const_iterator | unselected_end (int k=0) const |
Get an iterator to the end of unselected analysis list. | |
unsigned int | num_kbest () const |
Get how many kbest tags the word has. | |
const std::wstring & | get_lemma (int k=0) const |
get lemma for the selected analysis in list | |
const std::wstring & | get_tag (int k=0) const |
get tag for the selected analysis | |
const std::list< std::pair < std::wstring, double > > & | get_senses (int k=0) const |
get sense list for the selected analysis | |
std::list< std::pair < std::wstring, double > > & | get_senses (int k=0) |
get reference to sense list for the selected analysis | |
std::wstring | get_senses_string (int k=0) const |
get sense list (as string) for the selected analysis | |
void | set_senses (const std::list< std::pair< std::wstring, double > > &, int k=0) |
set sense list for the selected analysis | |
unsigned long | get_span_start () const |
get token span. | |
unsigned long | get_span_finish () const |
bool | found_in_dict () const |
get in_dict | |
void | set_found_in_dict (bool) |
set in_dict | |
bool | has_retokenizable () const |
check if there is any retokenizable analysis | |
void | lock_analysis () |
mark word as having definitive analysis | |
bool | is_locked () const |
check if word is marked as having definitive analysis | |
void | add_alternative (const std::wstring &, int) |
add an alternative to the alternatives list | |
void | set_alternatives (const std::list< std::pair< std::wstring, int > > &) |
replace alternatives list with list given | |
void | clear_alternatives () |
clear alternatives list | |
bool | has_alternatives () const |
find out if the speller checked alternatives | |
const std::list< std::pair < std::wstring, int > > & | get_alternatives () const |
get alternatives list const & | |
std::list< std::pair < std::wstring, int > > & | get_alternatives () |
get alternatives list & | |
std::list< std::pair < std::wstring, int > >::iterator | alternatives_begin () |
get alternatives begin iterator | |
std::list< std::pair < std::wstring, int > >::iterator | alternatives_end () |
get alternatives end iterator | |
std::list< std::pair < std::wstring, int > >::const_iterator | alternatives_begin () const |
get alternatives begin iterator | |
std::list< std::pair < std::wstring, int > >::const_iterator | alternatives_end () const |
get alternatives end iterator | |
void | add_analysis (const analysis &) |
add one analysis to current analysis list (no duplicate check!) | |
void | set_analysis (const analysis &) |
set analysis list to one single analysis, overwriting current values | |
void | set_analysis (const std::list< analysis > &) |
set analysis list, overwriting current values | |
void | set_form (const std::wstring &) |
set word form | |
void | set_ph_form (const std::wstring &) |
Set word phonetic form. | |
void | set_span (unsigned long, unsigned long) |
set token span | |
void | set_position (size_t) |
size_t | get_position () const |
bool | find_tag_match (const freeling::regexp &) const |
look for an analysis with a tag matching given regexp | |
int | get_n_analysis () const |
get number of analysis in current list | |
void | unselect_all_analysis (int k=0) |
empty the list of selected analysis | |
void | select_all_analysis (int k=0) |
mark all analysis as selected | |
void | select_analysis (word::iterator, int k=0) |
add the given analysis to selected list. | |
void | unselect_analysis (word::iterator, int k=0) |
remove the given analysis from selected list. | |
std::list< analysis > | get_analysis () const |
get list of analysis (useful for perl API) | |
word::iterator | analysis_begin () |
get begin iterator to analysis list (useful for perl/java API) | |
word::const_iterator | analysis_begin () const |
word::iterator | analysis_end () |
get end iterator to analysis list (useful for perl/java API) | |
word::const_iterator | analysis_end () const |
Public Attributes | |
std::vector< std::wstring > | user |
user-managed data, we just store it. | |
Private Member Functions | |
void | clone (const word &) |
clone word (used by assignment/copy constructors) | |
Private Attributes | |
std::wstring | form |
lexical form | |
std::wstring | lc_form |
lexical form, lowercased | |
std::wstring | ph_form |
phonetic form | |
std::list< word > | multiword |
empty list if not a multiword | |
bool | ambiguous_mw |
whether the multiword presents segmentation ambiguity (i.e. it might not be a mw) | |
std::list< std::pair < std::wstring, int > > | alternatives |
alternative forms provided by orthographic or phonetic SED | |
unsigned long | start |
token span | |
unsigned long | finish |
bool | in_dict |
word form found in dictionary | |
bool | locked |
word morphological analysis shouldn't be further modified | |
size_t | position |
position of word in the sentence (count from 0) | |
Static Private Attributes | |
static const int | SELECTED = 0 |
Values for word::iterator types. | |
static const int | UNSELECTED = 1 |
static const int | ALL = 2 |
static const std::wstring | NOT_FOUND = "" |
Class word stores all info related to a word: form, list of analysis, list of tokens (if multiword).
constructor
Class word stores all info related to a word: form, list of analysis, list of tokens (if multiword).
Create an empty new word
freeling::word::word | ( | const std::wstring & | ) |
constructor
freeling::word::word | ( | const std::wstring & | , |
const std::list< word > & | |||
) |
constructor
freeling::word::word | ( | const std::wstring & | , |
const std::list< analysis > & | , | ||
const std::list< word > & | |||
) |
constructor
freeling::word::word | ( | const word & | w | ) |
Copy constructor.
void freeling::word::add_alternative | ( | const std::wstring & | , |
int | |||
) |
add an alternative to the alternatives list
void freeling::word::add_analysis | ( | const analysis & | a | ) |
add one analysis to current analysis list (no duplicate check!)
Add one analysis to word analysis list.
Referenced by freeling::RE_map::annotate_word(), freeling::affixes::ApplyRule(), freeling::compounds::check_compound(), freeling::dictionary::check_contracted(), freeling::affixes::CheckRetokenizable(), and freeling::probabilities::guesser().
list< pair< wstring, int > >::iterator freeling::word::alternatives_begin | ( | ) |
get alternatives begin iterator
Referenced by freeling::alternatives::filter_alternatives().
list< pair< wstring, int > >::const_iterator freeling::word::alternatives_begin | ( | ) | const |
get alternatives begin iterator
get alternatives begin const iterator
list< pair< wstring, int > >::iterator freeling::word::alternatives_end | ( | ) |
get alternatives end iterator
Referenced by freeling::alternatives::filter_alternatives().
list< pair< wstring, int > >::const_iterator freeling::word::alternatives_end | ( | ) | const |
get alternatives end iterator
get alternatives end const iterator
get begin iterator to analysis list (useful for perl/java API)
get begin iterator to analysis list.
get end iterator to analysis list (useful for perl/java API)
get end iterator to analysis list.
void freeling::word::clear_alternatives | ( | ) |
clear alternatives list
Referenced by freeling::alternatives::filter_alternatives().
void freeling::word::clone | ( | const word & | w | ) | [private] |
void freeling::word::copy_analysis | ( | const word & | w | ) |
copy analysis from another word
Copy analysis list of given word.
bool freeling::word::find_tag_match | ( | const freeling::regexp & | re | ) | const |
look for an analysis with a tag matching given regexp
look for a tag in the analysis list of a word
References freeling::regexp::search().
Referenced by freeling::probabilities::annotate_word().
bool freeling::word::found_in_dict | ( | ) | const |
get in_dict
Referenced by freeling::probabilities::annotate_word(), and freeling::affixes::look_for_combined_affixes().
const list< pair< wstring, int > > & freeling::word::get_alternatives | ( | ) | const |
get alternatives list const &
Referenced by freeling::alternatives::filter_alternatives().
list< pair< wstring, int > > & freeling::word::get_alternatives | ( | ) |
get alternatives list &
list< analysis > freeling::word::get_analysis | ( | ) | const |
get list of analysis (useful for perl API)
get list of analysis (only useful for perl API)
const wstring & freeling::word::get_form | ( | ) | const |
get word form
Get word form.
Referenced by freeling::RE_map::annotate_word(), freeling::probabilities::annotate_word(), freeling::affixes::ApplyRule(), freeling::affixes::CheckRetokenizable(), freeling::alternatives::filter_alternatives(), freeling::affixes::look_for_affixes(), freeling::dep_tree::PrintDepTree(), freeling::parse_tree::PrintTree(), freeling::probabilities::smoothing(), and freeling::traces::trace_word().
const wstring & freeling::word::get_lc_form | ( | ) | const |
Get word form, lowercased.
Referenced by freeling::affixes::ApplyRule(), freeling::compounds::check_compound(), freeling::alternatives::filter_alternatives(), freeling::probabilities::guesser(), freeling::affixes::look_for_affixes_in_list(), freeling::affixes::look_for_combined_affixes(), freeling::hmm_tagger::ProbB_log(), freeling::probabilities::smoothing(), and freeling::locutions::ValidMultiWord().
const wstring & freeling::word::get_lemma | ( | int | k = 0 | ) | const |
get lemma for the selected analysis in list
Get lemma for the selected analysis in list.
Referenced by freeling::affixes::CheckRetokenizable(), freeling::dep_tree::PrintDepTree(), and freeling::parse_tree::PrintTree().
int freeling::word::get_n_analysis | ( | ) | const |
get number of analysis in current list
Get length of analysis list.
Referenced by freeling::probabilities::annotate_word(), freeling::compounds::check_compound(), freeling::dictionary::check_contracted(), freeling::probabilities::guesser(), freeling::affixes::look_for_affixes(), and freeling::probabilities::smoothing().
int freeling::word::get_n_selected | ( | int | k = 0 | ) | const |
Get the number of selected analysis.
int freeling::word::get_n_unselected | ( | int | k = 0 | ) | const |
get the number of unselected analysis
Get the number of unselected analysis.
int freeling::word::get_n_words_mw | ( | ) | const |
get number of words in compound
Get number of words in compound.
const wstring & freeling::word::get_ph_form | ( | ) | const |
Get word phonetic form.
size_t freeling::word::get_position | ( | ) | const |
const list< pair< wstring, double > > & freeling::word::get_senses | ( | int | k = 0 | ) | const |
get sense list for the selected analysis
get reference to sense list for the selected analysis
list< pair< wstring, double > > & freeling::word::get_senses | ( | int | k = 0 | ) |
get reference to sense list for the selected analysis
wstring freeling::word::get_senses_string | ( | int | k = 0 | ) | const |
get sense list (as string) for the selected analysis
unsigned long freeling::word::get_span_finish | ( | ) | const |
Referenced by freeling::traces::trace_word().
unsigned long freeling::word::get_span_start | ( | ) | const |
const wstring & freeling::word::get_tag | ( | int | k = 0 | ) | const |
get tag for the selected analysis
Get PoS tag for the selected analysis in list.
Referenced by freeling::affixes::CheckRetokenizable(), freeling::dep_tree::PrintDepTree(), and freeling::parse_tree::PrintTree().
const list< word > & freeling::word::get_words_mw | ( | ) | const |
get word objects that make up the multiword
Get list of words in compound.
Referenced by freeling::traces::trace_word(), and freeling::ner_module::ValidMultiWord().
bool freeling::word::has_alternatives | ( | ) | const |
find out if the speller checked alternatives
bool freeling::word::has_retokenizable | ( | ) | const |
check if there is any retokenizable analysis
Referenced by freeling::probabilities::annotate_word().
bool freeling::word::is_ambiguous_mw | ( | ) | const |
true iff the word is a multiword marked as ambiguous
Check whether the word is an ambiguous multiword.
bool freeling::word::is_locked | ( | ) | const |
check if word is marked as having definitive analysis
bool freeling::word::is_multiword | ( | ) | const |
true iff the word is a multiword compound
Check whether the word is a compound.
Referenced by freeling::traces::trace_word().
void freeling::word::lock_analysis | ( | ) |
mark word as having definitive analysis
Referenced by freeling::RE_map::annotate_word().
unsigned int freeling::word::num_kbest | ( | ) | const |
Get how many kbest tags the word has.
Get how many kbest tags the word stores.
void freeling::word::select_all_analysis | ( | int | k = 0 | ) |
mark all analysis as selected
mark all analysis as selected for k-th best sequence
Referenced by freeling::probabilities::annotate_word().
void freeling::word::select_analysis | ( | word::iterator | tag, |
int | k = 0 |
||
) |
add the given analysis to selected list.
Mark given analysis as selected.
Get an iterator to the first selected analysis.
Get the first selected analysis iterator.
Referenced by freeling::traces::trace_word().
word::const_iterator freeling::word::selected_begin | ( | int | k = 0 | ) | const |
Get an iterator to the first selected analysis.
Get the first selected analysis iterator.
Get an iterator to the end of selected analysis list.
Get the end of selected analysis list.
Referenced by freeling::traces::trace_word().
word::const_iterator freeling::word::selected_end | ( | int | k = 0 | ) | const |
Get an iterator to the end of selected analysis list.
Get the end of selected analysis list.
void freeling::word::set_alternatives | ( | const std::list< std::pair< std::wstring, int > > & | ) |
replace alternatives list with list given
void freeling::word::set_ambiguous_mw | ( | bool | a | ) |
set mw ambiguity status
Set mw ambiguity status.
void freeling::word::set_analysis | ( | const analysis & | a | ) |
set analysis list to one single analysis, overwriting current values
Set (override) word analysis list with one single analysis.
Referenced by freeling::probabilities::guesser().
void freeling::word::set_analysis | ( | const std::list< analysis > & | ) |
set analysis list, overwriting current values
void freeling::word::set_form | ( | const std::wstring & | ) |
void freeling::word::set_found_in_dict | ( | bool | b | ) |
set in_dict
Referenced by freeling::affixes::ApplyRule(), freeling::compounds::check_compound(), and freeling::affixes::look_for_combined_affixes().
void freeling::word::set_ph_form | ( | const std::wstring & | ) |
Set word phonetic form.
void freeling::word::set_position | ( | size_t | p | ) |
void freeling::word::set_senses | ( | const std::list< std::pair< std::wstring, double > > & | , |
int | k = 0 |
||
) |
set sense list for the selected analysis
void freeling::word::set_span | ( | unsigned long | s, |
unsigned long | e | ||
) |
set token span
Set token span.
void freeling::word::unselect_all_analysis | ( | int | k = 0 | ) |
empty the list of selected analysis
Unmark all analysis as selected for k-th best sequence
void freeling::word::unselect_analysis | ( | word::iterator | tag, |
int | k = 0 |
||
) |
remove the given analysis from selected list.
Unmark given analysis as selected.
Get an iterator to the first unselected analysis.
Get the first unselected analysis iterator.
Referenced by freeling::traces::trace_word().
word::const_iterator freeling::word::unselected_begin | ( | int | k = 0 | ) | const |
Get an iterator to the first unselected analysis.
Get the first unselected analysis iterator.
Get an iterator to the end of unselected analysis list.
Get the end of unselected analysis list.
Referenced by freeling::traces::trace_word().
word::const_iterator freeling::word::unselected_end | ( | int | k = 0 | ) | const |
Get an iterator to the end of unselected analysis list.
Get the end of unselected analysis list.
const int freeling::word::ALL = 2 [static, private] |
Referenced by freeling::word::iterator::operator++(), and freeling::word::const_iterator::operator++().
std::list<std::pair<std::wstring,int> > freeling::word::alternatives [private] |
alternative forms provided by orthographic or phonetic SED
Referenced by clone().
bool freeling::word::ambiguous_mw [private] |
whether the multiword presents segmentation ambiguity (i.e. it might not be a mw)
Referenced by clone().
unsigned long freeling::word::finish [private] |
Referenced by clone().
std::wstring freeling::word::form [private] |
lexical form
Referenced by clone().
bool freeling::word::in_dict [private] |
word form found in dictionary
Referenced by clone().
std::wstring freeling::word::lc_form [private] |
lexical form, lowercased
Referenced by clone().
bool freeling::word::locked [private] |
word morphological analysis shouldn't be further modified
Referenced by clone().
std::list<word> freeling::word::multiword [private] |
empty list if not a multiword
Referenced by clone().
const wstring freeling::word::NOT_FOUND = "" [static, private] |
std::wstring freeling::word::ph_form [private] |
phonetic form
Referenced by clone().
size_t freeling::word::position [private] |
position of word in the sentence (count from 0)
Referenced by clone().
const int freeling::word::SELECTED = 0 [static, private] |
Values for word::iterator types.
Referenced by freeling::word::iterator::operator++(), and freeling::word::const_iterator::operator++().
unsigned long freeling::word::start [private] |
token span
Referenced by clone().
const int freeling::word::UNSELECTED = 1 [static, private] |
std::vector<std::wstring> freeling::word::user |
user-managed data, we just store it.
Referenced by clone().