FreeLing
4.0
|
Class analyzer is a meta class that just calls all modules in FreeLing in the right order. More...
#include <analyzer.h>
Classes | |
class | analyzer_config_options |
Class analyzer::config_options contains the configuration options that define which modules are active and which configuration files are loaded for each of them at construction time. More... | |
class | analyzer_invoke_options |
Class analyzer::invoke_options contains the options that define the behaviour of each module in the analyzer on the next analysis. More... | |
Public Types | |
typedef analyzer_config_options | config_options |
typedef analyzer_invoke_options | invoke_options |
Public Member Functions | |
analyzer (const config_options &cfg) | |
void | set_current_invoke_options (const invoke_options &opt, bool check=true) |
const invoke_options & | get_current_invoke_options () const |
~analyzer () | |
void | analyze (document &doc) const |
analyze further levels on a partially analyzed document | |
void | analyze (std::list< sentence > &ls) const |
analyze further levels on partially analyzed sentences | |
void | analyze (const wstring &text, document &doc, bool parag=false) const |
analyze text as a whole document | |
void | analyze (const wstring &text, std::list< sentence > &ls, bool flush=false) |
Analyze text as a partial document. | |
void | flush_buffer (std::list< sentence > &ls) |
void | reset_offset () |
Private Member Functions | |
template<class T > | |
void | do_analysis (T &doc) const |
analyze further levels on a partially analyzed document | |
void | tokenize_split (const std::wstring &text, std::list< sentence > &ls, unsigned long &offs, std::list< word > &av, unsigned long &nsent, bool flush, splitter::session_id sp_ses) const |
Private Attributes | |
tokenizer * | tk |
splitter * | sp |
maco * | morfo |
nec * | neclass |
senses * | sens |
ukb * | dsb |
POS_tagger * | hmm |
POS_tagger * | relax |
phonetics * | phon |
chart_parser * | parser |
dep_txala * | deptxala |
dep_treeler * | deptreeler |
relaxcor * | corfc |
semgraph_extract * | sge |
analyzer_invoke_options | current_invoke_options |
splitter::session_id | sp_id |
unsigned long | offs |
unsigned long | nsentence |
std::list< word > | tokens |
Class analyzer is a meta class that just calls all modules in FreeLing in the right order.
Its construction options allow instantiating different kinds of analysis pipelines, and/or different languages. Also, invocation options may be altered at each call, tuning the analysis to the needs of each particular sentence or document. For finer control, the underlying modules should be called directly.
freeling::analyzer::analyzer | ( | const config_options & | cfg | ) |
Create analyzers defined by config_options.
References freeling::analyzer::analyzer_config_options::COREF_CorefFile, freeling::analyzer::analyzer_config_options::DEP_TreelerFile, freeling::analyzer::analyzer_config_options::DEP_TxalaFile, freeling::analyzer::analyzer_config_options::Lang, freeling::analyzer::analyzer_config_options::MACO_AffixFile, freeling::analyzer::analyzer_config_options::MACO_CompoundFile, freeling::analyzer::analyzer_config_options::MACO_Decimal, freeling::analyzer::analyzer_config_options::MACO_DictionaryFile, freeling::analyzer::analyzer_config_options::MACO_LocutionsFile, freeling::analyzer::analyzer_config_options::MACO_NPDataFile, freeling::analyzer::analyzer_config_options::MACO_ProbabilityFile, freeling::analyzer::analyzer_config_options::MACO_ProbabilityThreshold, freeling::analyzer::analyzer_config_options::MACO_PunctuationFile, freeling::analyzer::analyzer_config_options::MACO_QuantitiesFile, freeling::analyzer::analyzer_config_options::MACO_Thousand, freeling::analyzer::analyzer_config_options::MACO_UserMapFile, freeling::analyzer::analyzer_config_options::NEC_NECFile, freeling::NO_DEP, freeling::NO_TAGGER, freeling::NO_WSD, freeling::analyzer::analyzer_config_options::PARSER_GrammarFile, freeling::analyzer::analyzer_config_options::PHON_PhoneticsFile, freeling::analyzer::analyzer_config_options::SEMGRAPH_SemGraphFile, freeling::analyzer::analyzer_config_options::SENSE_ConfigFile, freeling::maco_options::set_data_files(), freeling::maco_options::set_inverse_dict(), freeling::maco_options::set_nummerical_points(), freeling::maco_options::set_threshold(), freeling::analyzer::analyzer_config_options::SPLIT_SplitterFile, freeling::analyzer::analyzer_config_options::TAGGER_ForceSelect, freeling::analyzer::analyzer_config_options::TAGGER_HMMFile, freeling::analyzer::analyzer_config_options::TAGGER_RelaxEpsilon, freeling::analyzer::analyzer_config_options::TAGGER_RelaxFile, freeling::analyzer::analyzer_config_options::TAGGER_RelaxMaxIter, 
freeling::analyzer::analyzer_config_options::TAGGER_RelaxScaleFactor, freeling::analyzer::analyzer_config_options::TAGGER_Retokenize, freeling::TEXT, freeling::analyzer::analyzer_config_options::TOK_TokenizerFile, and freeling::analyzer::analyzer_config_options::UKB_ConfigFile.
void freeling::analyzer::analyze | ( | document & | doc | ) | const |
analyze further levels on a partially analyzed document
References freeling::COREF, and freeling::SEMGRAPH.
void freeling::analyzer::analyze | ( | std::list< sentence > & | ls | ) | const |
analyze further levels on partially analyzed sentences
void freeling::analyzer::analyze | ( | const wstring & | text, |
document & | doc, | ||
bool | parag = false |
||
) | const |
analyze text as a whole document
void freeling::analyzer::analyze | ( | const wstring & | text, |
std::list< sentence > & | ls, | ||
bool | flush = false |
||
) |
Analyze text as a partial document.
Incomplete sentences are retained in the buffer in case the next call completes them (unless flush==true).
void freeling::analyzer::do_analysis | ( | T & | doc | ) | const [private] |
analyze further levels on a partially analyzed document
References freeling::COREF, freeling::DEP, freeling::HMM, freeling::MORFO, freeling::NO_WSD, freeling::PARSED, freeling::RELAX, freeling::SHALLOW, freeling::TAGGED, freeling::TREELER, freeling::TXALA, and freeling::UKB.
void freeling::analyzer::flush_buffer | ( | std::list< sentence > & | ls | ) |
const analyzer::invoke_options & freeling::analyzer::get_current_invoke_options | ( | ) | const |
void freeling::analyzer::reset_offset | ( | ) |
void freeling::analyzer::set_current_invoke_options | ( | const invoke_options & | opt, |
bool | check = true |
||
) |
References freeling::COREF, freeling::DEP, freeling::analyzer::analyzer_invoke_options::DEP_which, ERROR_CRASH, freeling::HMM, freeling::analyzer::analyzer_invoke_options::InputLevel, freeling::analyzer::analyzer_invoke_options::MACO_AffixAnalysis, freeling::analyzer::analyzer_invoke_options::MACO_CompoundAnalysis, freeling::analyzer::analyzer_invoke_options::MACO_DatesDetection, freeling::analyzer::analyzer_invoke_options::MACO_DictionarySearch, freeling::analyzer::analyzer_invoke_options::MACO_MultiwordsDetection, freeling::analyzer::analyzer_invoke_options::MACO_NERecognition, freeling::analyzer::analyzer_invoke_options::MACO_NumbersDetection, freeling::analyzer::analyzer_invoke_options::MACO_ProbabilityAssignment, freeling::analyzer::analyzer_invoke_options::MACO_PunctuationDetection, freeling::analyzer::analyzer_invoke_options::MACO_QuantitiesDetection, freeling::analyzer::analyzer_invoke_options::MACO_RetokContractions, freeling::analyzer::analyzer_invoke_options::MACO_UserMap, freeling::MORFO, freeling::analyzer::analyzer_invoke_options::NEC_NEClassification, freeling::NO_DEP, freeling::NO_TAGGER, freeling::NO_WSD, freeling::analyzer::analyzer_invoke_options::OutputLevel, freeling::PARSED, freeling::analyzer::analyzer_invoke_options::PHON_Phonetics, freeling::RELAX, freeling::SEMGRAPH, freeling::analyzer::analyzer_invoke_options::SENSE_WSD_which, freeling::SHALLOW, freeling::SPLITTED, freeling::TAGGED, freeling::analyzer::analyzer_invoke_options::TAGGER_which, freeling::TEXT, freeling::TOKEN, freeling::TREELER, freeling::TXALA, freeling::UKB, and WARNING.
void freeling::analyzer::tokenize_split | ( | const std::wstring & | text, |
std::list< sentence > & | ls, | ||
unsigned long & | offs, | ||
std::list< word > & | av, | ||
unsigned long & | nsent, | ||
bool | flush, | ||
splitter::session_id | sp_ses | ||
) | const [private] |
References int2wstring, freeling::SPLITTED, and freeling::TOKEN.
relaxcor* freeling::analyzer::corfc [private] |
dep_treeler* freeling::analyzer::deptreeler [private] |
dep_txala* freeling::analyzer::deptxala [private] |
ukb* freeling::analyzer::dsb [private] |
POS_tagger* freeling::analyzer::hmm [private] |
maco* freeling::analyzer::morfo [private] |
nec* freeling::analyzer::neclass [private] |
unsigned long freeling::analyzer::nsentence [private] |
unsigned long freeling::analyzer::offs [private] |
chart_parser* freeling::analyzer::parser [private] |
phonetics* freeling::analyzer::phon [private] |
POS_tagger* freeling::analyzer::relax [private] |
senses* freeling::analyzer::sens [private] |
semgraph_extract* freeling::analyzer::sge [private] |
splitter* freeling::analyzer::sp [private] |
tokenizer* freeling::analyzer::tk [private] |
std::list<word> freeling::analyzer::tokens [private] |