FreeLing
4.0
|
Class "idioma" implements a visible Markov model that calculates the probability that a text is in a certain language. More...
#include <idioma.h>
Public Member Functions | |
idioma (const std::wstring &) | |
Constructor, given the model file to load. | |
~idioma () | |
double | sequence_probability (std::wistream &, size_t &) const |
Calculates the probability that the text is in the instance language. | |
double | compute_perplexity (const std::wstring &) const |
Compute normalized language probability for given string. | |
std::wstring | get_language_code () const |
get ISO code for current language | |
double | get_threshold () const |
get maximum allowed perplexity | |
Static Public Member Functions | |
static void | create_model (const std::wstring &modelFile, std::wistream &f, const std::wstring &code, int order, wchar_t phantom) |
Use given text file to count ngrams and create a model file. | |
Static Private Member Functions | |
static std::wstring | to_writable (wchar_t c) |
convert a char to a writable representation for the model file | |
static std::wstring | to_writable (const std::wstring &) |
convert an ngram to a writable representation for the model file | |
static void | initial_ngram (std::wistream &f, std::wstring &ngram, wchar_t &z, int ord, wchar_t ph) |
Initial ngram: n-1 phantom characters plus the first actual letter. | |
static void | next_ngram (std::wistream &f, std::wstring &ngram, wchar_t &z) |
slide ngram window one position to the left | |
Private Attributes | |
std::wstring | LangCode |
std::map< std::wstring, double > | count |
auxiliary for training | |
wchar_t | phantom |
char to use to create initial state n-gram | |
int | order |
order of ngram model | |
double | threshold |
maximum perplexity to accept a sequence | |
smoothingLD< std::wstring, wchar_t > * | smooth |
Class "idioma" implements a visible Markov model that calculates the probability that a text is in a certain language.
freeling::idioma::idioma | ( | const std::wstring & | ) |
Constructor, given the model file to load.
double freeling::idioma::compute_perplexity | ( | const std::wstring & | ) | const |
Compute normalized language probability for given string.
static void freeling::idioma::create_model | ( | const std::wstring & | modelFile, |
std::wistream & | f, | ||
const std::wstring & | code, | ||
int | order, | ||
wchar_t | phantom | ||
) | [static] |
Use given text file to count ngrams and create a model file.
std::wstring freeling::idioma::get_language_code | ( | ) | const |
get ISO code for current language
double freeling::idioma::get_threshold | ( | ) | const |
get maximum allowed perplexity
static void freeling::idioma::initial_ngram | ( | std::wistream & | f, |
std::wstring & | ngram, | ||
wchar_t & | z, | ||
int | ord, | ||
wchar_t | ph | ||
) | [static, private] |
Initial ngram: n-1 phantom characters plus the first actual letter.
static void freeling::idioma::next_ngram | ( | std::wistream & | f, |
std::wstring & | ngram, | ||
wchar_t & | z | ||
) | [static, private] |
slide ngram window one position to the left
double freeling::idioma::sequence_probability | ( | std::wistream & | , |
size_t & | |||
) | const |
Calculates the probability that the text is in the instance language.
static std::wstring freeling::idioma::to_writable | ( | wchar_t | c | ) | [static, private] |
convert a char to a writable representation for the model file
static std::wstring freeling::idioma::to_writable | ( | const std::wstring & | ) | [static, private] |
convert an ngram to a writable representation for the model file
std::map<std::wstring,double> freeling::idioma::count [private] |
auxiliary for training
std::wstring freeling::idioma::LangCode [private] |
int freeling::idioma::order [private] |
order of ngram model
wchar_t freeling::idioma::phantom [private] |
char to use to create initial state n-gram
smoothingLD<std::wstring,wchar_t>* freeling::idioma::smooth [private] |
double freeling::idioma::threshold [private] |
maximum perplexity to accept a sequence