FreeLing
4.0
|
Class util implements some utilities for NLP analyzers: "tolower" for Latin alphabets, tag manipulation, wstring2number (and vice versa) conversions, etc. More...
#include <util.h>
Public Member Functions | |
template<> | |
std::wstring | wstring_from (const long double &x) |
Convert a long double to wstring, removing trailing zeros. | |
template<> | |
std::wstring | wstring_from (const std::string &s) |
Convert a string (possibly with utf8 chars) to a wstring. | |
template<> | |
std::wstring | wstring_from (const char *cp) |
Convert a char* (possibly with utf8 chars) to a wstring. | |
Static Public Member Functions | |
static void | init_locale (const std::wstring &s=L"default") |
Init the locale of the program, to properly handle unicode. | |
static void | open_utf8_file (std::wifstream &, const std::wstring &) |
open a UTF8 file for reading | |
static void | open_utf8_file (std::wofstream &, const std::wstring &) |
open a UTF8 file for writing | |
static std::wstring | lowercase (const std::wstring &) |
Lowercase a wstring, even with latin characters. | |
static std::wstring | uppercase (const std::wstring &) |
uppercase a wstring, even with latin characters | |
static bool | is_absolute (const std::string &p) |
check if given path is absolute | |
static bool | is_absolute (const std::wstring &p) |
check if given path is absolute | |
static std::string | get_current_path () |
filename management: get current working directory | |
static std::string | absolute (const std::string &, const std::string &) |
filename management: absolutize a maybe relative path | |
static std::wstring | absolute (const std::wstring &, const std::wstring &) |
filename management: absolutize a maybe relative path | |
static std::string | expand_filename (const std::string &) |
filename management: expand environment variables in a path | |
static std::wstring | expand_filename (const std::wstring &) |
filename management: expand environment variables in a path | |
static std::wstring | new_tempfile_name () |
filename management: get unique tempfile name | |
static std::wstring | remove_chars (const std::wstring &, const std::wstring &) |
remove occurrences of given chars | |
static void | find_and_replace (std::wstring &, const std::wstring &, const std::wstring &) |
wstring handling | |
static int | wstring2int (const std::wstring &) |
conversion utilities | |
static double | wstring2double (const std::wstring &) |
static long double | wstring2longdouble (const std::wstring &) |
template<class C > | |
static std::wstring | wstring_from (const C &, const std::wstring &) |
Convert a set/vector/list<T> into a wstring with separators. | |
template<class C > | |
static std::wstring | wstring_from (const C &) |
Convert a number (int, double) to wstring. | |
template<class C > | |
static std::wstring | wstring_from (const C *) |
template<class C > | |
static C | wstring_to (const std::wstring &, const std::wstring &, bool mcsep=true) |
Convert a wstring with separators into a set/vector/list<wstring>. | |
template<class C > | |
static C | wstring_to (const std::wstring &) |
Convert a wstring to int/double/longdouble. | |
template<class P1 , class P2 > | |
static std::wstring | pairlist2wstring (const std::list< std::pair< P1, P2 > > &, const std::wstring &, const std::wstring &) |
Create a single wstring concatenating all elements in given list with given separators (one for list elements, one for pair elements). | |
template<class P1 , class P2 > | |
static std::list< std::pair < P1, P2 > > | wstring2pairlist (const std::wstring &, const std::wstring &, const std::wstring &) |
Build a list of pairs from given string and separators. | |
static int | capitalization (const std::wstring &) |
Find out capitalization pattern: AAAA vs Aaaaa. | |
static std::wstring | capitalize (const std::wstring &, int, bool) |
Format a string to the specified capitalization pattern. | |
template<class T1 , class T2 > | |
static bool | ascending_first (const std::pair< T1, T2 > &, const std::pair< T1, T2 > &) |
sorting criteria for lists of pairs | |
template<class T1 , class T2 > | |
static bool | ascending_second (const std::pair< T1, T2 > &, const std::pair< T1, T2 > &) |
Sort lists of pairs by ascending second component. | |
template<class T1 , class T2 > | |
static bool | descending_first (const std::pair< T1, T2 > &, const std::pair< T1, T2 > &) |
Sort lists of pairs by descending first component. | |
template<class T1 , class T2 > | |
static bool | descending_second (const std::pair< T1, T2 > &, const std::pair< T1, T2 > &) |
Sort lists of pairs by descending second component. | |
Static Public Attributes | |
static regexp | RE_has_lowercase |
useful regexps | |
static regexp | RE_has_alphanum |
static regexp | RE_is_capitalized |
static regexp | RE_all_digits |
static regexp | RE_all_caps |
static regexp | RE_initial_dot |
static regexp | RE_all_caps_dot |
static regexp | RE_capitalized_dot |
static regexp | RE_has_digits |
static regexp | RE_lowercase_dot |
static regexp | RE_win_absolute_path |
Class util implements some utilities for NLP analyzers: "tolower" for Latin alphabets, tag manipulation, wstring2number (and vice versa) conversions, etc.
static std::string freeling::util::absolute | ( | const std::string & | , |
const std::string & | |||
) | [static] |
filename management: absolutize a maybe relative path
Referenced by freeling::alternatives::alternatives(), freeling::bioner::bioner(), freeling::hmm_tagger::hmm_tagger(), freeling::locutions::locutions(), freeling::probabilities::probabilities(), and freeling::ukb::ukb().
static std::wstring freeling::util::absolute | ( | const std::wstring & | , |
const std::wstring & | |||
) | [static] |
filename management: absolutize a maybe relative path
bool freeling::util::ascending_first | ( | const std::pair< T1, T2 > & | p1, |
const std::pair< T1, T2 > & | p2 | ||
) | [inline, static] |
sorting criteria for lists of pairs
Sort lists of pairs by ascending first component.
bool freeling::util::ascending_second | ( | const std::pair< T1, T2 > & | p1, |
const std::pair< T1, T2 > & | p2 | ||
) | [inline, static] |
Sort lists of pairs by ascending second component.
int freeling::util::capitalization | ( | const std::wstring & | ) | [static] |
Find out capitalization pattern: AAAA vs Aaaaa.
References UPPER_1ST, UPPER_ALL, and UPPER_NONE.
Referenced by freeling::affixes::ApplyRule().
wstring freeling::util::capitalize | ( | const std::wstring & | , |
int | , | ||
bool | |||
) | [static] |
Format a string to the specified capitalization pattern.
References UPPER_1ST, and UPPER_ALL.
Referenced by freeling::affixes::CheckRetokenizable().
bool freeling::util::descending_first | ( | const std::pair< T1, T2 > & | p1, |
const std::pair< T1, T2 > & | p2 | ||
) | [inline, static] |
Sort lists of pairs by descending first component.
bool freeling::util::descending_second | ( | const std::pair< T1, T2 > & | p1, |
const std::pair< T1, T2 > & | p2 | ||
) | [inline, static] |
Sort lists of pairs by descending second component.
static std::string freeling::util::expand_filename | ( | const std::string & | ) | [static] |
filename management: expand environment variables in a path
static std::wstring freeling::util::expand_filename | ( | const std::wstring & | ) | [static] |
filename management: expand environment variables in a path
void freeling::util::find_and_replace | ( | std::wstring & | , |
const std::wstring & | , | ||
const std::wstring & | |||
) | [static] |
wstring handling
Replace all occurrences of s in t by r.
string freeling::util::get_current_path | ( | ) | [static] |
void freeling::util::init_locale | ( | const std::wstring & | s = L"default" | ) | [static] |
Init the locale of the program, to properly handle unicode.
Init the locale of the program.
If no parameter is given, the default locale en_US.utf8 is used. If "system" is specified, the system locale is used. Otherwise, the given locale is used. In any case, the selected locale is used only for alphanumerical functions (utf8 encoding, tolower, isalpha, etc.). Note that for FreeLing to work with UTF8 texts, the locale must be set to some UTF-8 locale (e.g. "en_US.utf8") installed in the system.
Init useful regexps
References freeling::current_locale, DEFAULT_LOCALE, ERROR_CRASH, string2wstring, and wstring2string.
static bool freeling::util::is_absolute | ( | const std::string & | p | ) | [static] |
check if given path is absolute
static bool freeling::util::is_absolute | ( | const std::wstring & | p | ) | [static] |
check if given path is absolute
wstring freeling::util::lowercase | ( | const std::wstring & | ) | [static] |
Lowercase a wstring, even with latin characters.
Lowercase a string, possibly with accents.
References freeling::current_locale.
Referenced by freeling::alternatives::alternatives(), freeling::bioner::bioner(), freeling::ukb::init_synset_vector(), and freeling::np::np().
std::wstring freeling::util::new_tempfile_name | ( | ) | [inline, static] |
filename management: get unique tempfile name
Return a hopefully unique name for a temporary file.
References err_type, ERROR_CRASH, NEW_TMPNAME, TMPNAME_FAILED, TMPNAME_PREFIX, and wstring_from().
Referenced by freeling::alternatives::alternatives().
static void freeling::util::open_utf8_file | ( | std::wifstream & | , |
const std::wstring & | |||
) | [static] |
open a UTF8 file for reading
Referenced by freeling::affixes::affixes(), freeling::alternatives::alternatives(), freeling::csr_kb::csr_kb(), freeling::config_file::open(), and freeling::phd< T >::phd().
static void freeling::util::open_utf8_file | ( | std::wofstream & | , |
const std::wstring & | |||
) | [static] |
open a UTF8 file for writing
std::wstring freeling::util::pairlist2wstring | ( | const std::list< std::pair< P1, P2 > > & | ls, |
const std::wstring & | sep_pair, | ||
const std::wstring & | sep_list | ||
) | [inline, static] |
Create a single wstring concatenating all elements in given list with given separators (one for list elements, one for pair elements).
Referenced by freeling::tagset::get_msd_string().
wstring freeling::util::remove_chars | ( | const std::wstring & | , |
const std::wstring & | |||
) | [static] |
remove occurrences of given chars
Auxiliary function: delete from text any char present in clist.
wstring freeling::util::uppercase | ( | const std::wstring & | ) | [static] |
uppercase a wstring, even with latin characters
Uppercase a string, possibly with accents.
References freeling::current_locale.
Referenced by freeling::tagset::compute_msd_features(), and freeling::tagset::tagset().
static double freeling::util::wstring2double | ( | const std::wstring & | ) | [static] |
static int freeling::util::wstring2int | ( | const std::wstring & | ) | [static] |
conversion utilities
Referenced by freeling::tagset::tagset().
static long double freeling::util::wstring2longdouble | ( | const std::wstring & | ) | [static] |
std::list< std::pair< P1, P2 > > freeling::util::wstring2pairlist | ( | const std::wstring & | s, |
const std::wstring & | sep_pair, | ||
const std::wstring & | sep_list | ||
) | [inline, static] |
Build a list of pairs from given string and separators.
Referenced by freeling::tagset::get_msd_features().
std::wstring freeling::util::wstring_from | ( | const C & | ls, |
const std::wstring & | sep | ||
) | [inline, static] |
Convert a set/vector/list<T> into a wstring with separators.
Referenced by new_tempfile_name().
std::wstring freeling::util::wstring_from | ( | const C & | x | ) | [inline, static] |
Convert a number (int, double) to wstring.
static std::wstring freeling::util::wstring_from | ( | const C * | ) | [static] |
std::wstring freeling::util::wstring_from | ( | const long double & | x | ) | [inline] |
Convert a long double to wstring, removing trailing zeros.
std::wstring freeling::util::wstring_from | ( | const std::string & | s | ) | [inline] |
Convert a string (possibly with utf8 chars) to a wstring.
References WARNING.
std::wstring freeling::util::wstring_from | ( | const char * | cp | ) | [inline] |
Convert a char* (possibly with utf8 chars) to a wstring.
C freeling::util::wstring_to | ( | const std::wstring & | ws, |
const std::wstring & | sep, | ||
bool | mcsep = true |
||
) | [inline, static] |
Convert a wstring with separators into a set/vector/list<wstring>.
If mcsep=true, "sep" is treated as a single (multichar) separator. If mcsep=false, "sep" is treated as a set of possible separator chars.
std::string freeling::util::wstring_to | ( | const std::wstring & | ws | ) | [inline, static] |
Convert a wstring to int/double/longdouble.
Convert a wstring to a string (possibly with utf8 chars)
Convert a wstring (likely of 1 character) to wchar_t.
Referenced by freeling::punts::analyze().
useful regexps
Create useful regexps.
Referenced by freeling::np::np().