summarizer class summarizes a document using the lexical chains method. More...

#include <summarizer.h>

Collaboration diagram for freeling::summarizer:

Public Types
enum	Heuristics { FIRST_WORD, FIRST_MOST_WEIGHT, WEIGHT_SUM }
Public Member Functions
	summarizer (const std::wstring &datFile)
	Constructor.
	~summarizer ()
	Destructor.
std::list< const freeling::sentence * >	summarize (const freeling::document &doc, int num_words) const
	Summarizes a document and returns the list of sentences that composes the summary.
Private Member Functions
std::map< std::wstring, std::list< lexical_chain > >	build_lexical_chains (const freeling::document &doc) const
	Builds all the lexical chains.
void	remove_one_word_lexical_chains (std::map< std::wstring, std::list< lexical_chain >> &chains) const
	Remove the lexical chains with only one word.
void	remove_weak_lexical_chains (std::map< std::wstring, std::list< lexical_chain >> &chains) const
	Remove the lexical chains which do not satisfy the strength criterion.
void	print_lexical_chains (std::map< std::wstring, std::list< lexical_chain >> &chains) const
	Print the lexical chains. Only for debugging.
int	count_occurences (const freeling::word &w, const freeling::document &doc) const
	Counts the number of occurrences of the word w in the document doc.
double	average_scores (std::map< std::wstring, std::list< lexical_chain > > &chains_type) const
	Computes and returns the average score of the lexical chains.
double	standard_deviation_scores (std::map< std::wstring, std::list< lexical_chain > > &chains_type, const double avg) const
	Computes and returns the standard deviation of the lexical chains scores.
std::list< lexical_chain >	map_to_lists (std::map< std::wstring, std::list< lexical_chain > > &chains_type) const
	Concatenate all the lists in the map chains_type into a single list.
void	compute_sentence (const std::list< word_pos > &wps, std::list< word_pos > &wp_list, std::set< const freeling::sentence * > &sent_set, int &acc_n_words, int num_words) const
	Auxiliary function for the first_word and first_most_weighted_word functions.
std::list< word_pos >	first_word (std::map< std::wstring, std::list< lexical_chain > > &chains_type, int num_words) const
	Returns the list of sentences embedded in a word_pos struct which composes the summary using the heuristic FirstWord.
std::list< word_pos >	first_most_weighted_word (std::map< std::wstring, std::list< lexical_chain > > &chains, int num_words) const
	Returns the list of sentences embedded in a word_pos struct which composes the summary using the heuristic FirstMostWeightedWord.
std::list< word_pos >	sum_of_chain_weights (std::map< std::wstring, std::list< lexical_chain > > &chains, int num_words) const
	Returns the list of sentences embedded in a word_pos struct which composes the summary using the heuristic SumOfChainWeights.
Private Attributes
bool	remove_used_lexical_chains
	If true, the used lexical_chains will be removed.
bool	only_strong
	If true, the summarizer will use only strong chains.
int	hypernymy_depth
	Maximum hypernymy depth.
double	alpha
	Parameter to compute the homogeneity index in the hypernymy relation.
std::wstring	semdb_path
	Path to the semantic DB.
std::set< relation * >	used_relations
	A set with the relations that will be used.
Heuristics	heuristic
	A Heuristics enum value that indicates the heuristic that will be used.

Detailed Description

summarizer class summarizes a document using the lexical chains method.

Member Enumeration Documentation

enum freeling::summarizer::Heuristics

Enumerator:

FIRST_WORD
FIRST_MOST_WEIGHT
WEIGHT_SUM

Constructor & Destructor Documentation

freeling::summarizer::summarizer ( const std::wstring & datFile )

Constructor.

freeling::summarizer::~summarizer ( )

Destructor.

Member Function Documentation

double freeling::summarizer::average_scores ( std::map< std::wstring, std::list< lexical_chain > > & chains_type ) const [private]

Computes and returns the average score of the lexical chains.

std::map<std::wstring, std::list<lexical_chain> > freeling::summarizer::build_lexical_chains ( const freeling::document & doc ) const [private]

Builds all the lexical chains.

void freeling::summarizer::compute_sentence	(	const std::list< word_pos > &	wps,
		std::list< word_pos > &	wp_list,
		std::set< const freeling::sentence * > &	sent_set,
		int &	acc_n_words,
		int	num_words
	)		const `[private]`

Auxiliary function for the first_word and first_most_weighted_word functions.

Processes a sentence to determine whether or not to include it in the summary.

int freeling::summarizer::count_occurences	(	const freeling::word &	w,
		const freeling::document &	doc
	)		const `[private]`

Counts the number of occurrences of the word w in the document doc.

std::list<word_pos> freeling::summarizer::first_most_weighted_word	(	std::map< std::wstring, std::list< lexical_chain > > &	chains,
		int	num_words
	)		const `[private]`

Returns the list of sentences embedded in a word_pos struct which composes the summary using the heuristic FirstMostWeightedWord.

std::list<word_pos> freeling::summarizer::first_word	(	std::map< std::wstring, std::list< lexical_chain > > &	chains_type,
		int	num_words
	)		const `[private]`

Returns the list of sentences embedded in a word_pos struct which composes the summary using the heuristic FirstWord.

std::list<lexical_chain> freeling::summarizer::map_to_lists ( std::map< std::wstring, std::list< lexical_chain > > & chains_type ) const [private]

Concatenate all the lists in the map chains_type into a single list.

void freeling::summarizer::print_lexical_chains ( std::map< std::wstring, std::list< lexical_chain >> & chains ) const [private]

Print the lexical chains. Only for debugging.

void freeling::summarizer::remove_one_word_lexical_chains ( std::map< std::wstring, std::list< lexical_chain >> & chains ) const [private]

Remove the lexical chains with only one word.

void freeling::summarizer::remove_weak_lexical_chains ( std::map< std::wstring, std::list< lexical_chain >> & chains ) const [private]

Remove the lexical chains which do not satisfy the strength criterion.

double freeling::summarizer::standard_deviation_scores	(	std::map< std::wstring, std::list< lexical_chain > > &	chains_type,
		const double	avg
	)		const `[private]`

Computes and returns the standard deviation of the lexical chains scores.

std::list<word_pos> freeling::summarizer::sum_of_chain_weights	(	std::map< std::wstring, std::list< lexical_chain > > &	chains,
		int	num_words
	)		const `[private]`