#include <SimpleKneser.hh>
Public Types | |
typedef SymbolMap< std::string, int > | SymbolMap |
typedef std::vector< int > | Ngram |
typedef std::vector< std::string > | StrNgram |
typedef std::vector< int > | IntVec |
typedef std::vector< float > | FloatVec |
typedef std::map< Ngram, int > | IntMap |
typedef std::map< Ngram, float > | FloatMap |
typedef std::map< Ngram, FloatInt > | FloatIntMap |
Public Member Functions | |
SimpleKneser () | |
SimpleKneser (const std::string &str) | |
void | init () |
void | set_discounts (const std::string &str) |
float | get_discount (unsigned int order) const |
float | get_beta_discount (unsigned int order) const |
Ngram | ngram (const std::string &str) const |
int | get_count (const Ngram &ngram) const |
int | get_sum_nonzero_xg (const Ngram &ngram) const |
int | get_sum_nonzero_xgx (const Ngram &ngram) const |
int | get_sum_nonzero_gx (const Ngram &ngram) const |
int | get_sum_xg_not_pruned (const Ngram &ngram) const |
int | get_sum_nonzero_xg_not_pruned (const Ngram &ngram) const |
int | get_sum_nonzero_gx_not_pruned (const Ngram &ngram) const |
float | get_d1 (const Ngram &ngram) const |
float | get_d2 (const Ngram &ngram) const |
const FloatInt & | get_d2_pair (const Ngram &ngram) const |
FloatInt & | get_d2_pair (const Ngram &ngram) |
float | get_beta_numerator (const Ngram &ngram) const |
float | get_beta_denominator (const Ngram &ngram) const |
void | add_count (const Ngram &ngram, int count) |
void | read_counts (FILE *file) |
void | compute_modified_counts () |
float | ngram_prob (Ngram ngram) const |
void | compute_d1 () |
void | compute_d2 () |
void | compute_beta_denominator () |
bool | is_pruned (const Ngram &ngram) const |
void | prune_ngram (Ngram ngram) |
Prune an ngram (and its children, if any), and modify the d2 measure for the parent ngrams. | |
void | prune (float threshold) |
Ngram | parent (const Ngram &ngram) const |
Ngram | backoff (const Ngram &ngram) const |
float | inter_kn (const Ngram &ngram) const |
float | prob_kn_lower (Ngram ngram) const |
float | prob_kn (const Ngram &ngram) const |
float | inter_beta (const Ngram &ngram) const |
float | prob_beta_lower (Ngram ngram) const |
float | prob_beta (const Ngram &ngram) const |
std::string | ngram_str (const Ngram &ngram) |
void | write_beta_arpa (FILE *file) |
Private Attributes | |
std::string | m_sentence_start_str |
int | m_sentence_start_id |
int | m_num_events |
int | m_progress_skip |
SymbolMap | m_symbol_map |
FloatVec | m_discounts |
FloatVec | m_beta_discounts |
IntMap | m_counts |
IntMap | m_sum_nonzero_xg |
IntMap | m_sum_nonzero_xgx |
IntMap | m_sum_nonzero_gx |
IntMap | m_sum_xg_not_pruned |
IntMap | m_sum_nonzero_xg_not_pruned |
IntMap | m_sum_nonzero_gx_not_pruned |
FloatMap | m_beta_denominator |
FloatMap | m_d1 |
FloatIntMap | m_d2 |
Classes | |
struct | FloatInt |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Prune an ngram (and its children, if any), and modify the d2 measure for the parent ngrams. Pruned ngrams are marked by setting the integer denominator of d2 to zero.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|