#include <Perplexity.hh>
Public Member Functions | |
Perplexity (const LM &lm) | |
Create a new perplexity computer with a language model. | |
void | reset () |
Reset perplexity counters. | |
int | num_symbols () const |
Number of symbols. | |
int | num_words () const |
Number of words. | |
double | score () const |
Log-probability of the test data. | |
float | cross_entropy_per_word () const |
Compute cross-entropy in bits assuming that score is in log10. | |
float | add_symbol (const std::string &symbol_str) |
Add a symbol to the evaluation set. | |
Public Attributes | |
struct { | |
std::string word_boundary_str | |
Symbol used for computing number of words. | |
std::string unk_str | |
Symbol used for unknown word. | |
} | opt |
Private Attributes | |
bool | m_start_pending |
Are we waiting for the start of a sentence? | |
double | m_score |
Score of the test data. | |
int | m_num_symbols |
Number of symbols in the test data (not including sentence starts). | |
int | m_num_words |
Number of words in the test data. | |
int | m_num_sentences |
Number of sentences in the test data. | |
const LM * | m_lm |
The language model used for computing the perplexity. | |
LM::Iterator | m_it |
Iterator for walking in the language model. |
|
Create a new perplexity computer with a language model.
|
|
Add a symbol to the evaluation set.
|
|
Compute cross-entropy in bits assuming that score is in log10.
|
|
Number of symbols.
|
|
Number of words.
|
|
Reset perplexity counters.
|
|
Log-probability of the test data.
|
|
Iterator for walking in the language model.
|
|
The language model used for computing the perplexity.
|
|
Number of sentences in the test data.
|
|
Number of symbols in the test data (not including sentence starts).
|
|
Number of words in the test data.
|
|
Score of the test data.
|
|
Are we waiting for the start of a sentence?
|
|
|
|
Symbol used for unknown words. Empty if no unknown-word symbol is used. |
|
Symbol used for computing the number of words. It is assumed that a word boundary always comes after the sentence start and before the sentence end. |