#include <LM.hh>
Public Types | |
typedef Trie< CompressedArray > | Trie |
Internal Trie type. | |
typedef Trie::Iterator | Iterator |
Iterator type. | |
typedef SymbolMap< std::string, int > | SymbolMap |
Type for symbol map. | |
Public Member Functions | |
LM () | |
Default constructor. | |
void | reset () |
Reset the model to initial state. | |
unsigned int | order () const |
Order of the model. | |
u64 | size () const |
The number of bytes required to store all bit-buffers. | |
const FloatArray & | score_array (unsigned int level) const |
Access to score arrays. | |
const FloatArray & | backoff_array (unsigned int level) const |
Access to backoff arrays. | |
const CompressedArray & | symbol_array (unsigned int level) const |
Access to symbol arrays of the trie. | |
const CompressedArray & | pointer_array (unsigned int level) const |
Access to pointer arrays of the trie. | |
const CompressedArray & | child_limit_array (unsigned int level) const |
Access to child limit arrays of the trie. | |
void | read_arpa (FILE *file, const std::string &sentence_start_str="<s>", const std::string &sentence_end_str="</s>", bool verbose=false) |
Read language model from file in ARPA format. | |
void | write_arpa (FILE *file) const |
Write the model in ARPA format. | |
void | write (FILE *file) const |
Write the model in binary format. | |
void | read (FILE *file) |
Read the model from file stored in binary format. | |
void | linear_quantization (unsigned int bits) |
Quantize all floats linearly. | |
void | compress_trie (unsigned int level) |
Compress the arrays of the trie on the given level. | |
void | compress_trie () |
Compress all levels of the trie. | |
void | uncompress_trie (unsigned int level) |
Uncompress one level of the trie. | |
void | uncompress_trie () |
Uncompress the trie. | |
void | separate_leafs (unsigned int level) |
Separate leafs of the trie on a given level, and modify the backoff array accordingly. | |
void | unseparate_leafs (unsigned int level) |
Unseparate leafs (and remove possible compression) of the trie on a given level, and modify the backoff array accordingly. | |
void | insert_ngram (const std::vector< int > &ngram, float score, float backoff) |
Insert a new ngram to the model. | |
void | insert_ngram (const std::string &str, float score, float backoff) |
Insert a new ngram into the model, allowing new symbols to be inserted. | |
void | set_start_symbol (const std::string &str) |
Set the sentence start symbol and add the string to the symbol mapping if it is not already there. | |
void | set_end_symbol (const std::string &str) |
Set the sentence end symbol and add the string to the symbol mapping if it is not already there. | |
int | start_symbol () const |
The symbol starting the sentence. | |
int | end_symbol () const |
The symbol ending the sentence. | |
const SymbolMap & | symbol_map () const |
The mapping between symbols and strings. | |
template<class T> | |
std::string | ngram_str (const std::vector< T > &vec) const |
Printable string of an ngram. | |
Iterator | root () const |
Trie iterator pointing to the root. | |
float | backoff (const Iterator &it) const |
Backoff weight at the iterator position. | |
float | backoff (unsigned int level, u64 index) const |
Backoff weight from a given level. | |
float | score (const Iterator &it) const |
Probability score at the iterator position. | |
float | walk (Iterator &it, int symbol) const |
Walk the iterator to the given symbol, backing off if necessary. | |
Private Member Functions | |
int | compare_ngrams (const std::vector< int > &a, const std::vector< int > &b) |
Compare two ngrams. | |
Private Attributes | |
SymbolMap | m_symbol_map |
Mapping between symbol strings and the model's integer symbols. | |
int | m_start_symbol |
Symbol corresponding to the sentence start symbol. | |
int | m_end_symbol |
Symbol corresponding to the sentence end symbol. | |
Trie | m_trie |
The internal Trie structure. | |
std::vector< FloatArray > | m_backoff_arrays |
Arrays containing backoff weights for each n-gram level. | |
std::vector< FloatArray > | m_score_arrays |
Arrays containing probability scores for each n-gram level. | |
std::vector< int > | m_previous_ngram |
Previous ngram inserted in the model. |
Note that the backoff weight must be zero for ngrams that do not have children.
|
Iterator type.
|
|
Type for symbol map.
|
|
Internal Trie type.
|
|
Default constructor.
|
|
Backoff weight from a given level.
|
|
Backoff weight at the iterator position.
|
|
Access to backoff arrays.
|
|
Access to child limit arrays of the trie.
|
|
Compare two ngrams.
|
|
Compress all levels of the trie.
|
|
Compress the arrays of the trie on the given level.
|
|
The symbol ending the sentence.
|
|
Insert a new ngram into the model, allowing new symbols to be inserted.
|
|
Insert a new ngram to the model.
|
|
Quantize all floats linearly. Does nothing on levels that are already quantized.
|
|
Printable string of an ngram.
|
|
Order of the model.
|
|
Access to pointer arrays of the trie.
|
|
Read the model from file stored in binary format.
|
|
Read language model from file in ARPA format.
|
|
Reset the model to initial state.
|
|
Trie iterator pointing to the root.
|
|
Probability score at the iterator position.
|
|
Access to score arrays.
|
|
Separate leafs of the trie on a given level, and modify the backoff array accordingly.
|
|
Set the sentence end symbol and add the string to the symbol mapping if it is not already there.
|
|
Set the sentence start symbol and add the string to the symbol mapping if it is not already there.
|
|
The number of bytes required to store all bit-buffers.
|
|
The symbol starting the sentence.
|
|
Access to symbol arrays of the trie.
|
|
The mapping between symbols and strings.
|
|
Uncompress the trie.
|
|
Uncompress one level of the trie.
|
|
Unseparate leafs (and remove possible compression) of the trie on a given level, and modify the backoff array accordingly.
|
|
Walk the iterator to the given symbol, backing off if necessary. The resulting iterator is also guaranteed to have children, backing off again if necessary.
|
|
Write the model in binary format.
|
|
Write the model in ARPA format.
|
|
Arrays containing backoff weights for each n-gram level.
|
|
Symbol corresponding to the sentence end symbol.
|
|
Previous ngram inserted in the model.
|
|
Arrays containing probability scores for each n-gram level.
|
|
Symbol corresponding to the sentence start symbol.
|
|
Mapping between symbol strings and the model's integer symbols.
|
|
The internal Trie structure.
|