ArpaReader.hh

Go to the documentation of this file.
00001 #ifndef ARPAREADER_HH
00002 #define ARPAREADER_HH
00003 
00004 #include "SymbolMap.hh"
00005 
00006 namespace bit {
00007 
00018   class ArpaReader {
00019   public:
00021     ArpaReader(FILE *file = NULL);
00022 
00024     void reset(FILE *file);
00025 
00027     void read_header();
00028 
00035     bool read_ngram();
00036 
00040     bool read_order_ngrams(bool sort = false);
00041 
00043     struct Options {
00046       bool throw_header_mismatch;
00047 
00049       bool show_progress;
00050 
00052       std::vector<std::string> ignore_symbols;
00053     } opt;
00054 
00056     struct Header {
00057       int order; 
00058       int num_ngrams_total; 
00059 
00061       std::vector<int> num_ngrams;
00062     } header;
00063 
00065     struct Ngram {
00066       std::vector<int> symbols; 
00067       float log_prob; 
00068       float backoff; 
00069       bool operator<(const Ngram &ngram) const 
00070       {
00071         size_t i = 0;
00072         while (1) {
00073           if (i == symbols.size() && i == ngram.symbols.size())
00074             return false;
00075           if (i == symbols.size())
00076             return true;
00077           if (i == ngram.symbols.size())
00078             return false;
00079           if (symbols[i] < ngram.symbols[i])
00080             return true;
00081           if (ngram.symbols[i] < symbols[i])
00082             return false;
00083           i++;
00084         }
00085       }
00086     } ngram;
00087 
00088     std::vector<Ngram> order_ngrams; 
00089     std::vector<int> sorted_order; 
00090 
00091     SymbolMap<std::string, int> symbol_map; 
00092     bool end_reached; 
00093 
00094   private:
00095     FILE *m_file; 
00096     int m_current_order; 
00097     int m_ngrams_read; 
00098   };
00099 
00100 };
00101 
00102 #endif /* ARPAREADER_HH */

Generated on Mon Jan 8 15:51:03 2007 for bit by  doxygen 1.4.6