1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
//! N-gram language model with Modified Kneser-Ney smoothing.
//!
//! This module provides a complete n-gram language model implementation that uses
//! liblevenshtein-rust's dictionary backends for efficient storage and retrieval.
//!
//! # Overview
//!
//! The n-gram model supports:
//! - Orders 1-5 (unigrams through 5-grams)
//! - Modified Kneser-Ney smoothing for probability estimation
//! - Streaming corpus training with Rayon parallelism
//! - Efficient probability queries via trie navigation
//!
//! # Key Encoding
//!
//! N-gram keys can be encoded in two ways:
//!
//! 1. **Legacy (pipe-separated)**: Simple `"the|quick|brown"` encoding. Deprecated
//!    because it can corrupt data if tokens contain `|`.
//!
//! 2. **Vocabulary-indexed (varint)**: Each word maps to a u64 index, encoded as
//!    LEB128 varint bytes stored as Latin-1 characters. This produces compact keys
//!    and supports unlimited vocabulary size. See the [`vocabulary`] module.
//!
//! # Dictionary Backend Type Aliases
//!
//! Two type aliases are provided for common use cases:
//!
//! - [`SerializableNgramModel`]: Uses the `DynamicDawgChar` backend for models that
//!   need to be saved/loaded. This backend supports full serde serialization.
//!
//! - [`PathMapNgramModel`]: Uses the `PathMapDictionary` backend for integration with
//!   lling-llang's shared lattice architecture. This backend does NOT support serde
//!   serialization but provides better memory sharing characteristics.
//!
//! # Example
//!
//! ```ignore
//! use libgrammstein::ngram::NgramModel;
//! use libgrammstein::corpus::PlaintextReader;
//!
//! let reader = PlaintextReader::from_directory("corpus/")?;
//! let model = NgramModel::train(reader, 3)?; // trigram model
//!
//! let log_prob = model.log_prob("fox", &["quick", "brown"]);
//! ```
pub use ;
pub use ;
pub use NgramModel;
pub use ;
pub use ;
pub use ;
pub use ;
pub use ;
pub use ;
// Dictionary backend type aliases for common use cases
/// Serializable n-gram model using the `DynamicDawgChar` backend.
///
/// Use this alias when a model needs to be saved to and loaded from disk:
/// the `DynamicDawgChar` backend supports full serde serialization.
/// For lling-llang integration, see [`PathMapNgramModel`] instead.
///
/// # Examples
///
/// ```ignore
/// use libgrammstein::ngram::SerializableNgramModel;
/// use liblevenshtein::dictionary::dynamic_dawg_char::DynamicDawgChar;
///
/// // `NgramEntry`, `TrainerBuilder`, and a corpus `reader` are assumed to be
/// // in scope; their imports are omitted from this sketch.
///
/// // Train and save
/// let dictionary = DynamicDawgChar::<NgramEntry>::new();
/// let model = TrainerBuilder::new(dictionary).order(5).train(reader)?;
/// model.save("model.bin")?;
///
/// // Load later
/// let model: SerializableNgramModel = SerializableNgramModel::load("model.bin")?;
/// ```
pub type SerializableNgramModel =
;
/// Memory-efficient n-gram model using the `PathMapDictionary` backend.
///
/// Use this alias for lling-llang integration with shared lattice structures.
/// The `PathMapDictionary` backend does NOT support serde serialization, but
/// it provides better memory sharing characteristics. When a model must be
/// saved to or loaded from disk, see [`SerializableNgramModel`] instead.
///
/// # Examples
///
/// ```ignore
/// use libgrammstein::ngram::PathMapNgramModel;
/// use liblevenshtein::dictionary::pathmap::PathMapDictionary;
///
/// // `NgramEntry`, `TrainerBuilder`, `GrammsteinLanguageModel`, and a corpus
/// // `reader` are assumed to be in scope; their imports are omitted from
/// // this sketch.
///
/// let dictionary = PathMapDictionary::<NgramEntry>::new();
/// let model = TrainerBuilder::new(dictionary).order(5).train(reader)?;
///
/// // Use with lling-llang's LanguageModelLayer
/// let lm = GrammsteinLanguageModel::from_ngram(model);
/// ```
pub type PathMapNgramModel =
;