1pub mod advanced_features;
29pub mod advanced_quant;
30pub mod batch;
31pub mod compat;
32mod continuous;
33pub mod domain_specific;
34pub mod enhanced_multiscale;
35pub mod entropy;
36mod error;
37pub mod gpu_quant;
38pub mod metrics;
39mod mulaw;
40mod multiscale;
41#[cfg(feature = "vqvae")]
42pub mod neural_codec;
43pub mod persistence;
44pub mod pretraining;
45pub mod profiling;
46mod quantizer;
47pub mod serde_utils;
48pub mod simd_quant;
49pub mod specialized;
50pub mod transformer;
51pub mod types;
52pub mod utils;
53
54#[cfg(feature = "vqvae")]
55pub mod vqvae;
56
57pub use continuous::{
58 ContinuousTokenizer, ReconstructionMetrics, TrainableContinuousTokenizer, TrainingConfig,
59};
60pub use error::{TokenizerError, TokenizerResult};
61pub use mulaw::MuLawCodec;
62pub use multiscale::{
63 MultiScaleTokenizer, PoolMethod, PyramidTokenizer, ScaleLevel, UpsampleMethod,
64};
65pub use quantizer::{LinearQuantizer, Quantizer};
66
67pub use advanced_quant::{AdaptiveQuantizer, DeadZoneQuantizer, NonUniformQuantizer};
69
70#[cfg(feature = "vqvae")]
71pub use vqvae::{
72 ProductQuantizer, ProductQuantizerConfig, RVQVAETokenizer, ResidualVQ, VQConfig,
73 VQVAETokenizer, VectorQuantizer,
74};
75
76pub use batch::{BatchTokenizer, StreamingTokenizer};
78
79pub use entropy::{
81 compression_ratio, compute_frequencies, ArithmeticDecoder, ArithmeticEncoder,
82 BitrateController, HuffmanDecoder, HuffmanEncoder, RangeDecoder, RangeEncoder,
83};
84
85pub use persistence::{load_config, save_config, ModelCheckpoint, ModelMetadata, ModelVersion};
87
88pub use specialized::{
90 DCTConfig, DCTTokenizer, FourierConfig, FourierTokenizer, KMeansConfig, KMeansTokenizer,
91 WaveletConfig, WaveletFamily, WaveletTokenizer,
92};
93
94pub use advanced_features::{
96 add_batch_jitter, add_jitter, apply_batch_token_dropout, apply_temporal_coherence,
97 apply_token_dropout, HierarchicalConfig, HierarchicalTokenizer, JitterConfig,
98 TemporalCoherenceConfig, TemporalFilterType, TokenDropoutConfig,
99};
100
101pub use compat::{AudioMetadata, DType, ModelConfig, OnnxConfig, PyTorchCompat, TensorInfo};
103
104#[cfg(feature = "vqvae")]
106pub use neural_codec::{NeuralCodec, NeuralCodecConfig};
107
108pub use domain_specific::{
110 EnvironmentalTokenizer, EnvironmentalTokenizerConfig, MusicTokenizer, MusicTokenizerConfig,
111 SpeechTokenizer, SpeechTokenizerConfig,
112};
113
114pub use transformer::{
116 FeedForward, LayerNorm, MultiHeadAttention, PositionalEncoding, TransformerConfig,
117 TransformerEncoderLayer, TransformerTokenizer,
118};
119
120pub use pretraining::{
122 ContrastiveConfig, ContrastiveLearning, MSMConfig, MaskedSignalModeling, TemporalPrediction,
123 TemporalPredictionConfig,
124};
125
126pub use profiling::{
128 AllocationEvent, EventType, MemoryProfiler, MemorySnapshot, ProfileScope, ScopeStats,
129 TimelineAnalyzer,
130};
131
132pub use scirs2_core::ndarray::{Array1, Array2};
134
/// Common interface for signal tokenizers: types that map a 1-D `f32` signal
/// into a token/embedding representation and reconstruct a signal from it.
pub trait SignalTokenizer {
    /// Encodes a 1-D signal into its token representation.
    ///
    /// # Errors
    ///
    /// Returns a [`TokenizerError`] if encoding fails.
    fn encode(&self, signal: &Array1<f32>) -> TokenizerResult<Array1<f32>>;

    /// Decodes a token representation back into a 1-D signal.
    ///
    /// # Errors
    ///
    /// Returns a [`TokenizerError`] if decoding fails.
    fn decode(&self, tokens: &Array1<f32>) -> TokenizerResult<Array1<f32>>;

    /// Dimensionality of the embedding produced by [`SignalTokenizer::encode`].
    fn embed_dim(&self) -> usize;

    /// Number of distinct tokens in this tokenizer's vocabulary.
    fn vocab_size(&self) -> usize;
}
149
/// Serializable description of a tokenizer configuration, used to select and
/// parameterize a tokenizer implementation.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub enum TokenizerType {
    /// Continuous (non-quantized) tokenizer with the given embedding size.
    Continuous { embed_dim: usize },
    /// Mu-law companding codec quantized to `bits` bits.
    MuLaw { bits: u8 },
    /// Uniform linear quantizer over the closed range `[min, max]` at `bits` bits.
    Linear { bits: u8, min: f32, max: f32 },
    /// Vector-quantized tokenizer with a codebook of `codebook_size` entries,
    /// each of dimension `embed_dim`.
    VectorQuantized {
        codebook_size: usize,
        embed_dim: usize,
    },
    /// Multi-scale tokenizer with `num_levels` scales, each producing
    /// `embed_dim_per_level`-dimensional embeddings.
    MultiScale {
        embed_dim_per_level: usize,
        num_levels: usize,
    },
    /// Pyramid tokenizer with `num_levels` levels, each producing
    /// `embed_dim_per_level`-dimensional embeddings.
    Pyramid {
        embed_dim_per_level: usize,
        num_levels: usize,
    },
}
175
176impl Default for TokenizerType {
177 fn default() -> Self {
178 TokenizerType::Continuous { embed_dim: 256 }
179 }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_tokenizer_type_default() {
        // The default must be the continuous variant with a 256-dim embedding.
        assert!(matches!(
            TokenizerType::default(),
            TokenizerType::Continuous { embed_dim: 256 }
        ));
    }
}