1use crate::mem8::wave::MemoryWave;
5use anyhow::{anyhow, Result};
6use std::collections::HashMap;
7use std::io::Write;
8
/// Magic bytes that open every M8 stream: ASCII `M8` followed by the bytes
/// `0x02` and `0x09`. `M8Writer::finish` writes them before anything else.
const M8_MAGIC: &[u8] = b"M8\x02\x09";
11
/// One-byte tag identifying what a section of an M8 file contains.
///
/// The discriminant is what gets written to the file: `M8Writer::finish`
/// emits `section_type as u8` immediately before each section's size.
#[derive(Debug, Clone, Copy, PartialEq)]
#[repr(u8)]
pub enum SectionType {
    /// Token-compressed text, as produced by `M8Writer::add_markqant_text`.
    MarkqantText = 0x09,
    /// Not produced by the writer code in this file.
    QuantumDirectory = 0x0A,
    /// Packed 32-byte wave records, as produced by `M8Writer::add_wave_memory`.
    WaveMemory = 0x0B,
    /// Not produced by the writer code in this file.
    Metadata = 0x0C,
    /// Not produced by the writer code in this file.
    Index = 0x0D,
}
22
/// Fixed header fields written right after the magic bytes.
///
/// `M8Writer::finish` serializes the fields in declaration order, each as a
/// little-endian integer.
#[derive(Debug)]
pub struct M8Header {
    /// Format version; the writer in this file always emits `1`.
    pub version: u16,
    /// Number of sections that follow the header.
    pub section_count: u16,
    /// Total file size in bytes, as computed by the writer.
    pub file_size: u64,
    /// Creation time in whole seconds since the Unix epoch.
    pub timestamp: u64,
}
35
/// A single section queued for writing: a type tag plus its raw payload.
#[derive(Debug)]
pub struct M8Section {
    /// Kind of payload stored in `data`.
    pub section_type: SectionType,
    /// Payload length in bytes; the writer sets it from `data.len()`.
    pub size: u32,
    /// Raw section payload, written verbatim after the type and size.
    pub data: Vec<u8>,
}
46
/// Fixed-width, quantized form of a `MemoryWave`.
///
/// `to_bytes` serializes the fields in declaration order into exactly
/// 32 little-endian bytes.
#[derive(Debug, Clone)]
pub struct CompressedWave {
    /// Caller-supplied identifier.
    pub id: u64,
    /// Amplitude on the logarithmic scale produced by `quantize_amplitude`.
    pub amplitude: u8,
    /// Frequency cast to an integer (`wave.frequency as u16`).
    pub frequency: u16,
    /// Phase, with `[-π, π]` mapped onto `0..=255`.
    pub phase: u8,
    /// Valence scaled by 127.
    pub valence: i8,
    /// Arousal scaled by 255.
    pub arousal: u8,
    /// Decay time in whole seconds; `u16::MAX` means the wave has no decay time.
    pub decay_tau: u16,
    /// Seconds value captured by `from_wave` from `created_at.elapsed()`.
    pub timestamp: u64,
    /// Interference value; `from_wave` initialises it to 0.
    pub interference: u64,
}
60
61impl CompressedWave {
62 pub fn from_wave(wave: &MemoryWave, id: u64) -> Self {
64 Self {
65 id,
66 amplitude: quantize_amplitude(wave.amplitude),
67 frequency: wave.frequency as u16,
68 phase: ((wave.phase / std::f32::consts::PI + 1.0) * 127.5) as u8,
69 valence: (wave.valence * 127.0) as i8,
70 arousal: (wave.arousal * 255.0) as u8,
71 decay_tau: wave
72 .decay_tau
73 .map(|d| d.as_secs() as u16)
74 .unwrap_or(u16::MAX),
75 timestamp: wave.created_at.elapsed().as_secs(),
76 interference: 0, }
78 }
79
80 pub fn to_wave(&self) -> MemoryWave {
82 let mut wave = MemoryWave::new(self.frequency as f32, dequantize_amplitude(self.amplitude));
83
84 wave.phase = (self.phase as f32 / 127.5 - 1.0) * std::f32::consts::PI;
85 wave.valence = self.valence as f32 / 127.0;
86 wave.arousal = self.arousal as f32 / 255.0;
87 wave.decay_tau = if self.decay_tau == u16::MAX {
88 None
89 } else {
90 Some(std::time::Duration::from_secs(self.decay_tau as u64))
91 };
92
93 wave
94 }
95
96 pub fn to_bytes(&self) -> [u8; 32] {
98 let mut bytes = [0u8; 32];
99 bytes[0..8].copy_from_slice(&self.id.to_le_bytes());
100 bytes[8] = self.amplitude;
101 bytes[9..11].copy_from_slice(&self.frequency.to_le_bytes());
102 bytes[11] = self.phase;
103 bytes[12] = self.valence as u8;
104 bytes[13] = self.arousal;
105 bytes[14..16].copy_from_slice(&self.decay_tau.to_le_bytes());
106 bytes[16..24].copy_from_slice(&self.timestamp.to_le_bytes());
107 bytes[24..32].copy_from_slice(&self.interference.to_le_bytes());
108 bytes
109 }
110
111 pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
113 if bytes.len() != 32 {
114 return Err(anyhow!("CompressedWave must be exactly 32 bytes"));
115 }
116
117 Ok(Self {
118 id: u64::from_le_bytes(bytes[0..8].try_into()?),
119 amplitude: bytes[8],
120 frequency: u16::from_le_bytes(bytes[9..11].try_into()?),
121 phase: bytes[11],
122 valence: bytes[12] as i8,
123 arousal: bytes[13],
124 decay_tau: u16::from_le_bytes(bytes[14..16].try_into()?),
125 timestamp: u64::from_le_bytes(bytes[16..24].try_into()?),
126 interference: u64::from_le_bytes(bytes[24..32].try_into()?),
127 })
128 }
129}
130
/// Maps an amplitude onto a logarithmic 8-bit scale: `32 * log2(amplitude)`,
/// clamped to `0..=255` and truncated toward zero.
///
/// Non-positive amplitudes yield 0. Because the logarithm is non-positive for
/// amplitudes up to 1.0, every amplitude in `(0, 1]` also yields 0.
fn quantize_amplitude(amplitude: f32) -> u8 {
    if amplitude <= 0.0 {
        return 0;
    }

    let log_steps = amplitude.log2() * 32.0;
    log_steps.clamp(0.0, 255.0) as u8
}
139
/// Expands a quantized amplitude back to a float: `2^(quantized / 32)`.
///
/// The level 0 is special-cased and decodes to exactly `0.0` rather than `1.0`.
fn dequantize_amplitude(quantized: u8) -> f32 {
    match quantized {
        0 => 0.0,
        level => 2.0_f32.powf(f32::from(level) / 32.0),
    }
}
148
/// Dictionary-based text compressor that replaces frequent word sequences
/// with single-byte tokens.
pub struct MarkqantEncoder {
    /// Pattern text -> token byte, used by `encode`.
    tokens: HashMap<String, u8>,
    /// Token byte -> pattern text, used by `decode`.
    patterns: HashMap<u8, String>,
    /// Occurrence count of every word sequence seen by `analyze`.
    frequencies: HashMap<String, usize>,
    /// Next token byte to hand out; `new` starts it at `0x80`.
    next_token: u8,
}
160
/// `Default` simply delegates to `MarkqantEncoder::new`.
impl Default for MarkqantEncoder {
    fn default() -> Self {
        Self::new()
    }
}
166
167impl MarkqantEncoder {
168 pub fn new() -> Self {
169 Self {
170 tokens: HashMap::new(),
171 patterns: HashMap::new(),
172 frequencies: HashMap::new(),
173 next_token: 0x80, }
175 }
176
177 pub fn analyze(&mut self, text: &str) {
179 let words: Vec<&str> = text.split_whitespace().collect();
181
182 for window_size in 1..=5 {
184 for i in 0..words.len().saturating_sub(window_size - 1) {
185 let pattern = words[i..i + window_size].join(" ");
186 *self.frequencies.entry(pattern).or_insert(0) += 1;
187 }
188 }
189
190 let mut scored_patterns: Vec<_> = self
192 .frequencies
193 .iter()
194 .filter(|(_, &freq)| freq >= 2)
195 .map(|(pattern, &freq)| {
196 let score = (pattern.len() - 1) * (freq - 1);
197 (pattern.clone(), score)
198 })
199 .collect();
200
201 scored_patterns.sort_by_key(|(_, score)| std::cmp::Reverse(*score));
202
203 for (pattern, _) in scored_patterns.iter().take(128) {
205 if self.next_token < 255 {
206 self.tokens.insert(pattern.clone(), self.next_token);
207 self.patterns.insert(self.next_token, pattern.clone());
208 self.next_token = self.next_token.saturating_add(1);
209 }
210 }
211 }
212
213 pub fn encode(&self, text: &str) -> Vec<u8> {
215 let mut result = Vec::new();
216 let mut remaining = text;
217
218 while !remaining.is_empty() {
219 let mut found = false;
220
221 for len in (1..=remaining.len()).rev() {
223 if let Some(&token) = self.tokens.get(&remaining[..len]) {
224 result.push(token);
225 remaining = &remaining[len..];
226 found = true;
227 break;
228 }
229 }
230
231 if !found {
232 result.extend_from_slice(remaining.chars().next().unwrap().to_string().as_bytes());
234 remaining = &remaining[remaining.chars().next().unwrap().len_utf8()..];
235 }
236 }
237
238 result
239 }
240
241 pub fn decode(&self, data: &[u8]) -> Result<String> {
243 let mut result = String::new();
244 let mut i = 0;
245
246 while i < data.len() {
247 if data[i] >= 0x80 {
248 if let Some(pattern) = self.patterns.get(&data[i]) {
250 result.push_str(pattern);
251 } else {
252 return Err(anyhow!("Unknown token: 0x{:02x}", data[i]));
253 }
254 i += 1;
255 } else {
256 let ch = data[i] as char;
258 result.push(ch);
259 i += 1;
260 }
261 }
262
263 Ok(result)
264 }
265}
266
/// Collects sections in memory and writes them out as one M8 stream when
/// `finish` is called.
pub struct M8Writer<W: Write> {
    /// Destination for the serialized file.
    writer: W,
    /// Sections queued so far, in the order they will be written.
    sections: Vec<M8Section>,
}
272
273impl<W: Write> M8Writer<W> {
274 pub fn new(writer: W) -> Self {
275 Self {
276 writer,
277 sections: Vec::new(),
278 }
279 }
280
281 pub fn add_wave_memory(&mut self, waves: &[CompressedWave]) -> Result<()> {
283 let mut data = Vec::with_capacity(waves.len() * 32);
284
285 for wave in waves {
286 data.extend_from_slice(&wave.to_bytes());
287 }
288
289 self.sections.push(M8Section {
290 section_type: SectionType::WaveMemory,
291 size: data.len() as u32,
292 data,
293 });
294
295 Ok(())
296 }
297
298 pub fn add_markqant_text(&mut self, text: &str) -> Result<()> {
300 let mut encoder = MarkqantEncoder::new();
301 encoder.analyze(text);
302 let encoded = encoder.encode(text);
303
304 let mut data = Vec::new();
306
307 data.extend_from_slice(&(encoder.patterns.len() as u16).to_le_bytes());
309
310 for (token, pattern) in &encoder.patterns {
312 data.push(*token);
313 data.extend_from_slice(&(pattern.len() as u16).to_le_bytes());
314 data.extend_from_slice(pattern.as_bytes());
315 }
316
317 data.extend_from_slice(&(encoded.len() as u32).to_le_bytes());
319 data.extend_from_slice(&encoded);
320
321 self.sections.push(M8Section {
322 section_type: SectionType::MarkqantText,
323 size: data.len() as u32,
324 data,
325 });
326
327 Ok(())
328 }
329
330 pub fn finish(mut self) -> Result<()> {
332 self.writer.write_all(M8_MAGIC)?;
334
335 let header_size = 16; let section_headers_size = self.sections.len() * 8; let data_size: usize = self.sections.iter().map(|s| s.data.len()).sum();
339 let total_size = header_size + section_headers_size + data_size + 4; let header = M8Header {
343 version: 1,
344 section_count: self.sections.len() as u16,
345 file_size: total_size as u64,
346 timestamp: std::time::SystemTime::now()
347 .duration_since(std::time::UNIX_EPOCH)?
348 .as_secs(),
349 };
350
351 self.writer.write_all(&header.version.to_le_bytes())?;
352 self.writer.write_all(&header.section_count.to_le_bytes())?;
353 self.writer.write_all(&header.file_size.to_le_bytes())?;
354 self.writer.write_all(&header.timestamp.to_le_bytes())?;
355
356 for section in &self.sections {
358 self.writer.write_all(&[section.section_type as u8])?;
359 self.writer.write_all(§ion.size.to_le_bytes())?;
360 self.writer.write_all(§ion.data)?;
361 }
362
363 let crc = 0u32; self.writer.write_all(&crc.to_le_bytes())?;
366
367 Ok(())
368 }
369}
370
371pub fn create_example_m8() -> Result<Vec<u8>> {
373 let mut buffer = Vec::new();
374 let mut writer = M8Writer::new(&mut buffer);
375
376 let waves = vec![
378 CompressedWave::from_wave(&MemoryWave::new(440.0, 0.8), 1),
379 CompressedWave::from_wave(&MemoryWave::new(880.0, 0.6), 2),
380 ];
381 writer.add_wave_memory(&waves)?;
382
383 writer.add_markqant_text("The user is cooking in the kitchen at 6PM")?;
385
386 writer.finish()?;
387 Ok(buffer)
388}
389
#[cfg(test)]
mod tests {
    use super::*;

    /// A wave survives compression with its frequency within 1 Hz and its
    /// valence within 0.01 of the original, and serializes to 32 bytes.
    #[test]
    fn test_wave_compression() {
        let mut wave = MemoryWave::new(440.0, 0.8);
        wave.valence = 0.7;
        wave.arousal = 0.4;

        let compressed = CompressedWave::from_wave(&wave, 12345);
        assert_eq!(compressed.to_bytes().len(), 32);

        let decompressed = compressed.to_wave();
        assert!((decompressed.frequency - 440.0).abs() < 1.0);
        assert!((decompressed.valence - 0.7).abs() < 0.01);
    }

    /// Text with a repeated word round-trips through encode/decode and the
    /// encoded form is shorter than the input.
    #[test]
    fn test_markqant_encoding() {
        let mut encoder = MarkqantEncoder::new();
        let text = "the cat in the hat sat on the mat";
        encoder.analyze(text);

        let encoded = encoder.encode(text);
        let decoded = encoder.decode(&encoded).unwrap();

        assert_eq!(decoded, text);
        // "the" occurs three times, so tokenizing it must save bytes.
        assert!(encoded.len() < text.len());
    }

    /// The example file starts with the magic bytes and is more than
    /// 100 bytes long.
    #[test]
    fn test_m8_creation() {
        let m8_data = create_example_m8().unwrap();
        assert!(m8_data.starts_with(M8_MAGIC));
        assert!(m8_data.len() > 100);
    }
}