1use anyhow::{Context, Result};
6use std::collections::HashMap;
7use std::fs::{File, OpenOptions};
8use std::io::{Read, Seek, SeekFrom, Write};
9use std::path::{Path, PathBuf};
10use std::time::{SystemTime, UNIX_EPOCH};
11
12const M8_MAGIC: &[u8; 4] = b"MEM8";
14
15const M8_VERSION: u8 = 1;
17
18const BLOCK_SIZE: usize = 4096;
20
/// Fixed-size header stored at the very start of an `.m8` file.
///
/// `#[repr(C, packed)]` removes inter-field padding, so
/// `std::mem::size_of::<M8Header>()` equals the sum of the field sizes
/// (4 + 1 + 1 + 4 + 8 + 8 + 4 = 30 bytes). That is the same number of bytes
/// `to_bytes` emits and the value the file code uses as the header length.
/// Field order is therefore part of the on-disk layout.
#[repr(C, packed)]
pub struct M8Header {
    /// File signature; `open` rejects files where this differs from `M8_MAGIC`.
    magic: [u8; 4],
    /// Format version; `create` writes `M8_VERSION`.
    version: u8,
    /// Flag bits; `create` writes 0 and nothing visible here interprets them.
    flags: u8,
    /// Number of blocks appended so far; incremented by `append_block`.
    block_count: u32,
    /// Initialised to 440.0 by `create` and mixed into every wave signature.
    identity_freq: f64,
    /// Initialised to 0.0 by `create`; not otherwise used by the code visible here.
    temporal_phase: f64,
    /// Checksum slot; `create` writes 0 and no code visible here computes or
    /// verifies it.
    crc32: u32,
}
32
33impl M8Header {
34 fn to_bytes(&self) -> Vec<u8> {
35 let mut bytes = Vec::with_capacity(std::mem::size_of::<Self>());
36 bytes.extend_from_slice(&self.magic);
37 bytes.push(self.version);
38 bytes.push(self.flags);
39 bytes.extend_from_slice(&self.block_count.to_le_bytes());
40 bytes.extend_from_slice(&self.identity_freq.to_le_bytes());
41 bytes.extend_from_slice(&self.temporal_phase.to_le_bytes());
42 bytes.extend_from_slice(&self.crc32.to_le_bytes());
43 bytes
44 }
45
46 fn from_bytes(bytes: &[u8]) -> Result<Self> {
47 if bytes.len() < std::mem::size_of::<Self>() {
48 anyhow::bail!("Invalid header size");
49 }
50
51 let mut cursor = 0;
52 let mut magic = [0u8; 4];
53 magic.copy_from_slice(&bytes[cursor..cursor + 4]);
54 cursor += 4;
55
56 let version = bytes[cursor];
57 cursor += 1;
58
59 let flags = bytes[cursor];
60 cursor += 1;
61
62 let block_count = u32::from_le_bytes(bytes[cursor..cursor + 4].try_into()?);
63 cursor += 4;
64
65 let identity_freq = f64::from_le_bytes(bytes[cursor..cursor + 8].try_into()?);
66 cursor += 8;
67
68 let temporal_phase = f64::from_le_bytes(bytes[cursor..cursor + 8].try_into()?);
69 cursor += 8;
70
71 let crc32 = u32::from_le_bytes(bytes[cursor..cursor + 4].try_into()?);
72
73 Ok(Self {
74 magic,
75 version,
76 flags,
77 block_count,
78 identity_freq,
79 temporal_phase,
80 crc32,
81 })
82 }
83}
84
/// One record of an `.m8` file.
///
/// `write_block` serialises the fixed-width fields in declaration order
/// (little-endian), followed by `content`, into a zero-padded buffer of
/// `BLOCK_SIZE` bytes.
#[repr(C)]
#[derive(Debug, Clone)]
pub struct M8Block {
    /// Sequence number; `append_block` uses the header's block count at append time.
    pub index: u64,

    /// First 16 bytes of a SHA-256 digest over the content followed by the
    /// header's `identity_freq` bytes.
    pub wave_signature: [u8; 16],

    /// Microseconds since the Unix epoch, taken when the block was appended.
    pub timestamp: u64,

    /// Importance scaled to the `u16` range (`importance * 65535.0` cast to `u16`).
    pub importance: u16,

    /// Token id; `append_block` currently always writes 0.
    pub token_id: u16,

    /// Presumably the hash of the preceding block (TODO confirm);
    /// `append_block` currently always writes zeros.
    pub prev_hash: [u8; 32],

    /// Content length in bytes as recorded by the writer.
    pub content_len: u32,

    /// The block's payload bytes.
    pub content: Vec<u8>,
}
113
/// Bidirectional table between strings and 16-bit token ids.
///
/// A fixed set of reserved ids is pre-registered by [`TokenMap::new`]; every
/// other string receives the next unused id, starting at `0x100`.
pub struct TokenMap {
    /// Forward lookup: string -> token id.
    str_to_token: HashMap<String, u16>,

    /// Reverse lookup: token id -> string.
    token_to_str: HashMap<u16, String>,

    /// Next candidate id for a dynamically assigned token, or `None` once the
    /// 16-bit id space has been walked to its end.
    next_id: Option<u16>,

    /// The pre-registered ids, kept separately from the dynamic entries.
    reserved: HashMap<u16, String>,
}

impl Default for TokenMap {
    fn default() -> Self {
        Self::new()
    }
}

impl TokenMap {
    /// Builds a map pre-populated with the reserved tokens.
    pub fn new() -> Self {
        const RESERVED: [(u16, &str); 11] = [
            (0x80, "node_modules"),
            (0x81, ".git"),
            (0x82, "target"),
            (0x83, "dist"),
            (0x84, "build"),
            (0x90, ".rs"),
            (0x91, ".py"),
            (0x92, ".js"),
            (0x93, ".ts"),
            (0xFFFE, "Claude"),
            (0xFFFF, "Hue"),
        ];

        let mut reserved = HashMap::with_capacity(RESERVED.len());
        let mut str_to_token = HashMap::with_capacity(RESERVED.len());
        let mut token_to_str = HashMap::with_capacity(RESERVED.len());

        for &(id, text) in RESERVED.iter() {
            reserved.insert(id, text.to_string());
            str_to_token.insert(text.to_string(), id);
            token_to_str.insert(id, text.to_string());
        }

        Self {
            str_to_token,
            token_to_str,
            next_id: Some(0x100),
            reserved,
        }
    }

    /// Returns the token for `s`, assigning a fresh id if `s` is new.
    ///
    /// Returns `None` when `s` is unknown and every id from `0x100` upwards is
    /// already taken. Ids that are already in use (including the reserved
    /// `0xFFFE` / `0xFFFF`) are never handed out a second time, so existing
    /// mappings are never overwritten.
    pub fn try_get_token(&mut self, s: &str) -> Option<u16> {
        if let Some(&known) = self.str_to_token.get(s) {
            return Some(known);
        }

        // Walk forward from the cursor until a free id is found; running off
        // the end of the u16 range means the table is full.
        let mut candidate = self.next_id;
        let token = loop {
            let id = candidate?;
            if !self.token_to_str.contains_key(&id) {
                break id;
            }
            candidate = id.checked_add(1);
        };
        self.next_id = token.checked_add(1);

        self.str_to_token.insert(s.to_string(), token);
        self.token_to_str.insert(token, s.to_string());

        Some(token)
    }

    /// Returns the token for `s`, assigning a fresh id if `s` is new.
    ///
    /// # Panics
    /// Panics when `s` is unknown and no free token id remains. Use
    /// [`TokenMap::try_get_token`] to handle exhaustion without panicking.
    pub fn get_token(&mut self, s: &str) -> u16 {
        self.try_get_token(s)
            .expect("TokenMap exhausted: no free 16-bit token ids remain")
    }

    /// Looks up the string registered for `token`, if any.
    pub fn decode_token(&self, token: u16) -> Option<&str> {
        self.token_to_str.get(&token).map(|s| s.as_str())
    }
}
186
/// Handle to an `.m8` file together with the in-memory state derived from it.
pub struct M8BinaryFile {
    /// Path the file was created at or opened from (stored, but not read by
    /// the code visible here).
    path: PathBuf,
    /// Underlying file handle, opened for both reading and writing.
    file: File,
    /// In-memory copy of the header; `update_header` rewrites it at offset 0.
    header: M8Header,
    /// Token table; both `create` and `open` start with `TokenMap::new()`.
    tokens: TokenMap,

    /// Byte offset used by `read_backwards`, which steps it back by one
    /// `BLOCK_SIZE` per call. `open` starts it at the file size, `create` at
    /// the header length.
    read_position: u64,

    /// `(file offset, importance)` pairs; `read_by_importance` seeks to each
    /// offset and reads one block there.
    importance_index: Vec<(u64, f32)>,
}
200
201impl M8BinaryFile {
202 pub fn create(path: impl AsRef<Path>) -> Result<Self> {
204 let path = path.as_ref().to_path_buf();
205
206 let mut file = OpenOptions::new()
207 .create(true)
208 .write(true)
209 .read(true)
210 .truncate(true)
211 .open(&path)
212 .context("Failed to create .m8 file")?;
213
214 let header = M8Header {
215 magic: *M8_MAGIC,
216 version: M8_VERSION,
217 flags: 0,
218 block_count: 0,
219 identity_freq: 440.0, temporal_phase: 0.0,
221 crc32: 0,
222 };
223
224 file.write_all(&header.to_bytes())?;
226
227 Ok(Self {
228 path,
229 file,
230 header,
231 tokens: TokenMap::new(),
232 read_position: std::mem::size_of::<M8Header>() as u64,
233 importance_index: Vec::new(),
234 })
235 }
236
237 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
239 let path = path.as_ref().to_path_buf();
240
241 let mut file = OpenOptions::new()
242 .read(true)
243 .write(true)
244 .open(&path)
245 .context("Failed to open .m8 file")?;
246
247 let mut header_bytes = vec![0u8; std::mem::size_of::<M8Header>()];
249 file.read_exact(&mut header_bytes)?;
250
251 let header = M8Header::from_bytes(&header_bytes)?;
252
253 if header.magic != *M8_MAGIC {
255 anyhow::bail!("Invalid .m8 file (bad magic)");
256 }
257
258 let file_size = file.seek(SeekFrom::End(0))?;
260
261 let mut m8 = Self {
262 path,
263 file,
264 header,
265 tokens: TokenMap::new(),
266 read_position: file_size,
267 importance_index: Vec::new(),
268 };
269
270 m8.build_importance_index()?;
272
273 Ok(m8)
274 }
275
276 pub fn append_block(&mut self, content: &[u8], importance: f32) -> Result<()> {
278 let timestamp = SystemTime::now().duration_since(UNIX_EPOCH)?.as_micros() as u64;
279
280 let wave_sig = self.generate_wave_signature(content);
281
282 let block = M8Block {
283 index: self.header.block_count as u64,
284 wave_signature: wave_sig,
285 timestamp,
286 importance: (importance * 65535.0) as u16,
287 token_id: 0, prev_hash: [0; 32], content_len: content.len() as u32,
290 content: content.to_vec(),
291 };
292
293 self.file.seek(SeekFrom::End(0))?;
295
296 self.write_block(&block)?;
298
299 self.header.block_count += 1;
301 self.update_header()?;
302
303 let offset = self.file.stream_position()?;
305 self.importance_index.push((offset, importance));
306
307 Ok(())
308 }
309
310 pub fn read_backwards(&mut self) -> Result<Option<M8Block>> {
312 if self.read_position <= std::mem::size_of::<M8Header>() as u64 {
313 return Ok(None);
314 }
315
316 self.read_position -= BLOCK_SIZE as u64;
318 self.file.seek(SeekFrom::Start(self.read_position))?;
319
320 self.read_block()
322 }
323
324 pub fn read_by_importance(&mut self, keywords: &[String]) -> Result<Vec<M8Block>> {
326 let mut blocks = Vec::new();
327
328 self.importance_index
330 .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
331
332 let offsets: Vec<u64> = self.importance_index.iter().map(|(off, _)| *off).collect();
334
335 for offset in offsets {
336 self.file.seek(SeekFrom::Start(offset))?;
337
338 if let Some(block) = self.read_block()? {
339 let content_str = String::from_utf8_lossy(&block.content);
341 let has_keyword = keywords.iter().any(|kw| content_str.contains(kw));
342
343 if has_keyword || blocks.len() < 10 {
344 blocks.push(block);
345 }
346
347 if blocks.len() >= 20 {
348 break;
349 }
350 }
351 }
352
353 Ok(blocks)
354 }
355
356 fn generate_wave_signature(&self, content: &[u8]) -> [u8; 16] {
358 use sha2::{Digest, Sha256};
359
360 let mut hasher = Sha256::new();
361 hasher.update(content);
362 hasher.update(self.header.identity_freq.to_le_bytes());
363
364 let hash = hasher.finalize();
365 let mut signature = [0u8; 16];
366 signature.copy_from_slice(&hash[..16]);
367
368 signature
369 }
370
371 fn write_block(&mut self, block: &M8Block) -> Result<()> {
373 let mut buffer = vec![0u8; BLOCK_SIZE];
375 let mut cursor = 0;
376
377 buffer[cursor..cursor + 8].copy_from_slice(&block.index.to_le_bytes());
379 cursor += 8;
380
381 buffer[cursor..cursor + 16].copy_from_slice(&block.wave_signature);
382 cursor += 16;
383
384 buffer[cursor..cursor + 8].copy_from_slice(&block.timestamp.to_le_bytes());
385 cursor += 8;
386
387 buffer[cursor..cursor + 2].copy_from_slice(&block.importance.to_le_bytes());
388 cursor += 2;
389
390 buffer[cursor..cursor + 2].copy_from_slice(&block.token_id.to_le_bytes());
391 cursor += 2;
392
393 buffer[cursor..cursor + 32].copy_from_slice(&block.prev_hash);
394 cursor += 32;
395
396 buffer[cursor..cursor + 4].copy_from_slice(&block.content_len.to_le_bytes());
397 cursor += 4;
398
399 let content_space = BLOCK_SIZE - cursor;
401 let content_to_copy = block.content.len().min(content_space);
402 buffer[cursor..cursor + content_to_copy].copy_from_slice(&block.content[..content_to_copy]);
403
404 self.file.write_all(&buffer)?;
405
406 Ok(())
407 }
408
409 fn read_block(&mut self) -> Result<Option<M8Block>> {
411 let mut buffer = vec![0u8; BLOCK_SIZE];
412
413 match self.file.read_exact(&mut buffer) {
414 Ok(_) => {}
415 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None),
416 Err(e) => return Err(e.into()),
417 }
418
419 let mut cursor = 0;
420
421 let index = u64::from_le_bytes(buffer[cursor..cursor + 8].try_into()?);
422 cursor += 8;
423
424 let mut wave_signature = [0u8; 16];
425 wave_signature.copy_from_slice(&buffer[cursor..cursor + 16]);
426 cursor += 16;
427
428 let timestamp = u64::from_le_bytes(buffer[cursor..cursor + 8].try_into()?);
429 cursor += 8;
430
431 let importance = u16::from_le_bytes(buffer[cursor..cursor + 2].try_into()?);
432 cursor += 2;
433
434 let token_id = u16::from_le_bytes(buffer[cursor..cursor + 2].try_into()?);
435 cursor += 2;
436
437 let mut prev_hash = [0u8; 32];
438 prev_hash.copy_from_slice(&buffer[cursor..cursor + 32]);
439 cursor += 32;
440
441 let content_len = u32::from_le_bytes(buffer[cursor..cursor + 4].try_into()?);
442 cursor += 4;
443
444 let content = buffer[cursor..cursor + content_len as usize].to_vec();
445
446 Ok(Some(M8Block {
447 index,
448 wave_signature,
449 timestamp,
450 importance,
451 token_id,
452 prev_hash,
453 content_len,
454 content,
455 }))
456 }
457
458 fn update_header(&mut self) -> Result<()> {
460 self.file.seek(SeekFrom::Start(0))?;
461 self.file.write_all(&self.header.to_bytes())?;
462 self.file.flush()?;
463 Ok(())
464 }
465
466 fn build_importance_index(&mut self) -> Result<()> {
468 self.file
469 .seek(SeekFrom::Start(std::mem::size_of::<M8Header>() as u64))?;
470
471 loop {
472 let offset = self.file.stream_position()?;
473
474 match self.read_block()? {
475 Some(block) => {
476 let importance = block.importance as f32 / 65535.0;
477 self.importance_index.push((offset, importance));
478 }
479 None => break,
480 }
481 }
482
483 Ok(())
484 }
485}
486
487pub fn convert_json_to_binary(json_path: &Path, binary_path: &Path) -> Result<()> {
489 use std::fs;
490
491 let data = fs::read(json_path)?;
493
494 let json_str = if data.starts_with(b"\x78\x9c") || data.starts_with(b"\x78\xda") {
495 use flate2::read::ZlibDecoder;
497 let mut decoder = ZlibDecoder::new(&data[..]);
498 let mut decompressed = String::new();
499 decoder.read_to_string(&mut decompressed)?;
500 decompressed
501 } else {
502 String::from_utf8(data)?
503 };
504
505 let contexts: serde_json::Value = serde_json::from_str(&json_str)?;
507
508 let mut m8_file = M8BinaryFile::create(binary_path)?;
510
511 if let Some(contexts_array) = contexts.get("contexts").and_then(|c| c.as_array()) {
513 for context in contexts_array {
514 let content = serde_json::to_vec(context)?;
515 let importance = context.get("score").and_then(|s| s.as_f64()).unwrap_or(0.5) as f32;
516
517 m8_file.append_block(&content, importance)?;
518 }
519 }
520
521 Ok(())
522}
523
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Writes three blocks, reopens the file, and checks that the newest
    /// block is read first and that the importance query returns results.
    #[test]
    fn test_m8_binary_format() {
        let scratch = tempdir().unwrap();
        let m8_path = scratch.path().join("test.m8");

        let mut writer = M8BinaryFile::create(&m8_path).unwrap();
        for (payload, importance) in [
            (&b"First memory"[..], 0.8),
            (&b"Second memory"[..], 0.5),
            (&b"Important memory"[..], 1.0),
        ] {
            writer.append_block(payload, importance).unwrap();
        }

        let mut reader = M8BinaryFile::open(&m8_path).unwrap();

        // Reading backwards starts with the block appended last.
        let newest = reader.read_backwards().unwrap().unwrap();
        assert_eq!(&newest.content, b"Important memory");

        let keywords = ["Important".to_string()];
        let ranked = reader.read_by_importance(&keywords).unwrap();
        assert!(!ranked.is_empty());
    }

    /// Reserved strings map to their fixed ids; new strings get increasing
    /// ids from 0x100 and decode back to the original text.
    #[test]
    fn test_tokenization() {
        let mut table = TokenMap::new();

        assert_eq!(table.get_token("node_modules"), 0x80);
        assert_eq!(table.get_token(".rs"), 0x90);
        assert_eq!(table.get_token("Claude"), 0xFFFE);

        let first_custom = table.get_token("custom_string");
        let second_custom = table.get_token("another_string");
        assert!(first_custom >= 0x100);
        assert!(second_custom > first_custom);

        assert_eq!(table.decode_token(0x80), Some("node_modules"));
        assert_eq!(table.decode_token(first_custom), Some("custom_string"));
    }
}