Skip to main content

agentic_memory/format/
reader.rs

1//! Reads .amem files into in-memory graph.
2
3use std::io::Read;
4use std::path::Path;
5
6use crate::graph::MemoryGraph;
7use crate::index::{DocLengths, TermIndex};
8use crate::types::error::{AmemError, AmemResult};
9use crate::types::header::FileHeader;
10use crate::types::{CognitiveEvent, Edge, EdgeType, EventType};
11
12use super::compression::decompress_content;
13
14/// Reader for .amem binary files.
15pub struct AmemReader;
16
17impl AmemReader {
18    /// Read an .amem file into a MemoryGraph.
19    pub fn read_from_file(path: &Path) -> AmemResult<MemoryGraph> {
20        let data = std::fs::read(path)?;
21        let mut cursor = std::io::Cursor::new(data);
22        Self::read_from(&mut cursor)
23    }
24
25    /// Read from any reader into a MemoryGraph.
26    pub fn read_from(reader: &mut impl Read) -> AmemResult<MemoryGraph> {
27        // Read all data into a buffer
28        let mut data = Vec::new();
29        reader.read_to_end(&mut data)?;
30
31        if data.len() < 64 {
32            return Err(AmemError::Truncated);
33        }
34
35        // Parse header
36        let header = FileHeader::read_from(&mut std::io::Cursor::new(&data[..64]))?;
37
38        let dimension = header.dimension as usize;
39        let node_count = header.node_count as usize;
40        let edge_count = header.edge_count as usize;
41
42        // Read node table
43        let node_table_start = header.node_table_offset as usize;
44        let mut nodes: Vec<CognitiveEvent> = Vec::with_capacity(node_count);
45        let mut node_content_info: Vec<(u64, u32)> = Vec::with_capacity(node_count);
46
47        for i in 0..node_count {
48            let offset = node_table_start + i * 72;
49            if offset + 72 > data.len() {
50                return Err(AmemError::Truncated);
51            }
52            let record = &data[offset..offset + 72];
53            let (event, content_offset, content_length) = parse_node_record(record)?;
54            node_content_info.push((content_offset, content_length));
55            nodes.push(event);
56        }
57
58        // Read edge table
59        let edge_table_start = header.edge_table_offset as usize;
60        let mut edges: Vec<Edge> = Vec::with_capacity(edge_count);
61
62        for i in 0..edge_count {
63            let offset = edge_table_start + i * 32;
64            if offset + 32 > data.len() {
65                return Err(AmemError::Truncated);
66            }
67            let record = &data[offset..offset + 32];
68            edges.push(parse_edge_record(record)?);
69        }
70
71        // Read content block
72        let content_block_start = header.content_block_offset as usize;
73        for (i, node) in nodes.iter_mut().enumerate() {
74            let (content_offset, content_length) = node_content_info[i];
75            if content_length > 0 {
76                let start = content_block_start + content_offset as usize;
77                let end = start + content_length as usize;
78                if end > data.len() {
79                    return Err(AmemError::Truncated);
80                }
81                node.content = decompress_content(&data[start..end])?;
82            }
83        }
84
85        // Read feature vectors
86        let fv_start = header.feature_vec_offset as usize;
87        for (i, node) in nodes.iter_mut().enumerate() {
88            let offset = fv_start + i * dimension * 4;
89            if offset + dimension * 4 > data.len() {
90                return Err(AmemError::Truncated);
91            }
92            let mut vec = Vec::with_capacity(dimension);
93            for j in 0..dimension {
94                let byte_offset = offset + j * 4;
95                let bytes: [u8; 4] = data[byte_offset..byte_offset + 4].try_into().unwrap();
96                vec.push(f32::from_le_bytes(bytes));
97            }
98            node.feature_vec = vec;
99        }
100
101        // Build graph from parts (this rebuilds type/temporal/session indexes)
102        let mut graph = MemoryGraph::from_parts(nodes, edges, dimension)?;
103
104        // Parse index block for new BM25 indexes (tags 0x05, 0x06)
105        // The index block starts after the feature vector block
106        let index_block_start = fv_start + node_count * dimension * 4;
107        let mut pos = index_block_start;
108
109        while pos + 9 <= data.len() {
110            // Each index: [tag: u8][length: u64][data: length bytes]
111            let tag = data[pos];
112            pos += 1;
113            let length = u64::from_le_bytes(data[pos..pos + 8].try_into().unwrap()) as usize;
114            pos += 8;
115
116            if pos + length > data.len() {
117                break; // Truncated index block, skip gracefully
118            }
119
120            match tag {
121                0x01..=0x04 => {
122                    // Existing indexes — already rebuilt from nodes by from_parts, skip
123                    pos += length;
124                }
125                0x05 => {
126                    // Term Index
127                    if let Some(ti) = TermIndex::from_bytes(&data[pos..pos + length]) {
128                        graph.set_term_index(ti);
129                    }
130                    pos += length;
131                }
132                0x06 => {
133                    // Doc Lengths
134                    if let Some(dl) = DocLengths::from_bytes(&data[pos..pos + length]) {
135                        graph.set_doc_lengths(dl);
136                    }
137                    pos += length;
138                }
139                _ => {
140                    // Unknown tag — skip gracefully (forward compatibility)
141                    pos += length;
142                }
143            }
144        }
145
146        Ok(graph)
147    }
148}
149
150/// Parse a 72-byte node record.
151fn parse_node_record(data: &[u8]) -> AmemResult<(CognitiveEvent, u64, u32)> {
152    let id = u64::from_le_bytes(data[0..8].try_into().unwrap());
153    let event_type_byte = data[8];
154    let event_type = EventType::from_u8(event_type_byte).ok_or(AmemError::Corrupt(0))?;
155    // bytes 9..12: padding
156    let created_at = u64::from_le_bytes(data[12..20].try_into().unwrap());
157    let session_id = u32::from_le_bytes(data[20..24].try_into().unwrap());
158    let confidence = f32::from_le_bytes(data[24..28].try_into().unwrap());
159    let access_count = u32::from_le_bytes(data[28..32].try_into().unwrap());
160    let last_accessed = u64::from_le_bytes(data[32..40].try_into().unwrap());
161    let decay_score = f32::from_le_bytes(data[40..44].try_into().unwrap());
162    let content_offset = u64::from_le_bytes(data[44..52].try_into().unwrap());
163    let content_length = u32::from_le_bytes(data[52..56].try_into().unwrap());
164    // edge_offset at 56..64 (not needed for in-memory construction)
165    // edge_count at 64..66 (not needed)
166    // padding at 66..72 (not needed)
167
168    let event = CognitiveEvent {
169        id,
170        event_type,
171        created_at,
172        session_id,
173        confidence,
174        access_count,
175        last_accessed,
176        decay_score,
177        content: String::new(),  // Will be filled from content block
178        feature_vec: Vec::new(), // Will be filled from feature vec block
179    };
180
181    Ok((event, content_offset, content_length))
182}
183
184/// Parse a 32-byte edge record.
185fn parse_edge_record(data: &[u8]) -> AmemResult<Edge> {
186    let source_id = u64::from_le_bytes(data[0..8].try_into().unwrap());
187    let target_id = u64::from_le_bytes(data[8..16].try_into().unwrap());
188    let edge_type_byte = data[16];
189    let edge_type = EdgeType::from_u8(edge_type_byte).ok_or(AmemError::Corrupt(0))?;
190    // bytes 17..20: padding
191    let weight = f32::from_le_bytes(data[20..24].try_into().unwrap());
192    let created_at = u64::from_le_bytes(data[24..32].try_into().unwrap());
193
194    Ok(Edge {
195        source_id,
196        target_id,
197        edge_type,
198        weight,
199        created_at,
200    })
201}