agentic_codebase/format/
reader.rs1use std::io::Read;
7use std::path::{Path, PathBuf};
8
9use crate::graph::CodeGraph;
10use crate::types::header::{FileHeader, HEADER_SIZE};
11use crate::types::{
12 AcbError, AcbResult, CodeUnit, CodeUnitType, Edge, EdgeType, Language, Span, Visibility,
13 ACB_MAGIC, FORMAT_VERSION,
14};
15
16use super::compression::StringPool;
17use super::writer::{EDGE_RECORD_SIZE, UNIT_RECORD_SIZE};
18use super::AcbWriter;
19
/// How the reader treats `.acb` files written with an older format version.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StorageMigrationPolicy {
    /// Load the legacy file, then try to rewrite it in the current format.
    AutoSafe,
    /// Refuse to load legacy files.
    Strict,
    /// Load legacy files but never rewrite them.
    Off,
}

impl StorageMigrationPolicy {
    /// Resolves the policy from the environment variable `name`.
    ///
    /// The value is trimmed and compared case-insensitively:
    /// `strict` selects [`Self::Strict`]; `off`, `disabled` or `none` select
    /// [`Self::Off`]. An unset, unreadable or unrecognised value falls back to
    /// [`Self::AutoSafe`].
    fn from_env(name: &str) -> Self {
        let raw = match std::env::var(name) {
            Ok(value) => value,
            Err(_) => String::from("auto-safe"),
        };
        let normalized = raw.trim().to_ascii_lowercase();

        if normalized == "strict" {
            Self::Strict
        } else if matches!(normalized.as_str(), "off" | "disabled" | "none") {
            Self::Off
        } else {
            Self::AutoSafe
        }
    }
}
37
38pub struct AcbReader;
40
41impl AcbReader {
42 pub fn read_from_file(path: &Path) -> AcbResult<CodeGraph> {
48 if !path.exists() {
49 return Err(AcbError::PathNotFound(path.to_path_buf()));
50 }
51 let data = std::fs::read(path)?;
52 if data.len() < HEADER_SIZE {
53 return Err(AcbError::Truncated);
54 }
55 let legacy_version = detect_legacy_version(&data);
56 let migration_policy = StorageMigrationPolicy::from_env("ACB_STORAGE_MIGRATION_POLICY");
57 if let Some(from_version) = legacy_version {
58 if migration_policy == StorageMigrationPolicy::Strict {
59 return Err(AcbError::UnsupportedVersion(from_version));
60 }
61 }
62
63 let graph = Self::read_from_data(&data)?;
64 if let Some(from_version) = legacy_version {
65 match migration_policy {
66 StorageMigrationPolicy::AutoSafe => {
67 if let Err(err) = migrate_file_in_place(path, &graph, from_version) {
68 tracing::warn!(
69 "Failed to auto-migrate {} from v{}: {}",
70 path.display(),
71 from_version,
72 err
73 );
74 }
75 }
76 StorageMigrationPolicy::Off => {
77 tracing::warn!(
78 "Legacy .acb version {} loaded for {} with migration disabled",
79 from_version,
80 path.display()
81 );
82 }
83 StorageMigrationPolicy::Strict => {}
84 }
85 }
86 Ok(graph)
87 }
88
89 pub fn read_from_data(data: &[u8]) -> AcbResult<CodeGraph> {
91 if data.len() < HEADER_SIZE {
92 return Err(AcbError::Truncated);
93 }
94
95 let header_bytes: [u8; HEADER_SIZE] = data[..HEADER_SIZE]
97 .try_into()
98 .map_err(|_| AcbError::Truncated)?;
99 let header = FileHeader::from_bytes(&header_bytes)?;
100
101 let file_len = data.len() as u64;
103 validate_offset(header.unit_table_offset, file_len)?;
104 if header.unit_count > 0 {
105 let unit_table_end =
106 header.unit_table_offset + header.unit_count * UNIT_RECORD_SIZE as u64;
107 if unit_table_end > file_len {
108 return Err(AcbError::Truncated);
109 }
110 }
111 if header.edge_count > 0 {
112 validate_offset(header.edge_table_offset, file_len)?;
113 let edge_table_end =
114 header.edge_table_offset + header.edge_count * EDGE_RECORD_SIZE as u64;
115 if edge_table_end > file_len {
116 return Err(AcbError::Truncated);
117 }
118 }
119
120 let pool = if header.string_pool_offset > 0 && header.string_pool_offset < file_len {
122 let pool_start = header.string_pool_offset as usize;
123 if pool_start + 8 > data.len() {
124 return Err(AcbError::Truncated);
125 }
126 let _uncompressed_size =
127 u64::from_le_bytes(data[pool_start..pool_start + 8].try_into().unwrap());
128 let compressed_data = &data[pool_start + 8..];
129 let compressed_end = if header.feature_vec_offset > 0 {
131 (header.feature_vec_offset as usize).saturating_sub(pool_start + 8)
132 } else {
133 compressed_data.len()
134 };
135 let compressed_slice = &compressed_data[..compressed_end.min(compressed_data.len())];
136 StringPool::from_compressed(compressed_slice)?
137 } else {
138 StringPool::from_data(Vec::new())
139 };
140
141 let mut graph = CodeGraph::new(header.dimension as usize);
143 let mut unit_edge_info: Vec<(u64, u32)> = Vec::with_capacity(header.unit_count as usize);
144
145 for i in 0..header.unit_count {
146 let offset = header.unit_table_offset as usize + (i as usize) * UNIT_RECORD_SIZE;
147 let record = &data[offset..offset + UNIT_RECORD_SIZE];
148 let (unit, edge_offset, edge_count) = read_unit_record(record, &pool)?;
149 unit_edge_info.push((edge_offset, edge_count));
150 graph.add_unit(unit);
151 }
152
153 for i in 0..header.edge_count {
155 let offset = header.edge_table_offset as usize + (i as usize) * EDGE_RECORD_SIZE;
156 let record = &data[offset..offset + EDGE_RECORD_SIZE];
157 let edge = read_edge_record(record)?;
158 if let Err(e) = graph.add_edge(edge) {
160 tracing::warn!("Skipping invalid edge during read: {}", e);
161 }
162 }
163
164 if header.feature_vec_offset > 0 && header.feature_vec_offset < file_len {
166 let dim = header.dimension as usize;
167 for i in 0..header.unit_count {
168 let vec_offset = header.feature_vec_offset as usize + (i as usize) * dim * 4;
169 if vec_offset + dim * 4 <= data.len() {
170 let mut fv = Vec::with_capacity(dim);
171 for d in 0..dim {
172 let fo = vec_offset + d * 4;
173 let val = f32::from_le_bytes(data[fo..fo + 4].try_into().unwrap());
174 fv.push(val);
175 }
176 if let Some(unit) = graph.get_unit_mut(i) {
177 unit.feature_vec = fv;
178 }
179 }
180 }
181 }
182
183 Ok(graph)
184 }
185
186 pub fn read_from(reader: &mut impl Read) -> AcbResult<CodeGraph> {
188 let mut data = Vec::new();
189 reader.read_to_end(&mut data)?;
190 Self::read_from_data(&data)
191 }
192}
193
194fn validate_offset(offset: u64, file_len: u64) -> AcbResult<()> {
195 if offset > file_len {
196 Err(AcbError::Truncated)
197 } else {
198 Ok(())
199 }
200}
201
202fn detect_legacy_version(data: &[u8]) -> Option<u32> {
203 if data.len() < 8 {
204 return None;
205 }
206 if data[0..4] != ACB_MAGIC {
207 return None;
208 }
209 let version = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
210 if version < FORMAT_VERSION {
211 Some(version)
212 } else {
213 None
214 }
215}
216
217fn migrate_file_in_place(path: &Path, graph: &CodeGraph, from_version: u32) -> AcbResult<()> {
218 let migration_dir = path
219 .parent()
220 .unwrap_or_else(|| Path::new("."))
221 .join(".acb-migrations");
222 std::fs::create_dir_all(&migration_dir)?;
223
224 let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("graph");
225 let ts = chrono::Utc::now().format("%Y%m%d%H%M%S");
226 let checkpoint = migration_dir.join(format!("{stem}.v{from_version}.{ts}.acb.checkpoint"));
227 std::fs::copy(path, &checkpoint)?;
228
229 let writer = AcbWriter::new(graph.dimension());
230 writer.write_to_file(graph, path)?;
231 tracing::info!(
232 "Auto-migrated {} from v{} to v{} (checkpoint: {})",
233 path.display(),
234 from_version,
235 FORMAT_VERSION,
236 checkpoint.display()
237 );
238 Ok(())
239}
240
241fn read_unit_record(data: &[u8], pool: &StringPool) -> AcbResult<(CodeUnit, u64, u32)> {
243 let id = u64::from_le_bytes(data[0..8].try_into().unwrap());
244 let unit_type = CodeUnitType::from_u8(data[8]).ok_or(AcbError::Corrupt(0))?;
245 let language = Language::from_u8(data[9]).ok_or(AcbError::Corrupt(1))?;
246 let visibility = Visibility::from_u8(data[10]).ok_or(AcbError::Corrupt(2))?;
247 let flags = data[11];
248 let is_async = (flags & 1) != 0;
249 let is_generator = (flags & 2) != 0;
250 let complexity = u16::from_le_bytes(data[12..14].try_into().unwrap()) as u32;
251 let name_offset = u32::from_le_bytes(data[16..20].try_into().unwrap());
255 let name_len = u16::from_le_bytes(data[20..22].try_into().unwrap());
256 let qname_offset = u32::from_le_bytes(data[22..26].try_into().unwrap());
257 let qname_len = u16::from_le_bytes(data[26..28].try_into().unwrap());
258 let path_offset = u32::from_le_bytes(data[28..32].try_into().unwrap());
259 let path_len = u16::from_le_bytes(data[32..34].try_into().unwrap());
260 let start_line = u32::from_le_bytes(data[40..44].try_into().unwrap());
264 let start_col = u16::from_le_bytes(data[44..46].try_into().unwrap()) as u32;
265 let end_line = u32::from_le_bytes(data[46..50].try_into().unwrap());
266 let end_col = u16::from_le_bytes(data[50..52].try_into().unwrap()) as u32;
267 let created_at = u64::from_le_bytes(data[56..64].try_into().unwrap());
271 let last_modified = u64::from_le_bytes(data[64..72].try_into().unwrap());
272 let change_count = u32::from_le_bytes(data[72..76].try_into().unwrap());
273 let stability_x100 = u16::from_le_bytes(data[76..78].try_into().unwrap());
274 let stability_score = stability_x100 as f32 / 100.0;
275 let edge_offset = u64::from_le_bytes(data[80..88].try_into().unwrap());
279 let edge_count = u32::from_le_bytes(data[88..92].try_into().unwrap());
280 let name = if name_len > 0 {
284 pool.get(name_offset, name_len)?.to_string()
285 } else {
286 String::new()
287 };
288 let qualified_name = if qname_len > 0 {
289 pool.get(qname_offset, qname_len)?.to_string()
290 } else {
291 String::new()
292 };
293 let file_path = if path_len > 0 {
294 PathBuf::from(pool.get(path_offset, path_len)?)
295 } else {
296 PathBuf::new()
297 };
298
299 let mut unit = CodeUnit::new(
300 unit_type,
301 language,
302 name,
303 qualified_name,
304 file_path,
305 Span::new(start_line, start_col, end_line, end_col),
306 );
307 unit.id = id;
308 unit.visibility = visibility;
309 unit.is_async = is_async;
310 unit.is_generator = is_generator;
311 unit.complexity = complexity;
312 unit.created_at = created_at;
313 unit.last_modified = last_modified;
314 unit.change_count = change_count;
315 unit.stability_score = stability_score;
316
317 Ok((unit, edge_offset, edge_count))
318}
319
320fn read_edge_record(data: &[u8]) -> AcbResult<Edge> {
322 let source_id = u64::from_le_bytes(data[0..8].try_into().unwrap());
323 let target_id = u64::from_le_bytes(data[8..16].try_into().unwrap());
324 let edge_type = EdgeType::from_u8(data[16]).ok_or(AcbError::Corrupt(16))?;
325 let weight_bits = u32::from_le_bytes(data[20..24].try_into().unwrap());
327 let weight = f32::from_bits(weight_bits);
328 let created_at = u64::from_le_bytes(data[24..32].try_into().unwrap());
329 let context = u32::from_le_bytes(data[32..36].try_into().unwrap());
330 Ok(Edge {
333 source_id,
334 target_id,
335 edge_type,
336 weight,
337 created_at,
338 context,
339 })
340}