agentic_codebase/format/
writer.rs1use std::io::Write;
8use std::path::Path;
9
10use crate::graph::CodeGraph;
11use crate::types::header::{FileHeader, HEADER_SIZE};
12use crate::types::{AcbResult, DEFAULT_DIMENSION};
13
14use super::compression::StringPoolBuilder;
15
/// Number of bytes one unit record occupies in the unit table.
///
/// Used to derive the edge table offset from the unit count.
pub const UNIT_RECORD_SIZE: usize = 96;

/// Number of bytes one edge record occupies in the edge table.
///
/// Used to derive per-unit edge offsets and the string pool offset.
pub const EDGE_RECORD_SIZE: usize = 40;
21
/// Serializer that writes a [`CodeGraph`] to a byte stream or file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AcbWriter {
    /// Number of `f32` feature values stored per unit; shorter vectors are
    /// zero-padded when written.
    dimension: usize,
}
26
27impl AcbWriter {
28 pub fn new(dimension: usize) -> Self {
30 Self { dimension }
31 }
32
33 pub fn with_default_dimension() -> Self {
35 Self::new(DEFAULT_DIMENSION)
36 }
37
38 pub fn write_to_file(&self, graph: &CodeGraph, path: &Path) -> AcbResult<()> {
44 let mut file = std::fs::File::create(path)?;
45 self.write_to(graph, &mut file)
46 }
47
48 pub fn write_to(&self, graph: &CodeGraph, w: &mut impl Write) -> AcbResult<()> {
50 let mut pool = StringPoolBuilder::new();
52 let mut unit_strings: Vec<UnitStrings> = Vec::with_capacity(graph.unit_count());
53
54 for unit in graph.units() {
55 let (name_offset, name_len) = pool.add(&unit.name);
56 let (qname_offset, qname_len) = pool.add(&unit.qualified_name);
57 let path_str = unit.file_path.to_string_lossy();
58 let (path_offset, path_len) = pool.add(&path_str);
59 unit_strings.push(UnitStrings {
60 name_offset,
61 name_len,
62 qname_offset,
63 qname_len,
64 path_offset,
65 path_len,
66 });
67 }
68
69 let compressed_pool = pool.compress();
70
71 let mut sorted_edges: Vec<_> = graph.edges().to_vec();
73 sorted_edges.sort_by(|a, b| {
74 a.source_id
75 .cmp(&b.source_id)
76 .then(a.target_id.cmp(&b.target_id))
77 });
78
79 let mut unit_edge_offsets: Vec<(u64, u32)> = vec![(0, 0); graph.unit_count()];
81 {
82 let mut current_source = u64::MAX;
83 let mut current_offset = 0u64;
84 let mut current_count = 0u32;
85
86 for (i, edge) in sorted_edges.iter().enumerate() {
87 if edge.source_id != current_source {
88 if current_source != u64::MAX {
89 unit_edge_offsets[current_source as usize] =
90 (current_offset, current_count);
91 }
92 current_source = edge.source_id;
93 current_offset = (i as u64) * EDGE_RECORD_SIZE as u64;
94 current_count = 0;
95 }
96 current_count += 1;
97 }
98 if current_source != u64::MAX {
99 unit_edge_offsets[current_source as usize] = (current_offset, current_count);
100 }
101 }
102
103 let unit_table_offset = HEADER_SIZE as u64;
105 let edge_table_offset =
106 unit_table_offset + (graph.unit_count() as u64) * UNIT_RECORD_SIZE as u64;
107 let string_pool_offset =
108 edge_table_offset + (sorted_edges.len() as u64) * EDGE_RECORD_SIZE as u64;
109 let string_pool_section_size = 8 + compressed_pool.len() as u64;
111 let feature_vec_offset = string_pool_offset + string_pool_section_size;
112 let feature_vec_size = (graph.unit_count() as u64) * (self.dimension as u64) * 4;
113 let temporal_offset = feature_vec_offset + feature_vec_size;
114 let temporal_size = 16u64;
116 let index_offset = temporal_offset + temporal_size;
117
118 let mut header = FileHeader::new(self.dimension as u32);
120 header.unit_count = graph.unit_count() as u64;
121 header.edge_count = sorted_edges.len() as u64;
122 header.language_count = graph.languages().len() as u32;
123 header.unit_table_offset = unit_table_offset;
124 header.edge_table_offset = edge_table_offset;
125 header.string_pool_offset = string_pool_offset;
126 header.feature_vec_offset = feature_vec_offset;
127 header.temporal_offset = temporal_offset;
128 header.index_offset = index_offset;
129
130 header.write_to(w)?;
132
133 for (i, unit) in graph.units().iter().enumerate() {
135 let us = &unit_strings[i];
136 let (eoff, ecnt) = unit_edge_offsets[i];
137 write_unit_record(w, unit, us, eoff, ecnt)?;
138 }
139
140 for edge in &sorted_edges {
142 write_edge_record(w, edge)?;
143 }
144
145 w.write_all(&(pool.uncompressed_size() as u64).to_le_bytes())?;
147 w.write_all(&compressed_pool)?;
148
149 for unit in graph.units() {
151 for &val in &unit.feature_vec {
152 w.write_all(&val.to_le_bytes())?;
153 }
154 for _ in unit.feature_vec.len()..self.dimension {
156 w.write_all(&0.0f32.to_le_bytes())?;
157 }
158 }
159
160 w.write_all(&0u64.to_le_bytes())?; w.write_all(&0u64.to_le_bytes())?; w.write_all(&0xFFFFFFFFu32.to_le_bytes())?;
166
167 Ok(())
168 }
169}
170
/// String pool locations for one unit, as returned by `StringPoolBuilder::add`.
///
/// Each `(offset, len)` pair is copied verbatim into the unit's record.
struct UnitStrings {
    /// Pool offset of the unit's name.
    name_offset: u32,
    /// Length of the unit's name as reported by the pool.
    name_len: u16,
    /// Pool offset of the unit's qualified name.
    qname_offset: u32,
    /// Length of the unit's qualified name as reported by the pool.
    qname_len: u16,
    /// Pool offset of the unit's (lossily converted) file path.
    path_offset: u32,
    /// Length of the unit's file path as reported by the pool.
    path_len: u16,
}
180
181fn write_unit_record(
183 w: &mut impl Write,
184 unit: &crate::types::CodeUnit,
185 strings: &UnitStrings,
186 edge_offset: u64,
187 edge_count: u32,
188) -> AcbResult<()> {
189 w.write_all(&unit.id.to_le_bytes())?; w.write_all(&[unit.unit_type as u8])?; w.write_all(&[unit.language as u8])?; w.write_all(&[unit.visibility as u8])?; let flags: u8 = (unit.is_async as u8) | ((unit.is_generator as u8) << 1);
195 w.write_all(&[flags])?; let complexity_u16 = unit.complexity as u16;
197 w.write_all(&complexity_u16.to_le_bytes())?; w.write_all(&[0u8; 2])?; w.write_all(&strings.name_offset.to_le_bytes())?; w.write_all(&strings.name_len.to_le_bytes())?; w.write_all(&strings.qname_offset.to_le_bytes())?; w.write_all(&strings.qname_len.to_le_bytes())?; w.write_all(&strings.path_offset.to_le_bytes())?; w.write_all(&strings.path_len.to_le_bytes())?; w.write_all(&[0u8; 6])?; w.write_all(&unit.span.start_line.to_le_bytes())?; let start_col_u16 = unit.span.start_col as u16;
212 w.write_all(&start_col_u16.to_le_bytes())?; w.write_all(&unit.span.end_line.to_le_bytes())?; let end_col_u16 = unit.span.end_col as u16;
215 w.write_all(&end_col_u16.to_le_bytes())?; w.write_all(&[0u8; 4])?; w.write_all(&unit.created_at.to_le_bytes())?; w.write_all(&unit.last_modified.to_le_bytes())?; let change_count_u32 = unit.change_count;
222 w.write_all(&change_count_u32.to_le_bytes())?; let stability_x100 = (unit.stability_score * 100.0).round() as u16;
224 w.write_all(&stability_x100.to_le_bytes())?; w.write_all(&[0u8; 2])?; w.write_all(&edge_offset.to_le_bytes())?; w.write_all(&edge_count.to_le_bytes())?; w.write_all(&[0u8; 4])?; Ok(())
233}
234
235fn write_edge_record(w: &mut impl Write, edge: &crate::types::Edge) -> AcbResult<()> {
237 w.write_all(&edge.source_id.to_le_bytes())?; w.write_all(&edge.target_id.to_le_bytes())?; w.write_all(&[edge.edge_type as u8])?; w.write_all(&[0u8; 3])?; w.write_all(&edge.weight.to_bits().to_le_bytes())?; w.write_all(&edge.created_at.to_le_bytes())?; w.write_all(&edge.context.to_le_bytes())?; w.write_all(&[0u8; 4])?; Ok(())
247}