use std::io::Write;
use std::path::Path;
use crate::graph::CodeGraph;
use crate::types::header::{FileHeader, HEADER_SIZE};
use crate::types::{AcbResult, DEFAULT_DIMENSION};
use super::compression::StringPoolBuilder;
pub const UNIT_RECORD_SIZE: usize = 96;
pub const EDGE_RECORD_SIZE: usize = 40;
pub struct AcbWriter {
dimension: usize,
}
impl AcbWriter {
pub fn new(dimension: usize) -> Self {
Self { dimension }
}
pub fn with_default_dimension() -> Self {
Self::new(DEFAULT_DIMENSION)
}
pub fn write_to_file(&self, graph: &CodeGraph, path: &Path) -> AcbResult<()> {
let mut file = std::fs::File::create(path)?;
self.write_to(graph, &mut file)
}
pub fn write_to(&self, graph: &CodeGraph, w: &mut impl Write) -> AcbResult<()> {
let mut pool = StringPoolBuilder::new();
let mut unit_strings: Vec<UnitStrings> = Vec::with_capacity(graph.unit_count());
for unit in graph.units() {
let (name_offset, name_len) = pool.add(&unit.name);
let (qname_offset, qname_len) = pool.add(&unit.qualified_name);
let path_str = unit.file_path.to_string_lossy();
let (path_offset, path_len) = pool.add(&path_str);
unit_strings.push(UnitStrings {
name_offset,
name_len,
qname_offset,
qname_len,
path_offset,
path_len,
});
}
let compressed_pool = pool.compress();
let mut sorted_edges: Vec<_> = graph.edges().to_vec();
sorted_edges.sort_by(|a, b| {
a.source_id
.cmp(&b.source_id)
.then(a.target_id.cmp(&b.target_id))
});
let mut unit_edge_offsets: Vec<(u64, u32)> = vec![(0, 0); graph.unit_count()];
{
let mut current_source = u64::MAX;
let mut current_offset = 0u64;
let mut current_count = 0u32;
for (i, edge) in sorted_edges.iter().enumerate() {
if edge.source_id != current_source {
if current_source != u64::MAX {
unit_edge_offsets[current_source as usize] =
(current_offset, current_count);
}
current_source = edge.source_id;
current_offset = (i as u64) * EDGE_RECORD_SIZE as u64;
current_count = 0;
}
current_count += 1;
}
if current_source != u64::MAX {
unit_edge_offsets[current_source as usize] = (current_offset, current_count);
}
}
let unit_table_offset = HEADER_SIZE as u64;
let edge_table_offset =
unit_table_offset + (graph.unit_count() as u64) * UNIT_RECORD_SIZE as u64;
let string_pool_offset =
edge_table_offset + (sorted_edges.len() as u64) * EDGE_RECORD_SIZE as u64;
let string_pool_section_size = 8 + compressed_pool.len() as u64;
let feature_vec_offset = string_pool_offset + string_pool_section_size;
let feature_vec_size = (graph.unit_count() as u64) * (self.dimension as u64) * 4;
let temporal_offset = feature_vec_offset + feature_vec_size;
let temporal_size = 16u64;
let index_offset = temporal_offset + temporal_size;
let mut header = FileHeader::new(self.dimension as u32);
header.unit_count = graph.unit_count() as u64;
header.edge_count = sorted_edges.len() as u64;
header.language_count = graph.languages().len() as u32;
header.unit_table_offset = unit_table_offset;
header.edge_table_offset = edge_table_offset;
header.string_pool_offset = string_pool_offset;
header.feature_vec_offset = feature_vec_offset;
header.temporal_offset = temporal_offset;
header.index_offset = index_offset;
header.write_to(w)?;
for (i, unit) in graph.units().iter().enumerate() {
let us = &unit_strings[i];
let (eoff, ecnt) = unit_edge_offsets[i];
write_unit_record(w, unit, us, eoff, ecnt)?;
}
for edge in &sorted_edges {
write_edge_record(w, edge)?;
}
w.write_all(&(pool.uncompressed_size() as u64).to_le_bytes())?;
w.write_all(&compressed_pool)?;
for unit in graph.units() {
for &val in &unit.feature_vec {
w.write_all(&val.to_le_bytes())?;
}
for _ in unit.feature_vec.len()..self.dimension {
w.write_all(&0.0f32.to_le_bytes())?;
}
}
w.write_all(&0u64.to_le_bytes())?; w.write_all(&0u64.to_le_bytes())?;
w.write_all(&0xFFFFFFFFu32.to_le_bytes())?;
Ok(())
}
}
struct UnitStrings {
name_offset: u32,
name_len: u16,
qname_offset: u32,
qname_len: u16,
path_offset: u32,
path_len: u16,
}
fn write_unit_record(
w: &mut impl Write,
unit: &crate::types::CodeUnit,
strings: &UnitStrings,
edge_offset: u64,
edge_count: u32,
) -> AcbResult<()> {
w.write_all(&unit.id.to_le_bytes())?; w.write_all(&[unit.unit_type as u8])?; w.write_all(&[unit.language as u8])?; w.write_all(&[unit.visibility as u8])?; let flags: u8 = (unit.is_async as u8) | ((unit.is_generator as u8) << 1);
w.write_all(&[flags])?; let complexity_u16 = unit.complexity as u16;
w.write_all(&complexity_u16.to_le_bytes())?; w.write_all(&[0u8; 2])?;
w.write_all(&strings.name_offset.to_le_bytes())?; w.write_all(&strings.name_len.to_le_bytes())?; w.write_all(&strings.qname_offset.to_le_bytes())?; w.write_all(&strings.qname_len.to_le_bytes())?; w.write_all(&strings.path_offset.to_le_bytes())?; w.write_all(&strings.path_len.to_le_bytes())?; w.write_all(&[0u8; 6])?;
w.write_all(&unit.span.start_line.to_le_bytes())?; let start_col_u16 = unit.span.start_col as u16;
w.write_all(&start_col_u16.to_le_bytes())?; w.write_all(&unit.span.end_line.to_le_bytes())?; let end_col_u16 = unit.span.end_col as u16;
w.write_all(&end_col_u16.to_le_bytes())?; w.write_all(&[0u8; 4])?;
w.write_all(&unit.created_at.to_le_bytes())?; w.write_all(&unit.last_modified.to_le_bytes())?; let change_count_u32 = unit.change_count;
w.write_all(&change_count_u32.to_le_bytes())?; let stability_x100 = (unit.stability_score * 100.0).round() as u16;
w.write_all(&stability_x100.to_le_bytes())?; w.write_all(&[0u8; 2])?;
w.write_all(&edge_offset.to_le_bytes())?; w.write_all(&edge_count.to_le_bytes())?; w.write_all(&[0u8; 4])?;
Ok(())
}
fn write_edge_record(w: &mut impl Write, edge: &crate::types::Edge) -> AcbResult<()> {
w.write_all(&edge.source_id.to_le_bytes())?; w.write_all(&edge.target_id.to_le_bytes())?; w.write_all(&[edge.edge_type as u8])?; w.write_all(&[0u8; 3])?; w.write_all(&edge.weight.to_bits().to_le_bytes())?; w.write_all(&edge.created_at.to_le_bytes())?; w.write_all(&edge.context.to_le_bytes())?; w.write_all(&[0u8; 4])?;
Ok(())
}