use std::fs;
use std::path::Path;
use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use serde::Deserialize;
/// Locates one edge table inside the binary payload that follows the JSON header.
///
/// Resolved by `read_edges_at` against `ModulesData::binary`.
#[derive(Debug, Deserialize)]
pub struct EdgesDataReference {
/// Byte offset into the binary payload at which the table's leading node count is read.
pub offset: u32,
/// Size of the table; a value of zero makes lookups return no edges without reading the payload.
/// NOTE(review): the unit (presumably bytes) is not established by this file — only the
/// zero check reads this field. Confirm against the writer.
pub length: u32,
}
/// One entry of the `modules` array in the `modules.data` JSON header.
#[derive(Debug, Deserialize)]
pub struct AnalyzeModule {
/// Identifier string of the module (the tests in this file use values such as `a (ecmascript)`).
pub ident: String,
/// Path string recorded for the module.
pub path: String,
}
/// JSON header of `modules.data`, deserialized by `ModulesData::from_bytes`.
///
/// Each `EdgesDataReference` field points at an edge table in the binary payload stored
/// after the header. All five fields are required when deserializing.
#[derive(Debug, Deserialize)]
pub struct ModulesDataHeader {
/// Modules listed in the header.
/// NOTE(review): presumably the node index passed to `ModulesData::edges_for` is a position
/// in this list — confirm against the writer.
pub modules: Vec<AnalyzeModule>,
/// Reference to the module-dependents edge table; deserialized but not read by this file.
#[expect(dead_code, reason = "deserialized from binary format but not yet used")]
pub module_dependents: EdgesDataReference,
/// Reference to the async module-dependents edge table; deserialized but not read by this file.
#[expect(dead_code, reason = "deserialized from binary format but not yet used")]
pub async_module_dependents: EdgesDataReference,
/// Reference to the module-dependencies edge table.
pub module_dependencies: EdgesDataReference,
/// Reference to the async module-dependencies edge table.
pub async_module_dependencies: EdgesDataReference,
}
/// One entry of the `sources` array in the `analyze.data` JSON header.
///
/// Entries link to a parent entry through `parent_source_index`;
/// `AnalyzeData::full_source_path` concatenates the segments along that chain.
#[derive(Debug, Deserialize)]
pub struct AnalyzeSource {
/// Index into `AnalyzeDataHeader::sources` of the parent entry, or `None` for an entry
/// without a parent.
pub parent_source_index: Option<u32>,
/// Path segment of this entry. It is appended directly to the parent's full path, with no
/// separator inserted by this file.
pub path: String,
}
/// One entry of the `chunk_parts` array in the `analyze.data` JSON header.
///
/// NOTE(review): nothing in this file reads these fields, so the per-field notes below are
/// inferred from the names only — confirm against the writer of the format.
#[derive(Debug, Deserialize)]
pub struct AnalyzeChunkPart {
/// Presumably an index into `AnalyzeDataHeader::sources` — TODO confirm.
pub source_index: u32,
// NOTE(review): no reason is given for this `allow`; presumably the field is kept only to
// mirror the on-disk schema. Confirm before removing or tightening the lint.
/// Presumably an index into a list of output files that is not part of this header — TODO confirm.
#[allow(dead_code)]
pub output_file_index: u32,
/// Presumably the uncompressed size of this part — unit not established here.
pub size: u32,
/// Presumably the compressed size of this part — unit and compression scheme not established here.
pub compressed_size: u32,
}
/// JSON header of `analyze.data`, deserialized by `AnalyzeData::from_bytes`.
#[derive(Debug, Deserialize)]
pub struct AnalyzeDataHeader {
/// Source entries; `AnalyzeSource::parent_source_index` values index into this list.
pub sources: Vec<AnalyzeSource>,
/// Chunk part records; not read by any code in this file.
pub chunk_parts: Vec<AnalyzeChunkPart>,
// NOTE(review): no reason is given for this `allow`; presumably the field is kept only to
// mirror the on-disk schema. Confirm before removing or tightening the lint.
/// Presumably indices into `sources` of the entries that have no parent — TODO confirm.
#[allow(dead_code)]
pub source_roots: Vec<u32>,
}
/// A parsed `modules.data` file: the JSON header plus the raw bytes that follow it.
pub struct ModulesData {
/// Deserialized JSON header.
pub header: ModulesDataHeader,
/// Bytes that follow the JSON header in the envelope; edge tables are decoded from here.
pub(crate) binary: Vec<u8>,
}
/// A parsed `analyze.data` file. Only the JSON header is retained; `from_bytes` drops any
/// bytes that follow it.
pub struct AnalyzeData {
/// Deserialized JSON header.
pub header: AnalyzeDataHeader,
}
pub fn load_modules_data(data_dir: &Path) -> Result<ModulesData> {
let path = data_dir.join("modules.data");
let bytes = fs::read(&path).context("failed to read modules.data")?;
ModulesData::from_bytes(&bytes)
}
fn parse_envelope<T: serde::de::DeserializeOwned>(bytes: &[u8]) -> Result<(T, Vec<u8>)> {
if bytes.len() < 4 {
bail!("file too short (< 4 bytes)");
}
let json_len = read_u32_be(bytes, 0) as usize;
let json_end = 4 + json_len;
if bytes.len() < json_end {
bail!(
"file too short for JSON header ({json_len} bytes, file has {})",
bytes.len()
);
}
let header: T =
serde_json::from_slice(&bytes[4..json_end]).context("failed to parse JSON header")?;
let binary = bytes[json_end..].to_vec();
Ok((header, binary))
}
impl ModulesData {
    /// Parses a `modules.data` envelope: a 4-byte big-endian length, that many bytes of
    /// JSON header, then the binary payload holding the edge tables.
    ///
    /// # Errors
    ///
    /// Returns an error if the envelope is truncated or the header fails to deserialize.
    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
        parse_envelope::<ModulesDataHeader>(bytes)
            .context("parsing modules.data envelope")
            .map(|(header, binary)| Self { header, binary })
    }

    /// Returns the edge targets stored for node `index` in the table that `reference`
    /// points at within this file's binary payload.
    ///
    /// The result is empty when the table has zero length or `index` is not below the
    /// table's node count.
    pub fn edges_for(&self, reference: &EdgesDataReference, index: usize) -> Vec<u32> {
        read_edges_at(self.binary.as_slice(), reference, index)
    }
}
impl AnalyzeData {
    /// Parses an `analyze.data` envelope and keeps only its JSON header; any bytes that
    /// follow the header are discarded.
    ///
    /// # Errors
    ///
    /// Returns an error if the envelope is truncated or the header fails to deserialize.
    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
        let (header, _binary) =
            parse_envelope::<AnalyzeDataHeader>(bytes).context("parsing analyze.data envelope")?;
        Ok(Self { header })
    }

    /// Returns the full path of the source at `index`: the `path` segments of its
    /// ancestors, outermost first, followed by its own, concatenated with no separator.
    ///
    /// The parent chain is walked iteratively, so arbitrarily deep chains do not consume
    /// call stack, and the result is assembled in a single pass.
    ///
    /// # Panics
    ///
    /// Panics if `index` or any `parent_source_index` on the chain is out of bounds, or
    /// if the parent links form a cycle.
    pub fn full_source_path(&self, index: usize) -> String {
        let sources = &self.header.sources;
        // Segments are collected leaf-first and reversed when joining.
        let mut segments: Vec<&str> = Vec::new();
        let mut current = index;
        loop {
            let source = &sources[current];
            segments.push(source.path.as_str());
            let Some(parent) = source.parent_source_index else {
                break;
            };
            // A chain without repeats can visit each source at most once, so needing one
            // more hop after `sources.len()` visits proves the links loop back.
            assert!(
                segments.len() < sources.len(),
                "parent_source_index cycle detected while resolving source {index}"
            );
            current = parent as usize;
        }
        segments.iter().rev().copied().collect()
    }
}
fn read_edges_at(binary: &[u8], reference: &EdgesDataReference, index: usize) -> Vec<u32> {
if reference.length == 0 {
return Vec::new();
}
let base = reference.offset as usize;
let num_nodes = read_u32_be(binary, base) as usize;
if index >= num_nodes {
return Vec::new();
}
let offsets_start = base + 4;
let prev_offset = if index == 0 {
0
} else {
read_u32_be(binary, offsets_start + (index - 1) * 4) as usize
};
let current_offset = read_u32_be(binary, offsets_start + index * 4) as usize;
let edge_count = current_offset - prev_offset;
if edge_count == 0 {
return Vec::new();
}
let data_start = offsets_start + num_nodes * 4;
(0..edge_count)
.map(|j| read_u32_be(binary, data_start + (prev_offset + j) * 4))
.collect()
}
/// Decodes the big-endian `u32` held in the four bytes of `data` that start at `offset`.
///
/// # Panics
///
/// Panics if fewer than four bytes are available at `offset`.
fn read_u32_be(data: &[u8], offset: usize) -> u32 {
    // Fold most-significant byte first: each step shifts the accumulator up one byte.
    data[offset..][..4]
        .iter()
        .fold(0u32, |word, &byte| (word << 8) | u32::from(byte))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Frames `header` as an envelope: 4-byte big-endian JSON length, the JSON bytes,
    /// then `payload`.
    fn envelope(header: &serde_json::Value, payload: &[u8]) -> Vec<u8> {
        let json_bytes = serde_json::to_vec(header).unwrap();
        let mut blob = Vec::with_capacity(4 + json_bytes.len() + payload.len());
        blob.extend_from_slice(&(json_bytes.len() as u32).to_be_bytes());
        blob.extend_from_slice(&json_bytes);
        blob.extend_from_slice(payload);
        blob
    }

    #[test]
    fn test_read_u32_be() {
        assert_eq!(read_u32_be(&[0x00, 0x00, 0x00, 0x05], 0), 5);
    }

    #[test]
    fn test_parse_envelope_minimal() {
        let header = serde_json::json!({
            "modules": [],
            "module_dependents": {"offset": 0, "length": 0},
            "async_module_dependents": {"offset": 0, "length": 0},
            "module_dependencies": {"offset": 0, "length": 0},
            "async_module_dependencies": {"offset": 0, "length": 0},
        });

        let parsed = ModulesData::from_bytes(&envelope(&header, &[])).unwrap();

        assert!(parsed.header.modules.is_empty());
    }

    #[test]
    fn test_edges_decoding() {
        let header = serde_json::json!({
            "modules": [
                {"ident": "a (ecmascript)", "path": "a"},
                {"ident": "b (ecmascript)", "path": "b"},
                {"ident": "c (ecmascript)", "path": "c"},
            ],
            "module_dependents": {"offset": 0, "length": 0},
            "async_module_dependents": {"offset": 0, "length": 0},
            "module_dependencies": {"offset": 0, "length": 40},
            "async_module_dependencies": {"offset": 0, "length": 0},
        });
        // Edge table words: node count (3), cumulative end offsets (2, 3, 3),
        // then the edge data (1, 2 for node 0; 2 for node 1; nothing for node 2).
        let edges_binary: Vec<u8> = [3u32, 2, 3, 3, 1, 2, 2]
            .iter()
            .flat_map(|word| word.to_be_bytes())
            .collect();

        let parsed = ModulesData::from_bytes(&envelope(&header, &edges_binary)).unwrap();
        let deps = &parsed.header.module_dependencies;

        assert_eq!(parsed.edges_for(deps, 0), vec![1, 2]);
        assert_eq!(parsed.edges_for(deps, 1), vec![2]);
        assert_eq!(parsed.edges_for(deps, 2), Vec::<u32>::new());
    }
}