parcode/
inspector.rs

//! Tools for inspecting the physical structure of Parcode files.
//!
//! This module provides the [`ParcodeInspector`] tool, which can analyze a Parcode file
//! and report on its internal structure, chunk sizes, compression algorithms, and data distribution.
//!
//! This is useful for:
//! - **Forensics:** Understanding why a file is large or corrupt.
//! - **Optimization:** Verifying that your `#[parcode(chunkable)]` and `#[parcode(map)]` attributes are working as intended.
//! - **Debugging:** Visualizing the dependency graph of your serialized data.
10
11use crate::error::Result;
12use crate::reader::{ChunkNode, ParcodeFile};
13use serde::{Deserialize, Serialize};
14use std::path::Path;
15
16/// A structural report of a Parcode file.
17#[derive(Debug, Serialize)]
18pub struct DebugReport {
19    /// Total size of the file on disk.
20    pub file_size: u64,
21    /// Offset where the Root chunk starts.
22    pub root_offset: u64,
23    /// Format version.
24    pub global_version: u16,
25    /// The hierarchical tree of chunks.
26    pub tree: ChunkInfo,
27}
28
29/// Metadata for a single chunk in the graph.
30#[derive(Debug, Serialize)]
31pub struct ChunkInfo {
32    /// Absolute offset.
33    pub offset: u64,
34    /// Total chunk length (header + payload + footer).
35    pub total_length: u64,
36    /// Size of the data payload.
37    pub payload_size: u64,
38    /// Number of children dependencies.
39    pub child_count: u32,
40    /// Compression algorithm used.
41    pub compression_algo: String,
42    /// Whether it flagged as having children.
43    pub is_chunkable: bool,
44    /// Inferred type info (e.g., "Vec Container", "Map Shard", "Blob").
45    pub node_type_hint: String,
46    /// Extra info about distribution (e.g., "32 shards").
47    pub distribution_info: Option<String>,
48    /// Child nodes.
49    pub children: Vec<Self>,
50}
51
/// Entry point for inspecting Parcode files.
///
/// This is a stateless marker type: every operation is an associated function,
/// so no instance is ever needed.
#[derive(Debug)]
pub struct ParcodeInspector;
55
56impl ParcodeInspector {
57    /// Inspects an already open file.
58    pub fn inspect_file(file: &ParcodeFile) -> Result<DebugReport> {
59        let root = file.root_node()?;
60        let tree = Self::inspect_node(&root)?;
61
62        Ok(DebugReport {
63            file_size: file.file_size(),
64            root_offset: root.offset(),
65            global_version: 4,
66            tree,
67        })
68    }
69
70    /// Convenience wrapper to inspect from path (matches old API if needed).
71    pub fn inspect<P: AsRef<Path>>(path: P) -> Result<DebugReport> {
72        let file = ParcodeFile::open(path)?;
73        Self::inspect_file(&file)
74    }
75
76    fn inspect_node(node: &ChunkNode<'_>) -> Result<ChunkInfo> {
77        let raw_meta = node.meta();
78        let algo_id = raw_meta.compression_method();
79        let algo_name = match algo_id {
80            0 => "None".to_string(),
81            1 => "LZ4".to_string(),
82            _ => format!("Unknown({})", algo_id),
83        };
84
85        let (hint, distrib) = Self::analyze_payload(node);
86
87        let mut children_info = Vec::new();
88        if node.child_count() > 0 {
89            let children = node.children()?;
90            for child in children {
91                children_info.push(Self::inspect_node(&child)?);
92            }
93        }
94
95        Ok(ChunkInfo {
96            offset: node.offset(),
97            total_length: node.length(),
98            payload_size: node.payload_len(),
99            child_count: node.child_count(),
100            compression_algo: algo_name,
101            is_chunkable: raw_meta.is_chunkable(),
102            node_type_hint: hint,
103            distribution_info: distrib,
104            children: children_info,
105        })
106    }
107
108    fn analyze_payload(node: &ChunkNode<'_>) -> (String, Option<String>) {
109        if node.child_count() == 0 {
110            return ("Leaf/Blob".to_string(), None);
111        }
112
113        let payload = match node.read_raw() {
114            Ok(p) => p,
115            Err(_) => return ("Corrupted".to_string(), None),
116        };
117
118        // CHECK 1: Vec Container
119        if payload.len() >= 8 {
120            #[derive(Deserialize)]
121            struct ShardRun {
122                _item_count: u32,
123                repeat: u32,
124            }
125
126            let slice = &payload.get(8..).expect("Missing Vec header");
127            // We attempt to decode. If it fails, it's not a Vec header.
128            if let Ok((runs, _)) = bincode::serde::decode_from_slice::<Vec<ShardRun>, _>(
129                slice,
130                bincode::config::standard(),
131            ) {
132                let total_items = if payload.len() >= 8 {
133                    u64::from_le_bytes(
134                        payload
135                            .get(0..8)
136                            .expect("Missing Vec header")
137                            .try_into()
138                            .unwrap_or([0; 8]),
139                    )
140                } else {
141                    0
142                };
143
144                let distribution = format!(
145                    "Vec<{}> items across {} logical shards",
146                    total_items,
147                    runs.iter().map(|r| r.repeat).sum::<u32>()
148                );
149                return ("Vec Container".to_string(), Some(distribution));
150            }
151        }
152
153        // CHECK 2: Map Container (4 bytes exactly)
154        if payload.len() == 4 {
155            let num_shards = u32::from_le_bytes(
156                payload
157                    .get(0..4)
158                    .expect("Missing Map header")
159                    .try_into()
160                    .unwrap_or([0; 4]),
161            );
162            if num_shards == node.child_count() {
163                return (
164                    "Map Container".to_string(),
165                    Some(format!("Hashtable with {} buckets", num_shards)),
166                );
167            }
168        }
169
170        ("Generic Container".to_string(), None)
171    }
172}
173
174impl std::fmt::Display for DebugReport {
175    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176        writeln!(f, "=== PARCODE INSPECTOR REPORT ===")?;
177        writeln!(f, "Root Offset:    {}", self.root_offset)?;
178        writeln!(f, "\n[GRAPH LAYOUT]")?;
179        self.tree.fmt_recursive(f, "", true)
180    }
181}
182
183impl ChunkInfo {
184    fn fmt_recursive(
185        &self,
186        f: &mut std::fmt::Formatter<'_>,
187        prefix: &str,
188        is_last: bool,
189    ) -> std::fmt::Result {
190        let connector = if is_last { "└── " } else { "├── " };
191        let child_prefix = if is_last { "    " } else { "│   " };
192        let distrib = self
193            .distribution_info
194            .as_deref()
195            .map(|d| format!(" [{}]", d))
196            .unwrap_or_default();
197
198        writeln!(
199            f,
200            "{}{}[{}] Size: {}b | Algo: {} | Children: {}{}",
201            prefix,
202            connector,
203            self.node_type_hint,
204            self.payload_size,
205            self.compression_algo,
206            self.child_count,
207            distrib
208        )?;
209
210        for (i, child) in self.children.iter().enumerate() {
211            let is_last_child = i == self.children.len() - 1;
212            child.fmt_recursive(f, &format!("{}{}", prefix, child_prefix), is_last_child)?;
213        }
214        Ok(())
215    }
216}