Skip to main content

pcf_debug/model/
walk.rs

1//! A defensive, read-only walk of a PCF file's physical structure.
2//!
3//! Unlike [`pcf::Container`], which flattens the table-block chain into a single
4//! list of entries and hides per-block layout, this walk preserves the chain:
5//! every block's offset, header, entry array, chain link, and stored-vs-computed
6//! table hash. It reuses the crate's public byte parsers (`*::from_bytes`) so it
7//! never re-implements the format, but it tolerates corruption that
8//! `Container::open` would reject.
9
10use pcf::{
11    compute_table_hash, FileHeader, HashAlgo, PartitionEntry, TableBlockHeader, Trailer,
12    ENTRY_SIZE, HEADER_SIZE, PT_OFFSET_TRAILER, TABLE_HEADER_SIZE, TRAILER_MAGIC, TRAILER_SIZE,
13};
14
15use super::diag::{DiagKind, Diagnostic};
16
/// Upper bound on how many table blocks a single walk will record.
///
/// Once this many blocks have been collected the chain walk stops and reports
/// a diagnostic, so a corrupt or adversarial chain cannot keep the walker busy
/// indefinitely.
const MAX_BLOCKS: usize = 4096;
19
20/// One partition entry as physically found in a block.
21#[derive(Debug, Clone)]
22pub struct EntryView {
23    pub slot: usize,
24    pub entry: PartitionEntry,
25    /// `Ok` if [`PartitionEntry::validate`] passed, else the stringified reason.
26    pub validate_ok: Result<(), String>,
27    /// `Some(true/false)` when the data region is readable and the algorithm
28    /// verifies; `None` when unreadable or the algorithm is `None`.
29    pub data_hash_ok: Option<bool>,
30    /// Whether `[start_offset, start_offset + used_bytes)` lies within the file.
31    pub data_in_bounds: bool,
32}
33
34/// One table block as physically found in the chain.
35#[derive(Debug, Clone)]
36pub struct BlockView {
37    pub index: usize,
38    pub offset: u64,
39    pub header: TableBlockHeader,
40    pub entries: Vec<EntryView>,
41    pub next_offset: u64,
42    pub stored_table_hash: [u8; 64],
43    /// `Some(true/false)` when recomputable; `None` if the algorithm is `None`
44    /// or the block's entries could not all be parsed.
45    pub table_hash_ok: Option<bool>,
46}
47
48/// The result of walking a file.
49#[derive(Debug, Clone)]
50pub struct Walk {
51    pub file_len: u64,
52    pub header: Option<FileHeader>,
53    pub blocks: Vec<BlockView>,
54    pub diagnostics: Vec<Diagnostic>,
55}
56
/// Copy exactly `N` bytes out of `data`, starting at byte offset `off`.
///
/// Returns `None` when `[off, off + N)` is not entirely inside `data`. This
/// includes the case where `off + N` would overflow `usize`: offsets come from
/// possibly corrupt files, so the end of the range is computed with checked
/// arithmetic instead of panicking (debug) or wrapping (release).
fn read_array<const N: usize>(data: &[u8], off: usize) -> Option<[u8; N]> {
    let end = off.checked_add(N)?;
    data.get(off..end)?.try_into().ok()
}
60
61/// Scan backward from the end of `data` for the last valid file trailer: a
62/// 20-byte window ending in [`TRAILER_MAGIC`] whose recorded head is empty (0)
63/// or references a parseable table block. Returns `(trailer_offset, trailer)`.
64fn locate_trailer(data: &[u8]) -> Option<(u64, Trailer)> {
65    let ts = TRAILER_SIZE as usize;
66    let mut end = data.len();
67    while end >= ts {
68        let start = end - ts;
69        let window: [u8; 20] = data[start..end].try_into().ok()?;
70        if window[12..20] == TRAILER_MAGIC {
71            if let Ok(t) = Trailer::from_bytes(&window) {
72                let head = t.partition_table_offset;
73                let head_ok = head == 0
74                    || (head
75                        .checked_add(TABLE_HEADER_SIZE)
76                        .is_some_and(|p| p as usize <= start)
77                        && read_array::<{ TABLE_HEADER_SIZE as usize }>(data, head as usize)
78                            .and_then(|b| TableBlockHeader::from_bytes(&b).ok())
79                            .is_some());
80                if head_ok {
81                    return Some((start as u64, t));
82                }
83            }
84        }
85        end -= 1;
86    }
87    None
88}
89
90/// Walk `data` (the whole file loaded into memory) and build a structural model.
91///
92/// When `verify` is false, data and table hashes are not computed (a fast path
93/// for very large files); the corresponding `*_hash_ok` fields stay `None`.
94pub fn walk(data: &[u8], verify: bool) -> Walk {
95    let file_len = data.len() as u64;
96    let mut diagnostics = Vec::new();
97
98    // ---- header ----------------------------------------------------------
99    let header = match read_array::<{ HEADER_SIZE as usize }>(data, 0) {
100        Some(buf) => match FileHeader::from_bytes(&buf) {
101            Ok(h) => Some(h),
102            Err(e) => {
103                diagnostics.push(Diagnostic::error(
104                    DiagKind::BadHeader {
105                        reason: format!("{e:?}"),
106                    },
107                    format!("file header is invalid: {e:?}"),
108                ));
109                None
110            }
111        },
112        None => {
113            diagnostics.push(Diagnostic::error(
114                DiagKind::BadHeader {
115                    reason: "file shorter than 20-byte header".into(),
116                },
117                format!("file is only {file_len} bytes; a PCF header needs {HEADER_SIZE}"),
118            ));
119            None
120        }
121    };
122
123    // ---- block chain -----------------------------------------------------
124    let mut blocks = Vec::new();
125    let mut visited: Vec<u64> = Vec::new();
126    if let Some(h) = header {
127        // Resolve the head: a header carrying the trailer sentinel locates its
128        // partition-table head via a file trailer (the last valid one).
129        let mut off = h.partition_table_offset;
130        if off == PT_OFFSET_TRAILER {
131            match locate_trailer(data) {
132                Some((toff, t)) => {
133                    let backward = t.chain_flags & 1 != 0;
134                    diagnostics.push(Diagnostic::info(
135                        DiagKind::TrailerResolved {
136                            trailer_offset: toff,
137                            head: t.partition_table_offset,
138                            backward,
139                        },
140                        format!(
141                            "header uses the trailer sentinel; trailer at {toff:#x} -> head {:#x} ({})",
142                            t.partition_table_offset,
143                            if backward { "backward" } else { "forward" }
144                        ),
145                    ));
146                    off = t.partition_table_offset;
147                }
148                None => {
149                    diagnostics.push(Diagnostic::error(
150                        DiagKind::BadHeader {
151                            reason: "trailer sentinel set but no valid trailer found".into(),
152                        },
153                        "header requests trailer-based location but no valid trailer was found"
154                            .to_string(),
155                    ));
156                    off = 0;
157                }
158            }
159        }
160        let mut index = 0usize;
161        while off != 0 {
162            if blocks.len() >= MAX_BLOCKS {
163                diagnostics.push(Diagnostic::error(
164                    DiagKind::ChainCycle { at_offset: off },
165                    format!("chain exceeds {MAX_BLOCKS} blocks; stopping (possible cycle)"),
166                ));
167                break;
168            }
169            if visited.contains(&off) {
170                diagnostics.push(Diagnostic::error(
171                    DiagKind::ChainCycle { at_offset: off },
172                    format!("table-block chain cycles back to offset {off:#x}"),
173                ));
174                break;
175            }
176            visited.push(off);
177
178            let hdr_buf = match read_array::<{ TABLE_HEADER_SIZE as usize }>(data, off as usize) {
179                Some(b) => b,
180                None => {
181                    diagnostics.push(Diagnostic::error(
182                        DiagKind::Truncated {
183                            start: off,
184                            want: off + TABLE_HEADER_SIZE,
185                            have: file_len,
186                        },
187                        format!("table block at {off:#x} runs past end of file"),
188                    ));
189                    break;
190                }
191            };
192            let bh = match TableBlockHeader::from_bytes(&hdr_buf) {
193                Ok(bh) => bh,
194                Err(e) => {
195                    diagnostics.push(Diagnostic::error(
196                        DiagKind::BadBlock {
197                            offset: off,
198                            reason: format!("{e:?}"),
199                        },
200                        format!("table block header at {off:#x} is invalid: {e:?}"),
201                    ));
202                    break;
203                }
204            };
205
206            // Parse the entries that follow the header.
207            let mut entries = Vec::with_capacity(bh.partition_count as usize);
208            let mut all_entries_parsed = true;
209            for i in 0..bh.partition_count as u64 {
210                let eoff = off + TABLE_HEADER_SIZE + i * ENTRY_SIZE;
211                let ebuf = match read_array::<{ ENTRY_SIZE as usize }>(data, eoff as usize) {
212                    Some(b) => b,
213                    None => {
214                        all_entries_parsed = false;
215                        diagnostics.push(Diagnostic::error(
216                            DiagKind::Truncated {
217                                start: eoff,
218                                want: eoff + ENTRY_SIZE,
219                                have: file_len,
220                            },
221                            format!(
222                                "entry {i} of block {index} at {eoff:#x} runs past end of file"
223                            ),
224                        ));
225                        break;
226                    }
227                };
228                let entry = match PartitionEntry::from_bytes(&ebuf) {
229                    Ok(e) => e,
230                    Err(e) => {
231                        all_entries_parsed = false;
232                        diagnostics.push(Diagnostic::warn(
233                            DiagKind::BadBlock {
234                                offset: eoff,
235                                reason: format!("{e:?}"),
236                            },
237                            format!("entry {i} of block {index} could not be parsed: {e:?}"),
238                        ));
239                        break;
240                    }
241                };
242
243                let validate_ok = entry.validate().map_err(|e| format!("{e:?}"));
244                if let Err(reason) = &validate_ok {
245                    diagnostics.push(Diagnostic::warn(
246                        DiagKind::EntryInvalid {
247                            uid: entry.uid,
248                            reason: reason.clone(),
249                        },
250                        format!(
251                            "entry '{}' fails conformance: {reason}",
252                            entry.label_string().unwrap_or_default()
253                        ),
254                    ));
255                }
256
257                let start = entry.start_offset;
258                let used = entry.used_bytes;
259                let data_in_bounds = start
260                    .checked_add(used)
261                    .map(|e| e <= file_len)
262                    .unwrap_or(false);
263                if used > 0 && !data_in_bounds {
264                    diagnostics.push(Diagnostic::error(
265                        DiagKind::Truncated {
266                            start,
267                            want: start.saturating_add(used),
268                            have: file_len,
269                        },
270                        format!(
271                            "data of partition '{}' at {start:#x} runs past end of file",
272                            entry.label_string().unwrap_or_default()
273                        ),
274                    ));
275                }
276                let data_hash_ok = if !verify || !entry.data_hash_algo.verifies() {
277                    None
278                } else if data_in_bounds {
279                    let slice = &data[start as usize..(start + used) as usize];
280                    let ok = entry.data_hash_algo.verify(slice, &entry.data_hash);
281                    if !ok {
282                        diagnostics.push(Diagnostic::error(
283                            DiagKind::DataHashMismatch { uid: entry.uid },
284                            format!(
285                                "data hash mismatch for partition '{}'",
286                                entry.label_string().unwrap_or_default()
287                            ),
288                        ));
289                    }
290                    Some(ok)
291                } else {
292                    None
293                };
294
295                entries.push(EntryView {
296                    slot: i as usize,
297                    entry,
298                    validate_ok,
299                    data_hash_ok,
300                    data_in_bounds,
301                });
302            }
303
304            // Verify the table hash over [header-with-zeroed-hash || entries].
305            let table_hash_ok = if !verify || !bh.table_hash_algo.verifies() || !all_entries_parsed
306            {
307                None
308            } else {
309                let parsed: Vec<PartitionEntry> = entries.iter().map(|e| e.entry.clone()).collect();
310                let computed =
311                    compute_table_hash(bh.table_hash_algo, bh.next_table_offset, &parsed);
312                let n = bh.table_hash_algo.digest_len();
313                let ok = computed[..n] == bh.table_hash[..n];
314                if !ok {
315                    diagnostics.push(Diagnostic::error(
316                        DiagKind::TableHashMismatch { block_index: index },
317                        format!("table hash mismatch for block {index} at {off:#x}"),
318                    ));
319                }
320                Some(ok)
321            };
322
323            let next = bh.next_table_offset;
324            if next != 0 && next <= off {
325                diagnostics.push(Diagnostic::info(
326                    DiagKind::BackwardChainLink {
327                        from: off,
328                        to: next,
329                    },
330                    format!("block {index} links backward: {off:#x} -> {next:#x}"),
331                ));
332            }
333
334            blocks.push(BlockView {
335                index,
336                offset: off,
337                header: bh.clone(),
338                entries,
339                next_offset: next,
340                stored_table_hash: bh.table_hash,
341                table_hash_ok,
342            });
343
344            off = next;
345            index += 1;
346        }
347    }
348
349    Walk {
350        file_len,
351        header,
352        blocks,
353        diagnostics,
354    }
355}
356
357/// Convenience: a flat copy of every parsed entry, in chain order.
358pub fn flat_entries(walk: &Walk) -> Vec<&EntryView> {
359    walk.blocks.iter().flat_map(|b| b.entries.iter()).collect()
360}
361
362/// Look up a hash algorithm's display name without exposing internals.
363pub fn algo_name(algo: HashAlgo) -> &'static str {
364    match algo.id() {
365        0 => "none",
366        1 => "crc32",
367        2 => "crc32c",
368        3 => "crc64",
369        4 => "md5",
370        5 => "sha1",
371        16 => "sha256",
372        17 => "sha512",
373        18 => "blake3",
374        _ => "unknown",
375    }
376}
377
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// A trailer-mode file must be walked through its trailer: one block, one
    /// entry, and a `TrailerResolved` informational diagnostic.
    #[test]
    fn walk_resolves_trailer_mode_file() {
        let mut container = pcf::Container::create(Cursor::new(Vec::new())).unwrap();
        container
            .add_partition(1, [1u8; 16], "p", b"hi", 0, HashAlgo::Sha256)
            .unwrap();
        container.finalize_with_trailer().unwrap();
        let bytes = container.into_storage().into_inner();

        let result = walk(&bytes, true);

        assert_eq!(result.blocks.len(), 1);
        assert_eq!(result.blocks[0].entries.len(), 1);
        let trailer_resolved = result
            .diagnostics
            .iter()
            .any(|d| matches!(d.kind, DiagKind::TrailerResolved { .. }));
        assert!(trailer_resolved);
    }

    /// A header that carries the trailer sentinel in a file without any valid
    /// trailer yields no blocks and a diagnostic that mentions the trailer.
    #[test]
    fn walk_reports_missing_trailer() {
        let mut container = pcf::Container::create(Cursor::new(Vec::new())).unwrap();
        container
            .add_partition(1, [1u8; 16], "p", b"x", 0, HashAlgo::Sha256)
            .unwrap();
        let mut bytes = container.into_storage().into_inner();

        // Overwrite header bytes 12..20 with the trailer sentinel.
        bytes[12..20].copy_from_slice(&PT_OFFSET_TRAILER.to_le_bytes());

        let result = walk(&bytes, true);

        assert!(result.blocks.is_empty());
        let mentions_trailer = result
            .diagnostics
            .iter()
            .any(|d| d.message.contains("trailer"));
        assert!(mentions_trailer);
    }
}
417}