Skip to main content

pcf_debug/model/
walk.rs

1//! A defensive, read-only walk of a PCF file's physical structure.
2//!
3//! Unlike [`pcf::Container`], which flattens the table-block chain into a single
4//! list of entries and hides per-block layout, this walk preserves the chain:
5//! every block's offset, header, entry array, chain link, and stored-vs-computed
6//! table hash. It reuses the crate's public byte parsers (`*::from_bytes`) so it
7//! never re-implements the format, but it tolerates corruption that
8//! `Container::open` would reject.
9
10use pcf::{
11    compute_table_hash, FileHeader, HashAlgo, PartitionEntry, TableBlockHeader, ENTRY_SIZE,
12    HEADER_SIZE, TABLE_HEADER_SIZE,
13};
14
15use super::diag::{DiagKind, Diagnostic};
16
/// Upper bound on how many table blocks the walk follows along the chain
/// before it stops and reports the chain as suspect.
const MAX_BLOCKS: usize = 4_096;
19
20/// One partition entry as physically found in a block.
21#[derive(Debug, Clone)]
22pub struct EntryView {
23    pub slot: usize,
24    pub entry: PartitionEntry,
25    /// `Ok` if [`PartitionEntry::validate`] passed, else the stringified reason.
26    pub validate_ok: Result<(), String>,
27    /// `Some(true/false)` when the data region is readable and the algorithm
28    /// verifies; `None` when unreadable or the algorithm is `None`.
29    pub data_hash_ok: Option<bool>,
30    /// Whether `[start_offset, start_offset + used_bytes)` lies within the file.
31    pub data_in_bounds: bool,
32}
33
34/// One table block as physically found in the chain.
35#[derive(Debug, Clone)]
36pub struct BlockView {
37    pub index: usize,
38    pub offset: u64,
39    pub header: TableBlockHeader,
40    pub entries: Vec<EntryView>,
41    pub next_offset: u64,
42    pub stored_table_hash: [u8; 64],
43    /// `Some(true/false)` when recomputable; `None` if the algorithm is `None`
44    /// or the block's entries could not all be parsed.
45    pub table_hash_ok: Option<bool>,
46}
47
48/// The result of walking a file.
49#[derive(Debug, Clone)]
50pub struct Walk {
51    pub file_len: u64,
52    pub header: Option<FileHeader>,
53    pub blocks: Vec<BlockView>,
54    pub diagnostics: Vec<Diagnostic>,
55}
56
/// Copy `N` bytes starting at `off` out of `data` into a fixed-size array.
///
/// Returns `None` when the window `[off, off + N)` does not lie entirely
/// inside `data`. The end of the window is computed with a checked add, so an
/// absurdly large `off` (for example one derived from a corrupt on-disk field)
/// yields `None` instead of overflowing.
fn read_array<const N: usize>(data: &[u8], off: usize) -> Option<[u8; N]> {
    let end = off.checked_add(N)?;
    data.get(off..end)?.try_into().ok()
}
60
61/// Walk `data` (the whole file loaded into memory) and build a structural model.
62///
63/// When `verify` is false, data and table hashes are not computed (a fast path
64/// for very large files); the corresponding `*_hash_ok` fields stay `None`.
65pub fn walk(data: &[u8], verify: bool) -> Walk {
66    let file_len = data.len() as u64;
67    let mut diagnostics = Vec::new();
68
69    // ---- header ----------------------------------------------------------
70    let header = match read_array::<{ HEADER_SIZE as usize }>(data, 0) {
71        Some(buf) => match FileHeader::from_bytes(&buf) {
72            Ok(h) => Some(h),
73            Err(e) => {
74                diagnostics.push(Diagnostic::error(
75                    DiagKind::BadHeader {
76                        reason: format!("{e:?}"),
77                    },
78                    format!("file header is invalid: {e:?}"),
79                ));
80                None
81            }
82        },
83        None => {
84            diagnostics.push(Diagnostic::error(
85                DiagKind::BadHeader {
86                    reason: "file shorter than 20-byte header".into(),
87                },
88                format!("file is only {file_len} bytes; a PCF header needs {HEADER_SIZE}"),
89            ));
90            None
91        }
92    };
93
94    // ---- block chain -----------------------------------------------------
95    let mut blocks = Vec::new();
96    let mut visited: Vec<u64> = Vec::new();
97    if let Some(h) = header {
98        let mut off = h.partition_table_offset;
99        let mut index = 0usize;
100        while off != 0 {
101            if blocks.len() >= MAX_BLOCKS {
102                diagnostics.push(Diagnostic::error(
103                    DiagKind::ChainCycle { at_offset: off },
104                    format!("chain exceeds {MAX_BLOCKS} blocks; stopping (possible cycle)"),
105                ));
106                break;
107            }
108            if visited.contains(&off) {
109                diagnostics.push(Diagnostic::error(
110                    DiagKind::ChainCycle { at_offset: off },
111                    format!("table-block chain cycles back to offset {off:#x}"),
112                ));
113                break;
114            }
115            visited.push(off);
116
117            let hdr_buf = match read_array::<{ TABLE_HEADER_SIZE as usize }>(data, off as usize) {
118                Some(b) => b,
119                None => {
120                    diagnostics.push(Diagnostic::error(
121                        DiagKind::Truncated {
122                            start: off,
123                            want: off + TABLE_HEADER_SIZE,
124                            have: file_len,
125                        },
126                        format!("table block at {off:#x} runs past end of file"),
127                    ));
128                    break;
129                }
130            };
131            let bh = match TableBlockHeader::from_bytes(&hdr_buf) {
132                Ok(bh) => bh,
133                Err(e) => {
134                    diagnostics.push(Diagnostic::error(
135                        DiagKind::BadBlock {
136                            offset: off,
137                            reason: format!("{e:?}"),
138                        },
139                        format!("table block header at {off:#x} is invalid: {e:?}"),
140                    ));
141                    break;
142                }
143            };
144
145            // Parse the entries that follow the header.
146            let mut entries = Vec::with_capacity(bh.partition_count as usize);
147            let mut all_entries_parsed = true;
148            for i in 0..bh.partition_count as u64 {
149                let eoff = off + TABLE_HEADER_SIZE + i * ENTRY_SIZE;
150                let ebuf = match read_array::<{ ENTRY_SIZE as usize }>(data, eoff as usize) {
151                    Some(b) => b,
152                    None => {
153                        all_entries_parsed = false;
154                        diagnostics.push(Diagnostic::error(
155                            DiagKind::Truncated {
156                                start: eoff,
157                                want: eoff + ENTRY_SIZE,
158                                have: file_len,
159                            },
160                            format!(
161                                "entry {i} of block {index} at {eoff:#x} runs past end of file"
162                            ),
163                        ));
164                        break;
165                    }
166                };
167                let entry = match PartitionEntry::from_bytes(&ebuf) {
168                    Ok(e) => e,
169                    Err(e) => {
170                        all_entries_parsed = false;
171                        diagnostics.push(Diagnostic::warn(
172                            DiagKind::BadBlock {
173                                offset: eoff,
174                                reason: format!("{e:?}"),
175                            },
176                            format!("entry {i} of block {index} could not be parsed: {e:?}"),
177                        ));
178                        break;
179                    }
180                };
181
182                let validate_ok = entry.validate().map_err(|e| format!("{e:?}"));
183                if let Err(reason) = &validate_ok {
184                    diagnostics.push(Diagnostic::warn(
185                        DiagKind::EntryInvalid {
186                            uid: entry.uid,
187                            reason: reason.clone(),
188                        },
189                        format!(
190                            "entry '{}' fails conformance: {reason}",
191                            entry.label_string().unwrap_or_default()
192                        ),
193                    ));
194                }
195
196                let start = entry.start_offset;
197                let used = entry.used_bytes;
198                let data_in_bounds = start
199                    .checked_add(used)
200                    .map(|e| e <= file_len)
201                    .unwrap_or(false);
202                if used > 0 && !data_in_bounds {
203                    diagnostics.push(Diagnostic::error(
204                        DiagKind::Truncated {
205                            start,
206                            want: start.saturating_add(used),
207                            have: file_len,
208                        },
209                        format!(
210                            "data of partition '{}' at {start:#x} runs past end of file",
211                            entry.label_string().unwrap_or_default()
212                        ),
213                    ));
214                }
215                let data_hash_ok = if !verify || !entry.data_hash_algo.verifies() {
216                    None
217                } else if data_in_bounds {
218                    let slice = &data[start as usize..(start + used) as usize];
219                    let ok = entry.data_hash_algo.verify(slice, &entry.data_hash);
220                    if !ok {
221                        diagnostics.push(Diagnostic::error(
222                            DiagKind::DataHashMismatch { uid: entry.uid },
223                            format!(
224                                "data hash mismatch for partition '{}'",
225                                entry.label_string().unwrap_or_default()
226                            ),
227                        ));
228                    }
229                    Some(ok)
230                } else {
231                    None
232                };
233
234                entries.push(EntryView {
235                    slot: i as usize,
236                    entry,
237                    validate_ok,
238                    data_hash_ok,
239                    data_in_bounds,
240                });
241            }
242
243            // Verify the table hash over [header-with-zeroed-hash || entries].
244            let table_hash_ok = if !verify || !bh.table_hash_algo.verifies() || !all_entries_parsed
245            {
246                None
247            } else {
248                let parsed: Vec<PartitionEntry> = entries.iter().map(|e| e.entry.clone()).collect();
249                let computed =
250                    compute_table_hash(bh.table_hash_algo, bh.next_table_offset, &parsed);
251                let n = bh.table_hash_algo.digest_len();
252                let ok = computed[..n] == bh.table_hash[..n];
253                if !ok {
254                    diagnostics.push(Diagnostic::error(
255                        DiagKind::TableHashMismatch { block_index: index },
256                        format!("table hash mismatch for block {index} at {off:#x}"),
257                    ));
258                }
259                Some(ok)
260            };
261
262            let next = bh.next_table_offset;
263            if next != 0 && next <= off {
264                diagnostics.push(Diagnostic::info(
265                    DiagKind::BackwardChainLink {
266                        from: off,
267                        to: next,
268                    },
269                    format!("block {index} links backward: {off:#x} -> {next:#x}"),
270                ));
271            }
272
273            blocks.push(BlockView {
274                index,
275                offset: off,
276                header: bh.clone(),
277                entries,
278                next_offset: next,
279                stored_table_hash: bh.table_hash,
280                table_hash_ok,
281            });
282
283            off = next;
284            index += 1;
285        }
286    }
287
288    Walk {
289        file_len,
290        header,
291        blocks,
292        diagnostics,
293    }
294}
295
296/// Convenience: a flat copy of every parsed entry, in chain order.
297pub fn flat_entries(walk: &Walk) -> Vec<&EntryView> {
298    walk.blocks.iter().flat_map(|b| b.entries.iter()).collect()
299}
300
301/// Look up a hash algorithm's display name without exposing internals.
302pub fn algo_name(algo: HashAlgo) -> &'static str {
303    match algo.id() {
304        0 => "none",
305        1 => "crc32",
306        2 => "crc32c",
307        3 => "crc64",
308        4 => "md5",
309        5 => "sha1",
310        16 => "sha256",
311        17 => "sha512",
312        18 => "blake3",
313        _ => "unknown",
314    }
315}