Skip to main content

lamfold_squash/
squash.rs

1//! SquashFS 4.0 reader, clean-roomed from the public on-disk format docs.
2//!
3//! Layout: a 96-byte superblock; inodes and directory listings packed into
4//! compressed 8 KiB "metadata blocks" (a 2-byte header per block, top bit =
5//! uncompressed); file data in `block_size` data blocks plus a shared fragment
6//! for tails. All decompression goes through the shared substrate codec.
7
8use alloc::collections::BTreeMap;
9use alloc::string::String;
10use alloc::vec;
11use alloc::vec::Vec;
12
13use lamfold::{
14    checked_full_read_len, decode, BlockSource, Codec, DirEntry, FileKind, FoldError, FoldFrontend,
15    Metadata, NodeId, Result, SubstrateCtx,
16};
17
/// Superblock magic; its little-endian byte sequence spells "hsqs".
const MAGIC: u32 = u32::from_le_bytes(*b"hsqs");
/// Largest decompressed metadata block, in bytes (8 KiB).
const META_MAX: usize = 8 * 1024;
/// Fragment index value meaning "this file has no fragment tail".
const NO_FRAGMENT: u32 = u32::MAX;
/// Flag bit of a block-size word: the block is stored uncompressed.
const SIZE_UNCOMPRESSED: u32 = 0x0100_0000;
/// Low 24 bits of a block-size word: the on-disk length of the block.
const SIZE_MASK: u32 = SIZE_UNCOMPRESSED - 1;
23
/// Type-specific payload of a parsed inode.
#[derive(Clone)]
enum Body {
    /// A directory: where its listing lives and how long it is.
    Dir {
        /// Absolute offset of the metadata block in which the listing starts.
        abs_block: u64,
        /// Byte offset of the listing inside that decompressed block.
        offset: u16,
        /// Listing size as stored in the inode (still carrying the 3-byte bias).
        size: u32,
    },
    /// A regular file: its data blocks plus an optional fragment tail.
    File {
        /// Absolute offset of the first data block.
        blocks_start: u64,
        /// One size word per data block (length in the low 24 bits, bit 24 set
        /// when stored uncompressed, zero length for a sparse block).
        block_sizes: Vec<u32>,
        /// Fragment-table index of the tail, or `NO_FRAGMENT`.
        frag_idx: u32,
        /// Byte offset of the tail inside the decompressed fragment block.
        frag_off: u32,
    },
    /// A symbolic link, carrying the raw target bytes.
    Symlink(Vec<u8>),
    /// Any inode type this reader does not decode further.
    Other,
}
40
41#[derive(Clone)]
42struct SquashInode {
43    kind: FileKind,
44    size: u64,
45    body: Body,
46}
47
48/// A mounted SquashFS volume.
49pub struct SquashFs<S: BlockSource> {
50    src: S,
51    codec: Codec,
52    block_size: u32,
53    inode_table_start: u64,
54    directory_table_start: u64,
55    fragment_table_start: u64,
56    nodes: Vec<SquashInode>,
57    by_ref: BTreeMap<u64, NodeId>,
58}
59
60impl<S: BlockSource> SquashFs<S> {
61    /// Decompress one metadata block at absolute offset `at`; returns its bytes
62    /// and the offset of the following block.
63    fn read_meta_block(&mut self, at: u64) -> Result<(Vec<u8>, u64)> {
64        let mut hdr = [0u8; 2];
65        self.src.read_at(at, &mut hdr)?;
66        let h = u16::from_le_bytes(hdr);
67        let uncompressed = h & 0x8000 != 0;
68        let len = usize::from(h & 0x7FFF);
69        let mut raw = vec![0u8; len];
70        self.src.read_at(at + 2, &mut raw)?;
71        let data = if uncompressed {
72            raw
73        } else {
74            decode(self.codec, &raw, META_MAX)?
75        };
76        Ok((data, at + 2 + len as u64))
77    }
78
79    /// Read `length` bytes starting at `offset` within the metadata block at
80    /// absolute offset `abs_block`, spanning into following blocks as needed.
81    fn read_meta_span(&mut self, abs_block: u64, offset: usize, length: usize) -> Result<Vec<u8>> {
82        let _ = checked_full_read_len(length as u64)?;
83        let mut out = Vec::with_capacity(length);
84        let (mut block, mut next) = self.read_meta_block(abs_block)?;
85        let mut pos = offset;
86        while out.len() < length {
87            if pos >= block.len() {
88                let (b, n) = self.read_meta_block(next)?;
89                block = b;
90                next = n;
91                pos = 0;
92                if block.is_empty() {
93                    break;
94                }
95            }
96            let take = core::cmp::min(length - out.len(), block.len() - pos);
97            out.extend_from_slice(&block[pos..pos + take]);
98            pos += take;
99        }
100        Ok(out)
101    }
102
103    fn block_count(&self, size: u64, frag_idx: u32) -> usize {
104        let bs = u64::from(self.block_size);
105        if frag_idx != NO_FRAGMENT {
106            (size / bs) as usize
107        } else {
108            size.div_ceil(bs) as usize
109        }
110    }
111
112    /// Parse the inode at the given inode reference `(block << 16) | offset`.
113    fn parse_inode(&mut self, inode_ref: u64) -> Result<SquashInode> {
114        let abs = self.inode_table_start + (inode_ref >> 16);
115        let off = (inode_ref & 0xFFFF) as usize;
116        let itype = le_u16(&self.read_meta_span(abs, off, 16)?, 0)?;
117        match itype {
118            1 => {
119                let b = self.read_meta_span(abs, off, 32)?;
120                Ok(SquashInode {
121                    kind: FileKind::Directory,
122                    size: u64::from(le_u16(&b, 24)?),
123                    body: Body::Dir {
124                        abs_block: self.directory_table_start + u64::from(le_u32(&b, 16)?),
125                        offset: le_u16(&b, 26)?,
126                        size: u32::from(le_u16(&b, 24)?),
127                    },
128                })
129            }
130            8 => {
131                let b = self.read_meta_span(abs, off, 40)?;
132                let size = le_u32(&b, 20)?;
133                Ok(SquashInode {
134                    kind: FileKind::Directory,
135                    size: u64::from(size),
136                    body: Body::Dir {
137                        abs_block: self.directory_table_start + u64::from(le_u32(&b, 24)?),
138                        offset: le_u16(&b, 34)?,
139                        size,
140                    },
141                })
142            }
143            2 => {
144                let h = self.read_meta_span(abs, off, 32)?;
145                let blocks_start = u64::from(le_u32(&h, 16)?);
146                let frag_idx = le_u32(&h, 20)?;
147                let frag_off = le_u32(&h, 24)?;
148                let size = u64::from(le_u32(&h, 28)?);
149                let n = self.block_count(size, frag_idx);
150                let full = self.read_meta_span(abs, off, 32 + n * 4)?;
151                let block_sizes = (0..n)
152                    .map(|i| le_u32(&full, 32 + i * 4))
153                    .collect::<Result<_>>()?;
154                Ok(SquashInode {
155                    kind: FileKind::Regular,
156                    size,
157                    body: Body::File {
158                        blocks_start,
159                        block_sizes,
160                        frag_idx,
161                        frag_off,
162                    },
163                })
164            }
165            9 => {
166                let h = self.read_meta_span(abs, off, 56)?;
167                let blocks_start = le_u64(&h, 16)?;
168                let size = le_u64(&h, 24)?;
169                let frag_idx = le_u32(&h, 44)?;
170                let frag_off = le_u32(&h, 48)?;
171                let n = self.block_count(size, frag_idx);
172                let full = self.read_meta_span(abs, off, 56 + n * 4)?;
173                let block_sizes = (0..n)
174                    .map(|i| le_u32(&full, 56 + i * 4))
175                    .collect::<Result<_>>()?;
176                Ok(SquashInode {
177                    kind: FileKind::Regular,
178                    size,
179                    body: Body::File {
180                        blocks_start,
181                        block_sizes,
182                        frag_idx,
183                        frag_off,
184                    },
185                })
186            }
187            3 | 10 => {
188                let h = self.read_meta_span(abs, off, 24)?;
189                let target_size = checked_full_read_len(u64::from(le_u32(&h, 20)?))?;
190                let full = self.read_meta_span(abs, off, 24 + target_size)?;
191                let target = full
192                    .get(24..24 + target_size)
193                    .ok_or(FoldError::Corrupt("squashfs: symlink target OOB"))?
194                    .to_vec();
195                Ok(SquashInode {
196                    kind: FileKind::Symlink,
197                    size: target_size as u64,
198                    body: Body::Symlink(target),
199                })
200            }
201            _ => Ok(SquashInode {
202                kind: FileKind::Other,
203                size: 0,
204                body: Body::Other,
205            }),
206        }
207    }
208
209    fn intern(&mut self, inode_ref: u64, inode: SquashInode) -> NodeId {
210        if let Some(&id) = self.by_ref.get(&inode_ref) {
211            return id;
212        }
213        let id = self.nodes.len() as NodeId;
214        self.nodes.push(inode);
215        self.by_ref.insert(inode_ref, id);
216        id
217    }
218
219    fn inode(&self, node: NodeId) -> Result<SquashInode> {
220        self.nodes
221            .get(node as usize)
222            .cloned()
223            .ok_or(FoldError::NotFound)
224    }
225
226    /// Parse a directory listing into (name, child inode reference) pairs.
227    fn dir_entries(
228        &mut self,
229        abs_block: u64,
230        offset: u16,
231        size: u32,
232    ) -> Result<Vec<(String, u64)>> {
233        // `size` includes a 3-byte bias; a listing of <= 3 is empty.
234        let listing = self.read_meta_span(
235            abs_block,
236            usize::from(offset),
237            (size as usize).saturating_sub(3),
238        )?;
239        let mut out = Vec::new();
240        let mut p = 0;
241        while p + 12 <= listing.len() {
242            let count = le_u32(&listing, p)? as usize; // entries - 1
243            let start = u64::from(le_u32(&listing, p + 4)?); // inode metadata block
244            p += 12;
245            for _ in 0..count.saturating_add(1) {
246                if p + 8 > listing.len() {
247                    break;
248                }
249                let eoff = le_u16(&listing, p)?;
250                let nlen = usize::from(le_u16(&listing, p + 6)?) + 1;
251                let name = listing
252                    .get(p + 8..p + 8 + nlen)
253                    .ok_or(FoldError::Corrupt("squashfs: dir entry name OOB"))?;
254                out.push((
255                    String::from_utf8_lossy(name).into_owned(),
256                    (start << 16) | u64::from(eoff),
257                ));
258                p += 8 + nlen;
259            }
260        }
261        Ok(out)
262    }
263
264    /// Read a fragment block (the shared tail block) and return its decompressed
265    /// bytes.
266    fn read_fragment(&mut self, idx: u32) -> Result<Vec<u8>> {
267        // The fragment table is an array of u64 pointers to metadata blocks of
268        // 16-byte fragment entries (512 per 8 KiB block).
269        let meta_blk = u64::from(idx / 512);
270        let in_block = (idx % 512) as usize;
271        let mut ptr = [0u8; 8];
272        self.src
273            .read_at(self.fragment_table_start + meta_blk * 8, &mut ptr)?;
274        let entry_block = u64::from_le_bytes(ptr);
275        let fe = self.read_meta_span(entry_block, in_block * 16, 16)?;
276        let start = le_u64(&fe, 0)?;
277        let raw_size = le_u32(&fe, 8)?;
278        self.read_data_block(start, raw_size)
279    }
280
281    /// Read one on-disk data/fragment block and decompress it if needed.
282    fn read_data_block(&mut self, at: u64, raw_size: u32) -> Result<Vec<u8>> {
283        let len = (raw_size & SIZE_MASK) as usize;
284        if len == 0 {
285            return Ok(Vec::new()); // sparse
286        }
287        let mut raw = vec![0u8; len];
288        self.src.read_at(at, &mut raw)?;
289        if raw_size & SIZE_UNCOMPRESSED != 0 {
290            Ok(raw)
291        } else {
292            decode(self.codec, &raw, self.block_size as usize)
293        }
294    }
295
296    /// Read a whole file's bytes (data blocks + fragment tail), read-capped.
297    fn read_file(
298        &mut self,
299        size: u64,
300        blocks_start: u64,
301        block_sizes: &[u32],
302        frag_idx: u32,
303        frag_off: u32,
304    ) -> Result<Vec<u8>> {
305        let total = checked_full_read_len(size)?;
306        let mut out = Vec::with_capacity(total);
307        let mut at = blocks_start;
308        for &bs in block_sizes {
309            if out.len() >= total {
310                break;
311            }
312            let len = (bs & SIZE_MASK) as usize;
313            if len == 0 {
314                // sparse full block → zero-fill up to one block (or the remainder)
315                let n = core::cmp::min(self.block_size as usize, total - out.len());
316                out.resize(out.len() + n, 0);
317            } else {
318                let block = self.read_data_block(at, bs)?;
319                at += len as u64;
320                out.extend_from_slice(&block);
321            }
322        }
323        if frag_idx != NO_FRAGMENT && out.len() < total {
324            let frag = self.read_fragment(frag_idx)?;
325            let start = frag_off as usize;
326            let take = total - out.len();
327            let tail = frag
328                .get(start..start + take)
329                .ok_or(FoldError::Corrupt("squashfs: fragment tail OOB"))?;
330            out.extend_from_slice(tail);
331        }
332        out.truncate(total);
333        Ok(out)
334    }
335}
336
337impl<S: BlockSource> FoldFrontend<S> for SquashFs<S> {
338    const TAG: &'static str = "squashfs";
339
340    fn probe(src: &mut S) -> Result<bool> {
341        if src.len() < 4 {
342            return Ok(false);
343        }
344        let mut m = [0u8; 4];
345        src.read_at(0, &mut m)?;
346        Ok(u32::from_le_bytes(m) == MAGIC)
347    }
348
349    fn open(src: S, _cx: &mut SubstrateCtx<'_>) -> Result<Self> {
350        let mut sb = [0u8; 96];
351        if src.len() < 96 {
352            return Err(FoldError::Corrupt(
353                "squashfs: source shorter than superblock",
354            ));
355        }
356        let mut src = src;
357        src.read_at(0, &mut sb)?;
358        if le_u32(&sb, 0)? != MAGIC {
359            return Err(FoldError::Corrupt("squashfs: bad magic"));
360        }
361        let codec = match le_u16(&sb, 20)? {
362            1 => Codec::Zlib,
363            3 => Codec::Lzo,
364            4 => Codec::Xz,
365            5 => Codec::Lz4,
366            6 => Codec::Zstd,
367            2 => return Err(FoldError::Unsupported("squashfs: legacy lzma1 compression")),
368            _ => return Err(FoldError::Unsupported("squashfs: unknown compression id")),
369        };
370        let block_size = le_u32(&sb, 12)?;
371        if block_size == 0 || block_size > (1 << 20) {
372            return Err(FoldError::Corrupt("squashfs: implausible block size"));
373        }
374        let root_ref = le_u64(&sb, 32)?;
375        let mut me = SquashFs {
376            src,
377            codec,
378            block_size,
379            inode_table_start: le_u64(&sb, 64)?,
380            directory_table_start: le_u64(&sb, 72)?,
381            fragment_table_start: le_u64(&sb, 80)?,
382            nodes: Vec::new(),
383            by_ref: BTreeMap::new(),
384        };
385        let root = me.parse_inode(root_ref)?;
386        me.intern(root_ref, root); // node 0
387        Ok(me)
388    }
389
390    fn root(&self) -> NodeId {
391        0
392    }
393
394    fn lookup(
395        &mut self,
396        dir: NodeId,
397        name: &str,
398        cx: &mut SubstrateCtx<'_>,
399    ) -> Result<Option<NodeId>> {
400        Ok(self
401            .read_dir(dir, cx)?
402            .into_iter()
403            .find(|e| e.name == name)
404            .map(|e| e.node))
405    }
406
407    fn read_dir(&mut self, dir: NodeId, _cx: &mut SubstrateCtx<'_>) -> Result<Vec<DirEntry>> {
408        let inode = self.inode(dir)?;
409        let Body::Dir {
410            abs_block,
411            offset,
412            size,
413        } = inode.body
414        else {
415            return Err(FoldError::NotDirectory);
416        };
417        let entries = self.dir_entries(abs_block, offset, size)?;
418        let mut out = Vec::with_capacity(entries.len());
419        for (name, child_ref) in entries {
420            let child = self.parse_inode(child_ref)?;
421            let kind = child.kind;
422            let node = self.intern(child_ref, child);
423            out.push(DirEntry { name, node, kind });
424        }
425        Ok(out)
426    }
427
428    fn metadata(&mut self, node: NodeId, _cx: &mut SubstrateCtx<'_>) -> Result<Metadata> {
429        let inode = self.inode(node)?;
430        Ok(Metadata {
431            kind: inode.kind,
432            size: inode.size,
433            mode: 0,
434        })
435    }
436
437    fn read_at(
438        &mut self,
439        node: NodeId,
440        off: u64,
441        buf: &mut [u8],
442        _cx: &mut SubstrateCtx<'_>,
443    ) -> Result<usize> {
444        let inode = self.inode(node)?;
445        let Body::File {
446            blocks_start,
447            block_sizes,
448            frag_idx,
449            frag_off,
450        } = &inode.body
451        else {
452            return Err(if inode.kind == FileKind::Directory {
453                FoldError::IsDirectory
454            } else {
455                FoldError::Unsupported("squashfs: read of a non-regular inode")
456            });
457        };
458        if off >= inode.size {
459            return Ok(0);
460        }
461        // Read the whole file then slice. Streaming per-block is a follow-up.
462        let data = self.read_file(inode.size, *blocks_start, block_sizes, *frag_idx, *frag_off)?;
463        let start = off as usize;
464        let n = core::cmp::min(buf.len(), data.len().saturating_sub(start));
465        buf[..n].copy_from_slice(&data[start..start + n]);
466        Ok(n)
467    }
468
469    fn read_link(&mut self, node: NodeId, _cx: &mut SubstrateCtx<'_>) -> Result<Option<Vec<u8>>> {
470        match self.inode(node)?.body {
471            Body::Symlink(t) => Ok(Some(t)),
472            _ => Ok(None),
473        }
474    }
475}
476
477fn le_u16(b: &[u8], o: usize) -> Result<u16> {
478    b.get(o..o + 2)
479        .and_then(|s| s.try_into().ok())
480        .map(u16::from_le_bytes)
481        .ok_or(FoldError::Corrupt("squashfs: truncated u16"))
482}
483fn le_u32(b: &[u8], o: usize) -> Result<u32> {
484    b.get(o..o + 4)
485        .and_then(|s| s.try_into().ok())
486        .map(u32::from_le_bytes)
487        .ok_or(FoldError::Corrupt("squashfs: truncated u32"))
488}
489fn le_u64(b: &[u8], o: usize) -> Result<u64> {
490    b.get(o..o + 8)
491        .and_then(|s| s.try_into().ok())
492        .map(u64::from_le_bytes)
493        .ok_or(FoldError::Corrupt("squashfs: truncated u64"))
494}