Skip to main content

lamfold_udf/
udf.rs

1//! UDF 1.02 reader (ECMA-167 / OSTA UDF), clean-roomed from the free specs.
2//!
3//! Descriptor chain: Anchor VD Pointer (sector 256) → Main Volume Descriptor
4//! Sequence (Partition Descriptor → partition start; Logical Volume Descriptor →
5//! block size + the File Set Descriptor long_ad) → File Set Descriptor → root
6//! ICB. Inodes are File Entries (tag 261); directory entries are File Identifier
7//! Descriptors; file/dir data is inline, short_ad, or long_ad.
8//!
9//! No `unsafe`: every on-disk field is read through bounds-checked little-endian
10//! helpers; every allocation goes through the substrate read cap.
11
12use alloc::collections::BTreeMap;
13use alloc::string::String;
14use alloc::vec;
15use alloc::vec::Vec;
16
17use lamfold::{
18    checked_full_read_len, BlockSource, DirEntry, FileKind, FoldError, FoldFrontend, Metadata,
19    NodeId, Result, SubstrateCtx,
20};
21
/// UDF logical sector size (the AVDP lives at "sector 256" = this × 256).
const SECTOR: u64 = 2048;
/// Sector index at which `probe` and `open` look for the Anchor Volume
/// Descriptor Pointer.
const AVDP_SECTOR: u32 = 256;
/// Descriptor tag identifiers (ECMA-167 §3 / §4).
const TAG_AVDP: u16 = 2;
const TAG_PARTITION: u16 = 5;
const TAG_LOGICAL_VOLUME: u16 = 6;
const TAG_TERMINATING: u16 = 8;
const TAG_FILE_SET: u16 = 256;
const TAG_FID: u16 = 257;
const TAG_FILE_ENTRY: u16 = 261;
// Recognised only so `read_fe` can report it as unsupported.
const TAG_EXTENDED_FILE_ENTRY: u16 = 266;
/// ICBTag FileType values.
const FT_DIRECTORY: u8 = 4;
const FT_REGULAR: u8 = 5;
const FT_SYMLINK: u8 = 12;
/// Upper bound on how many sectors of the Main Volume Descriptor Sequence
/// `open` will scan, regardless of the length the AVDP declares.
const MAX_VDS_SECTORS: u32 = 64;
39
/// One contiguous run of a file's data, as decoded from a short_ad / long_ad
/// allocation descriptor by `parse_extents`.
#[derive(Clone, Copy)]
struct Extent {
    /// Absolute LBA, or `None` for an unrecorded/sparse extent (reads as zeros).
    lba: Option<u32>,
    /// Extent length in bytes (the low 30 bits of the descriptor's length word).
    len: u32,
}
46
/// Where an inode's bytes live.
#[derive(Clone)]
enum FileData {
    /// The data was embedded in the File Entry's allocation-descriptor area
    /// and has been copied out of it.
    Inline(Vec<u8>),
    /// The data is described by a list of extents, in file order.
    Extents(Vec<Extent>),
}
52
/// In-memory summary of a parsed File Entry.
#[derive(Clone)]
struct UdfInode {
    /// File kind derived from the ICBTag FileType byte.
    kind: FileKind,
    /// Information length in bytes, as stored in the File Entry.
    size: u64,
    /// Location of the file's contents (inline bytes or an extent list).
    data: FileData,
}
59
/// A mounted UDF volume.
pub struct Udf<S: BlockSource> {
    /// Underlying byte source the volume is read from.
    src: S,
    /// Logical block size in bytes; starts as `SECTOR` and is replaced by the
    /// Logical Volume Descriptor's value when that value is non-zero.
    block_size: u32,
    /// Partition starting location, added to partition-relative block numbers
    /// to obtain absolute LBAs.
    partition_start: u32,
    /// Interned inodes; a `NodeId` is an index into this vector.
    nodes: Vec<UdfInode>,
    /// File Entry LBA → `NodeId`, so the same File Entry is interned only once.
    by_lba: BTreeMap<u32, NodeId>,
}
68
69impl<S: BlockSource> Udf<S> {
70    fn read_block(&mut self, lba: u32) -> Result<Vec<u8>> {
71        let off = u64::from(lba) * u64::from(self.block_size);
72        if off + u64::from(self.block_size) > self.src.len() {
73            return Err(FoldError::Corrupt("udf: block past end of source"));
74        }
75        let mut b = vec![0u8; self.block_size as usize];
76        self.src.read_at(off, &mut b)?;
77        Ok(b)
78    }
79
80    fn intern(&mut self, fe_lba: u32, inode: UdfInode) -> NodeId {
81        if let Some(&id) = self.by_lba.get(&fe_lba) {
82            return id;
83        }
84        let id = self.nodes.len() as NodeId;
85        self.nodes.push(inode);
86        self.by_lba.insert(fe_lba, id);
87        id
88    }
89
90    fn inode(&self, node: NodeId) -> Result<UdfInode> {
91        self.nodes
92            .get(node as usize)
93            .cloned()
94            .ok_or(FoldError::NotFound)
95    }
96
97    /// Parse a File Entry (tag 261) at `fe_lba` into an inode (kind + size + the
98    /// resolved data location).
99    fn read_fe(&mut self, fe_lba: u32) -> Result<UdfInode> {
100        let buf = self.read_block(fe_lba)?;
101        match le_u16(&buf, 0)? {
102            TAG_EXTENDED_FILE_ENTRY => {
103                return Err(FoldError::Unsupported(
104                    "udf: Extended File Entry (UDF 2.x) not yet supported",
105                ))
106            }
107            TAG_FILE_ENTRY => {}
108            _ => return Err(FoldError::Corrupt("udf: expected a File Entry")),
109        }
110        let file_type = *buf.get(27).ok_or(FoldError::Corrupt("udf: short ICBTag"))?;
111        let ad_type = le_u16(&buf, 34)? & 0x7;
112        let info_len = le_u64(&buf, 56)?;
113        let l_ea = le_u32(&buf, 168)? as usize;
114        let l_ad = le_u32(&buf, 172)? as usize;
115        let ad_off = 176usize
116            .checked_add(l_ea)
117            .ok_or(FoldError::Corrupt("udf: L_EA overflow"))?;
118
119        let kind = match file_type {
120            FT_DIRECTORY => FileKind::Directory,
121            FT_REGULAR => FileKind::Regular,
122            FT_SYMLINK => FileKind::Symlink,
123            _ => FileKind::Other,
124        };
125
126        let data = match ad_type {
127            3 => {
128                // Inline: the data is the AD area itself.
129                let n = checked_full_read_len(info_len)?;
130                let end = ad_off
131                    .checked_add(n)
132                    .ok_or(FoldError::Corrupt("udf: inline overflow"))?;
133                let bytes = buf
134                    .get(ad_off..end)
135                    .ok_or(FoldError::Corrupt("udf: inline data out of bounds"))?;
136                FileData::Inline(bytes.to_vec())
137            }
138            0 => self.parse_extents(&buf, ad_off, l_ad, 8)?,
139            1 => self.parse_extents(&buf, ad_off, l_ad, 16)?,
140            _ => {
141                return Err(FoldError::Unsupported(
142                    "udf: extended_ad allocation descriptors not supported",
143                ))
144            }
145        };
146        Ok(UdfInode {
147            kind,
148            size: info_len,
149            data,
150        })
151    }
152
153    /// Parse a short_ad (`stride` 8) or long_ad (`stride` 16) list into extents.
154    /// The extent position is partition-relative (short_ad) or carries an LBA +
155    /// partition ref (long_ad); with a single physical partition both resolve to
156    /// `partition_start + block`.
157    fn parse_extents(
158        &self,
159        buf: &[u8],
160        ad_off: usize,
161        l_ad: usize,
162        stride: usize,
163    ) -> Result<FileData> {
164        let ads = buf.get(ad_off..ad_off + l_ad).ok_or(FoldError::Corrupt(
165            "udf: allocation descriptors out of bounds",
166        ))?;
167        let mut ex = Vec::new();
168        for c in ads.chunks_exact(stride) {
169            let raw = u32::from_le_bytes([c[0], c[1], c[2], c[3]]);
170            let len = raw & 0x3FFF_FFFF;
171            let etype = raw >> 30;
172            if len == 0 {
173                continue;
174            }
175            if etype == 3 {
176                return Err(FoldError::Unsupported(
177                    "udf: allocation-extent continuation not supported",
178                ));
179            }
180            // short_ad: position @4; long_ad: logical block @4 (partition ref @8).
181            let block = u32::from_le_bytes([c[4], c[5], c[6], c[7]]);
182            ex.push(Extent {
183                lba: (etype == 0).then_some(self.partition_start + block),
184                len,
185            });
186        }
187        Ok(FileData::Extents(ex))
188    }
189
190    /// Read a whole file/directory's bytes (inline, or by reading its extents),
191    /// capped to the inode's information length.
192    fn read_all(&mut self, inode: &UdfInode) -> Result<Vec<u8>> {
193        match &inode.data {
194            FileData::Inline(b) => Ok(b.clone()),
195            FileData::Extents(ex) => {
196                let total = checked_full_read_len(inode.size)?;
197                let mut out = Vec::with_capacity(total);
198                for e in ex {
199                    if out.len() >= total {
200                        break;
201                    }
202                    let take = core::cmp::min(e.len as usize, total - out.len());
203                    match e.lba {
204                        Some(lba) => {
205                            let mut chunk = vec![0u8; take];
206                            self.src
207                                .read_at(u64::from(lba) * u64::from(self.block_size), &mut chunk)?;
208                            out.extend_from_slice(&chunk);
209                        }
210                        None => out.resize(out.len() + take, 0),
211                    }
212                }
213                out.truncate(total);
214                Ok(out)
215            }
216        }
217    }
218}
219
220impl<S: BlockSource> FoldFrontend<S> for Udf<S> {
    /// Identifier string this frontend supplies for `FoldFrontend::TAG`.
    const TAG: &'static str = "udf";
222
223    fn probe(src: &mut S) -> Result<bool> {
224        let off = u64::from(AVDP_SECTOR) * SECTOR;
225        if src.len() < off + 4 {
226            return Ok(false);
227        }
228        let mut t = [0u8; 4];
229        src.read_at(off, &mut t)?;
230        Ok(u16::from_le_bytes([t[0], t[1]]) == TAG_AVDP)
231    }
232
233    fn open(src: S, _cx: &mut SubstrateCtx<'_>) -> Result<Self> {
234        let mut me = Udf {
235            src,
236            block_size: SECTOR as u32,
237            partition_start: 0,
238            nodes: Vec::new(),
239            by_lba: BTreeMap::new(),
240        };
241
242        // Anchor Volume Descriptor Pointer at sector 256 → Main VDS extent.
243        let avdp = me.read_block(AVDP_SECTOR)?;
244        if le_u16(&avdp, 0)? != TAG_AVDP {
245            return Err(FoldError::Corrupt(
246                "udf: no Anchor VD Pointer at sector 256",
247            ));
248        }
249        let mvds_len = le_u32(&avdp, 16)?;
250        let mvds_loc = le_u32(&avdp, 20)?;
251
252        // Walk the Main VDS for the Partition + Logical Volume descriptors.
253        let mut partition_start = None;
254        let mut lvd = None; // (logical_block_size, fsd_lb)
255        let n_sectors = (mvds_len / SECTOR as u32).min(MAX_VDS_SECTORS);
256        for i in 0..n_sectors {
257            let b = me.read_block(mvds_loc + i)?;
258            match le_u16(&b, 0)? {
259                TAG_PARTITION => partition_start = Some(le_u32(&b, 188)?),
260                TAG_LOGICAL_VOLUME => {
261                    let lbs = le_u32(&b, 212)?;
262                    let fsd_lb = le_u32(&b, 252)?; // LogicalVolumeContentsUse long_ad
263                    lvd = Some((lbs, fsd_lb));
264                }
265                TAG_TERMINATING => break,
266                _ => {}
267            }
268        }
269        let partition_start =
270            partition_start.ok_or(FoldError::Corrupt("udf: no Partition Descriptor"))?;
271        let (lbs, fsd_lb) = lvd.ok_or(FoldError::Corrupt("udf: no Logical Volume Descriptor"))?;
272        me.partition_start = partition_start;
273        if lbs != 0 {
274            me.block_size = lbs;
275        }
276
277        // File Set Descriptor → root directory ICB.
278        let fsd = me.read_block(partition_start + fsd_lb)?;
279        if le_u16(&fsd, 0)? != TAG_FILE_SET {
280            return Err(FoldError::Corrupt("udf: no File Set Descriptor"));
281        }
282        let root_icb_lb = le_u32(&fsd, 404)?; // Root Directory ICB long_ad
283        let root_fe_lba = partition_start + root_icb_lb;
284        let root = me.read_fe(root_fe_lba)?;
285        me.intern(root_fe_lba, root); // node 0
286        Ok(me)
287    }
288
289    fn root(&self) -> NodeId {
290        0
291    }
292
293    fn lookup(
294        &mut self,
295        dir: NodeId,
296        name: &str,
297        cx: &mut SubstrateCtx<'_>,
298    ) -> Result<Option<NodeId>> {
299        Ok(self
300            .read_dir(dir, cx)?
301            .into_iter()
302            .find(|e| e.name == name)
303            .map(|e| e.node))
304    }
305
306    fn read_dir(&mut self, dir: NodeId, _cx: &mut SubstrateCtx<'_>) -> Result<Vec<DirEntry>> {
307        let inode = self.inode(dir)?;
308        if inode.kind != FileKind::Directory {
309            return Err(FoldError::NotDirectory);
310        }
311        let data = self.read_all(&inode)?;
312        let fids = parse_fids(&data, self.partition_start)?;
313        let mut out = Vec::with_capacity(fids.len());
314        for (name, child_fe_lba) in fids {
315            let child = self.read_fe(child_fe_lba)?;
316            let kind = child.kind;
317            let node = self.intern(child_fe_lba, child);
318            out.push(DirEntry { name, node, kind });
319        }
320        Ok(out)
321    }
322
323    fn metadata(&mut self, node: NodeId, _cx: &mut SubstrateCtx<'_>) -> Result<Metadata> {
324        let inode = self.inode(node)?;
325        Ok(Metadata {
326            kind: inode.kind,
327            size: inode.size,
328            mode: 0,
329        })
330    }
331
332    fn read_at(
333        &mut self,
334        node: NodeId,
335        off: u64,
336        buf: &mut [u8],
337        _cx: &mut SubstrateCtx<'_>,
338    ) -> Result<usize> {
339        let inode = self.inode(node)?;
340        if inode.kind == FileKind::Directory {
341            return Err(FoldError::IsDirectory);
342        }
343        if off >= inode.size {
344            return Ok(0);
345        }
346        let want = core::cmp::min(buf.len() as u64, inode.size - off) as usize;
347        match &inode.data {
348            FileData::Inline(b) => {
349                let start = off as usize;
350                let n = core::cmp::min(want, b.len().saturating_sub(start));
351                buf[..n].copy_from_slice(&b[start..start + n]);
352                Ok(n)
353            }
354            FileData::Extents(ex) => {
355                let mut file_pos = 0u64;
356                let mut produced = 0usize;
357                for e in ex {
358                    if produced >= want {
359                        break;
360                    }
361                    let ext_start = file_pos;
362                    let ext_end = file_pos + u64::from(e.len);
363                    file_pos = ext_end;
364                    let cur = off + produced as u64;
365                    if cur >= ext_end {
366                        continue;
367                    }
368                    let intra = (cur - ext_start) as usize;
369                    let avail = (e.len as usize).saturating_sub(intra);
370                    let take = core::cmp::min(avail, want - produced);
371                    match e.lba {
372                        Some(lba) => self.src.read_at(
373                            u64::from(lba) * u64::from(self.block_size) + intra as u64,
374                            &mut buf[produced..produced + take],
375                        )?,
376                        None => buf[produced..produced + take].fill(0),
377                    }
378                    produced += take;
379                }
380                Ok(produced)
381            }
382        }
383    }
384}
385
386/// Parse File Identifier Descriptors over a directory's data, returning
387/// (name, child File Entry LBA) for each non-parent, non-deleted entry.
388fn parse_fids(data: &[u8], partition_start: u32) -> Result<Vec<(String, u32)>> {
389    let mut out = Vec::new();
390    let mut p = 0;
391    while p + 38 <= data.len() {
392        if le_u16(data, p)? != TAG_FID {
393            break;
394        }
395        let fc = data[p + 18]; // FileCharacteristics: bit2 deleted, bit3 parent
396        let l_fi = data[p + 19] as usize;
397        let icb_lb = le_u32(data, p + 24)?; // ICB long_ad logical block
398        let l_iu = le_u16(data, p + 36)? as usize;
399        let fi_off = p + 38 + l_iu;
400        let fi = data
401            .get(fi_off..fi_off + l_fi)
402            .ok_or(FoldError::Corrupt("udf: FID name out of bounds"))?;
403        if fc & 0x08 == 0 && fc & 0x04 == 0 {
404            out.push((decode_udf_name(fi)?, partition_start + icb_lb));
405        }
406        let total = (38 + l_iu + l_fi + 3) & !3; // pad to a 4-byte boundary
407        if total == 0 {
408            break;
409        }
410        p += total;
411    }
412    Ok(out)
413}
414
415/// Decode a UDF compressed-unicode `d-string`: a leading compression id selects
416/// 8-bit (Latin-1) or 16-bit (UTF-16BE) characters.
417fn decode_udf_name(fi: &[u8]) -> Result<String> {
418    match fi.split_first() {
419        None => Ok(String::new()),
420        Some((8, rest)) => Ok(rest.iter().map(|&b| b as char).collect()),
421        Some((16, rest)) => {
422            let units = rest
423                .chunks_exact(2)
424                .map(|c| u16::from_be_bytes([c[0], c[1]]));
425            let mut s = String::new();
426            for ch in char::decode_utf16(units) {
427                s.push(ch.map_err(|_| FoldError::InvalidPath("udf: bad UTF-16 in name"))?);
428            }
429            Ok(s)
430        }
431        Some(_) => Err(FoldError::InvalidPath("udf: unknown name compression id")),
432    }
433}
434
435fn le_u16(b: &[u8], o: usize) -> Result<u16> {
436    b.get(o..o + 2)
437        .and_then(|s| s.try_into().ok())
438        .map(u16::from_le_bytes)
439        .ok_or(FoldError::Corrupt("udf: truncated u16"))
440}
441
442fn le_u32(b: &[u8], o: usize) -> Result<u32> {
443    b.get(o..o + 4)
444        .and_then(|s| s.try_into().ok())
445        .map(u32::from_le_bytes)
446        .ok_or(FoldError::Corrupt("udf: truncated u32"))
447}
448
449fn le_u64(b: &[u8], o: usize) -> Result<u64> {
450    b.get(o..o + 8)
451        .and_then(|s| s.try_into().ok())
452        .map(u64::from_le_bytes)
453        .ok_or(FoldError::Corrupt("udf: truncated u64"))
454}