cdragon_rman/
lib.rs

1//! Support of RMAN files, Riot manifest files
2//!
3//! Use [Rman] to open an RMAN file and access its content.
4//!
//! An RMAN file is made of a header and multiple tables (bundles, file names, ...).
//! When an instance is created, the body is decompressed but only the headers are parsed.
//! Tables are then iterated on using the `iter_*()` methods.
8//!
9//! # Example: list files
10//! ```no_run
11//! # use cdragon_rman::Rman;
12//!
13//! let rman = Rman::open("example.manifest").expect("failed to open or read headers");
14//! // Directories are listed separately from files and their basenames
15//! let dir_paths = rman.dir_paths();
16//! // Iterate on files, print the full paths
17//! for file in rman.iter_files() {
18//!     println!("{}", file.path(&dir_paths));
19//! }
20//! ```
21
22use std::io::{Read, BufReader};
23use std::path::Path;
24use std::convert::TryInto;
25use std::collections::HashMap;
26use nom::{
27    number::complete::{le_u8, le_u16, le_u32, le_u64},
28    bytes::complete::tag,
29    sequence::tuple,
30};
31use thiserror::Error;
32use cdragon_utils::{
33    parsing::{ParseError, ReadArray},
34    parse_buf,
35};
36
/// Result type for RMAN errors
///
/// The error type defaults to [`RmanError`].
type Result<T, E = RmanError> = std::result::Result<T, E>;
39
40
/// Riot manifest file
///
/// The body is decompressed when the manifest is read; its tables are parsed on demand.
/// Entries are parsed each time they are iterated on.
/// They should be cached by the caller if needed.
///
/// # Note on errors
///
/// Most reading methods may panic on invalid offsets or invalid data.
/// This is especially true for the `iter_*()` methods.
pub struct Rman {
    /// RMAN version (`(major, minor)`)
    ///
    /// Currently, only version `(2, 0)` is supported.
    pub version: (u8, u8),
    /// RMAN flags
    pub flags: u16,
    /// Manifest ID
    ///
    /// Typically, it matches the manifest filename.
    pub manifest_id: u64,
    /// Decompressed body, which holds all the tables
    body: Vec<u8>,
    /// Offset of the bundle table in the body
    offset_bundles: i32,
    /// Offset of the flag table in the body
    offset_flags: i32,
    /// Offset of the file table in the body
    offset_files: i32,
    /// Offset of the directory table in the body
    offset_directories: i32,
}
68
/// Map directory IDs to their full paths
///
/// Such a map is built by [`Rman::dir_paths()`] and used by [`FileEntry::path()`].
pub type DirPaths = HashMap<u64, String>;
71
/// Chunk, associated to a bundle
///
/// Values of [`BundleChunks`] maps, as built by [`Rman::bundle_chunks()`].
#[derive(Clone, Debug)]
pub struct BundleChunk {
    /// Bundle ID of the chunk
    pub bundle_id: u64,
    /// Offset of chunk in bundle
    pub bundle_offset: u32,
    /// Size of chunk in bundle, compressed
    pub bundle_size: u32,
    /// Size of chunk, uncompressed
    pub target_size: u32,
}
84
/// Map chunk IDs to their data
///
/// Such a map is built by [`Rman::bundle_chunks()`].
pub type BundleChunks = HashMap<u64, BundleChunk>;
87
88impl Rman {
89    /// Open an RMAN file from path
90    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
91        let file = std::fs::File::open(path.as_ref())?;
92        let reader = BufReader::new(file);
93        Rman::read(reader)
94    }
95
96    /// Read an RMAN file, check header and decompress body
97    ///
98    /// Body is assumed to have the expected size. It is not checked against header length values.
99    pub fn read<R: Read>(mut reader: R) -> Result<Self> {
100        let (version, flags, manifest_id, body_length) = {
101            let r = reader.by_ref();
102            Self::parse_header(r)?
103        };
104        let body = zstd::stream::decode_all(reader.take(body_length as u64))?;
105        let offsets = Self::parse_body_header(&body);
106        Ok(Self {
107            version, flags, manifest_id, body,
108            offset_bundles: offsets.0,
109            offset_flags: offsets.1,
110            offset_files: offsets.2,
111            offset_directories: offsets.3,
112        })
113    }
114
115    /// Parse header, advance to the beginning of the body
116    fn parse_header<R: Read>(mut reader: R) -> Result<((u8, u8), u16, u64, u32)> {
117        const MAGIC_VERSION_LEN: usize = 4 + 2;
118        const FIELDS_LEN: usize = 2 + 4 + 4 + 8 + 4;
119        const HEADER_LEN: usize = MAGIC_VERSION_LEN + FIELDS_LEN;
120
121        let version = {
122            let buf = reader.read_array::<MAGIC_VERSION_LEN>()?;
123            let (_, major, minor) = parse_buf!(buf, tuple((tag("RMAN"), le_u8, le_u8)));
124            if (major, minor) != (2, 0) {
125                return Err(RmanError::UnsupportedVersion(major, minor));
126            }
127            (major, minor)
128        };
129
130        let (flags, manifest_id, zstd_length) = {
131            let buf = reader.read_array::<FIELDS_LEN>()?;
132            let (flags, offset, zstd_length, manifest_id, _body_length) =
133                parse_buf!(buf, tuple((le_u16, le_u32, le_u32, le_u64, le_u32)));
134            if flags & (1 << 9) == 0 {
135                return Err(RmanError::UnsupportedFlags(flags));
136            }
137            if offset < HEADER_LEN as u32 {
138                return Err(ParseError::Error.into());
139            } else if offset > HEADER_LEN as u32 {
140                let skipped_len = offset - HEADER_LEN as u32;
141                std::io::copy(&mut reader.take(skipped_len as u64), &mut std::io::sink())?;
142            }
143            (flags, manifest_id, zstd_length)
144        };
145
146        Ok((version, flags, manifest_id, zstd_length))
147    }
148
149    /// Parse body header
150    fn parse_body_header(body: &[u8]) -> (i32, i32, i32, i32) {
151        let mut cursor = BodyCursor::new(body, 0);
152
153        // Unknown header, skip it
154        let header_len = cursor.read_i32();
155        cursor.skip(header_len);
156
157        (
158            cursor.read_offset(),
159            cursor.read_offset(),
160            cursor.read_offset(),
161            cursor.read_offset(),
162            // Note: the last two tables are unknown
163        )
164    }
165
166    /// Iterate on flags (locales, platforms)
167    pub fn iter_flags(&self) -> OffsetTableIter<'_, FileFlagEntry> {
168        let cursor = BodyCursor::new(&self.body, self.offset_flags);
169        OffsetTableIter::new(cursor, parse_flag_entry)
170    }
171
172    /// Iterate on bundles
173    pub fn iter_bundles(&self) -> OffsetTableIter<'_, BundleEntry<'_>> {
174        let cursor = BodyCursor::new(&self.body, self.offset_bundles);
175        OffsetTableIter::new(cursor, parse_bundle_entry)
176    }
177
178    /// Iterate on files
179    pub fn iter_files(&self) -> OffsetTableIter<'_, FileEntry<'_>> {
180        let cursor = BodyCursor::new(&self.body, self.offset_files);
181        OffsetTableIter::new(cursor, parse_file_entry)
182    }
183
184    /// Iterate on directories (raw entries)
185    pub fn iter_directories(&self) -> OffsetTableIter<'_, DirectoryEntry<'_>> {
186        let cursor = BodyCursor::new(&self.body, self.offset_directories);
187        OffsetTableIter::new(cursor, parse_directory_entry)
188    }
189
190    /// Build map of directory paths
191    pub fn dir_paths(&self) -> DirPaths {
192        let directories: Vec<DirectoryEntry> = self.iter_directories().collect();
193        DirectoryEntry::build_path_map(&directories)
194    }
195
196    /// Build a map of chunks, with bundle information
197    pub fn bundle_chunks(&self) -> BundleChunks {
198        self.iter_bundles().flat_map(|bundle| {
199            let bundle_id = bundle.id;
200            bundle.iter_chunks().map(move |chunk| {
201                (chunk.id, BundleChunk {
202                    bundle_id,
203                    bundle_offset: chunk.bundle_offset,
204                    bundle_size: chunk.bundle_size,
205                    target_size: chunk.target_size,
206                })
207            })
208        }).collect()
209    }
210}
211
212
/// Parse data from RMAN body
///
/// RMAN parsing uses a lot of negative indexes. Regular slices don't allow going backwards.
/// Implement our own parsing helpers for cleaner and easier parsing.
/// There is no error handling: parsers panic if there is not enough data.
///
/// # Implementation note
///
/// Body size is guaranteed to fit in a `u32`, and should always fit in an `i32`.
/// Use `i32` for all offsets to simplify use and avoid numerous casts.
///
/// # Errors
///
/// Parsing methods will panic on attempts to read outside the buffer.
#[derive(Clone, Debug)]
struct BodyCursor<'a> {
    /// Whole RMAN body
    body: &'a [u8],
    /// Current position in `body`
    offset: i32,
}
232
233impl<'a> BodyCursor<'a> {
234    fn new(body: &'a [u8], offset: i32) -> Self {
235        Self { body, offset }
236    }
237
238    fn offset(&self) -> i32 {
239        self.offset
240    }
241
242    fn read_slice(&mut self, n: i32) -> &'a [u8] {
243        let slice = &self.body[self.offset as usize .. (self.offset + n) as usize];
244        self.offset += n;
245        slice
246    }
247
248    fn peek_slice(&self, n: i32) -> &'a [u8] {
249        &self.body[self.offset as usize .. (self.offset + n) as usize]
250    }
251
252    fn fields_cursor(mut self) -> BodyFieldsCursor<'a> {
253        let entry_offset = self.offset();
254        let fields_offset = entry_offset - self.read_i32() + 2 * 2;  // Note: skip the 2 header fields
255        BodyFieldsCursor { body: self.body, fields_offset, entry_offset }
256    }
257
258    /// Read an offset and return a new cursor pointing to it
259    fn subcursor(&mut self) -> Self {
260        Self::new(self.body, self.read_offset())
261    }
262
263    /// Skip `n` bytes, rewind of negative
264    fn skip(&mut self, n: i32) {
265        self.offset += n;
266    }
267
268    fn read_u8(&mut self) -> u8 {
269        let v = self.body[self.offset as usize];
270        self.offset += 1;
271        v
272    }
273
274    fn read_i32(&mut self) -> i32 {
275        i32::from_le_bytes(self.read_slice(4).try_into().unwrap())
276    }
277
278    fn read_u32(&mut self) -> u32 {
279        u32::from_le_bytes(self.read_slice(4).try_into().unwrap())
280    }
281
282    fn read_u64(&mut self) -> u64 {
283        u64::from_le_bytes(self.read_slice(8).try_into().unwrap())
284    }
285
286    /// Read an offset value, return an absolute body offset
287    fn read_offset(&mut self) -> i32 {
288        let base_offset = self.offset;
289        let offset = self.read_i32();
290        base_offset + offset
291    }
292
293    fn peek_u32(&self) -> u32 {
294        u32::from_le_bytes(self.peek_slice(4).try_into().unwrap())
295    }
296}
297
/// Same as [BodyCursor], but suited to read indexed fields from entry
///
/// An entry starts with a 32-bit value which is subtracted from the entry offset to locate
/// its field list. Each item of the list is a 16-bit offset to the field value, relative to
/// the entry offset. An offset of 0 means the field is not set.
///
/// The first two fields are always:
/// - the size of the field list itself
/// - the size of the entry (which is the end of the fields)
struct BodyFieldsCursor<'a> {
    /// Whole RMAN body
    body: &'a [u8],
    /// Position in `body` of the offset of field 0 (the two leading fields are skipped)
    fields_offset: i32,
    /// Position in `body` of the entry, field offsets are relative to it
    entry_offset: i32,
}
308
309impl<'a> BodyFieldsCursor<'a> {
310    fn field_slice(&self, field: u8, n: i32) -> Option<&'a [u8]> {
311        match self.field_offset(field) {
312            0 => None,
313            o => {
314                let offset = self.entry_offset + o;
315                Some(&self.body[offset as usize .. (offset + n) as usize])
316            }
317        }
318    }
319
320    /// Get field offset value
321    fn field_offset(&self, field: u8) -> i32 {
322        let offset = (self.fields_offset + 2 * field as i32) as usize;
323        let slice = &self.body[offset .. offset + 2];
324        u16::from_le_bytes(slice.try_into().unwrap()) as i32
325    }
326
327    fn get_i32(&self, field: u8) -> Option<i32> {
328        self.field_slice(field, 4).map(|s| i32::from_le_bytes(s.try_into().unwrap()))
329    }
330
331    fn get_u32(&self, field: u8) -> Option<u32> {
332        self.field_slice(field, 4).map(|s| u32::from_le_bytes(s.try_into().unwrap()))
333    }
334
335    fn get_u64(&self, field: u8) -> Option<u64> {
336        self.field_slice(field, 8).map(|s| u64::from_le_bytes(s.try_into().unwrap()))
337    }
338
339    /// Read an offset value, return a body cursor at this offset
340    fn get_offset_cursor(&self, field: u8) -> Option<BodyCursor<'a>> {
341        self.get_i32(field).map(|o| {
342            let offset = self.entry_offset + o + self.field_offset(field);
343            BodyCursor::new(self.body, offset)
344        })
345    }
346
347    /// Read an offset value, then string at given offset
348    fn get_str(&self, field: u8) -> Option<&'a str> {
349        self.get_offset_cursor(field).map(|mut cursor| {
350            let len = cursor.read_i32();
351            let slice = cursor.read_slice(len);
352            std::str::from_utf8(slice).expect("invalid UTF-8 string in RMAN")
353        })
354    }
355}
356
357
/// An iterator over individual entries of an RMAN table
///
/// This struct is created by the various `iter_*()` methods on [Rman].
pub struct OffsetTableIter<'a, I> {
    /// Cursor on the offset of the next entry
    cursor: BodyCursor<'a>,
    /// Number of remaining entries
    count: u32,
    /// Parser called with a cursor located on each entry
    parser: fn(BodyCursor<'a>) -> I,
}
366
367impl<'a, I> OffsetTableIter<'a, I> {
368    /// Initialize the iterator, read item count from the cursor
369    fn new(mut cursor: BodyCursor<'a>, parser: fn(BodyCursor<'a>) -> I) -> Self {
370        let count = cursor.read_u32();
371        Self { cursor, count, parser }
372    }
373}
374
375impl<'a, I> Iterator for OffsetTableIter<'a, I> {
376    type Item = I;
377
378    fn next(&mut self) -> Option<Self::Item> {
379        if self.count == 0 {
380            None
381        } else {
382            self.count -= 1;
383            Some((self.parser)(self.cursor.subcursor()))
384        }
385    }
386
387    fn size_hint(&self) -> (usize, Option<usize>) {
388        (self.count as usize, Some(self.count as usize))
389    }
390
391    fn count(self) -> usize {
392        self.count as usize
393    }
394}
395
396
/// File flag defined in RMAN
///
/// Flags are locale codes (e.g. `en_US`) or platform (e.g. `macos`).
#[derive(Debug)]
pub struct FileFlagEntry<'a> {
    /// Flag ID
    ///
    /// [`FileFlagSet::iter()`] uses it as a bit index in the flag mask of files.
    pub id: u8,
    /// Flag value
    pub flag: &'a str,
}
407
408
/// Bundle information from RMAN
#[derive(Debug)]
pub struct BundleEntry<'a> {
    /// Bundle ID
    pub id: u64,
    /// Cursor on the chunk table of the bundle (starting with the chunk count)
    cursor: BodyCursor<'a>,
}
416
417impl<'a> BundleEntry<'a> {
418    /// Iterate of bundle chunks
419    pub fn iter_chunks(&self) -> impl Iterator<Item=ChunkEntry> + 'a {
420        OffsetTableIter::new(self.cursor.clone(), parse_chunk_entry)
421            .scan(0u32, |offset, mut e| {
422                e.bundle_offset = *offset;
423                *offset += e.bundle_size;
424                Some(e)
425            })
426    }
427
428    /// Return the number of chunks in the bundle
429    pub fn chunks_count(&self) -> u32 {
430       self.cursor.peek_u32()
431    }
432}
433
/// Chunk information from RMAN
#[derive(Debug)]
pub struct ChunkEntry {
    /// Chunk ID
    pub id: u64,
    /// Size of chunk in bundle, compressed
    pub bundle_size: u32,
    /// Size of chunk, uncompressed
    pub target_size: u32,
    /// Offset of chunk in bundle
    ///
    /// It is not stored in the chunk entry but computed by [`BundleEntry::iter_chunks()`].
    pub bundle_offset: u32,
}
446
/// File information from RMAN
#[derive(Debug)]
pub struct FileEntry<'a> {
    /// File ID
    pub id: u64,
    /// File name (without directory)
    pub name: &'a str,
    /// For links, target of the link
    ///
    /// An empty link target is parsed as `None`.
    pub link: Option<&'a str>,
    /// ID of the directory the file is into
    pub directory_id: Option<u64>,
    /// Size of the file, when extracted
    pub filesize: u32,
    /// Flags, used to filter which files need to be installed
    pub flags: Option<FileFlagSet>,
    /// Cursor on the list of chunk IDs of the file (starting with the chunk count)
    chunks_cursor: BodyCursor<'a>,
}
464
/// Data byte range for an RMAN file
///
/// Ranges are `(start, end)` pairs of offsets.
#[derive(Debug)]
pub struct FileChunkRange {
    /// Byte range of the chunk in its bundle
    pub bundle: (u32, u32),
    /// Byte range of the chunk in the target file
    pub target: (u32, u32),
}
473
/// Chunk data information for an RMAN file
///
/// Store chunks of a file, grouped by bundle.
/// For each entry in the map, key is the bundle ID and value a list of chunk data ranges.
///
/// Such a map is built by [`FileEntry::bundle_chunks()`].
pub type FileBundleRanges = HashMap<u64, Vec<FileChunkRange>>;
479
480impl<'a> FileEntry<'a> {
481    /// Iterate on the chunks the file is built from
482    pub fn iter_chunks(&self) -> FileChunksIter<'a> {
483        FileChunksIter::new(self.chunks_cursor.clone())
484    }
485
486    /// Return full file path, using given directory path map
487    pub fn path(&self, dirs: &DirPaths) -> String {
488        match self.directory_id {
489            None => self.name.to_owned(),
490            Some(id) => format!("{}/{}", dirs[&id], self.name),
491        }
492    }
493
494    /// Collect file chunks grouped by bundle, and the total file size
495    pub fn bundle_chunks(&self, bundle_chunks: &BundleChunks) -> (u32, FileBundleRanges) {
496        // Group chunks by bundle
497        // For each bundle, get its list of ranges to download and target file ranges
498        // Also compute the total file size
499        let mut bundle_ranges = FileBundleRanges::new();
500        let file_size = self
501            .iter_chunks()
502            .fold(0u32, |offset, chunk_id| {
503                let chunk = &bundle_chunks[&chunk_id];
504                let ranges = &mut bundle_ranges.entry(chunk.bundle_id).or_default();
505                ranges.push(FileChunkRange {
506                    bundle: (chunk.bundle_offset, chunk.bundle_offset + chunk.bundle_size),
507                    target: (offset, offset + chunk.target_size),
508                });
509                offset + chunk.target_size
510            });
511        (file_size, bundle_ranges)
512    }
513}
514
/// An iterator over the chunks of an RMAN file
///
/// This `struct` is created by `FileEntry::iter_chunks` method.
/// It yields the IDs of the chunks.
pub struct FileChunksIter<'a> {
    /// Cursor on the next chunk ID
    cursor: BodyCursor<'a>,
    /// Number of remaining chunks
    count: u32,
}
522
523impl<'a> FileChunksIter<'a> {
524    fn new(mut cursor: BodyCursor<'a>) -> Self {
525        let count = cursor.read_u32();
526        Self { cursor, count }
527    }
528}
529
530impl<'a> Iterator for FileChunksIter<'a> {
531    type Item = u64;
532
533    fn next(&mut self) -> Option<Self::Item> {
534        if self.count == 0 {
535            None
536        } else {
537            self.count -= 1;
538            Some(self.cursor.read_u64())
539        }
540    }
541
542    fn size_hint(&self) -> (usize, Option<usize>) {
543        (self.count as usize, Some(self.count as usize))
544    }
545
546    fn count(self) -> usize {
547        self.count as usize
548    }
549}
550
551
/// Set of RMAN file flags, as a bitmask
///
/// Use [`FileFlagSet::iter()`] to get the flags set in the mask.
#[derive(Debug)]
pub struct FileFlagSet {
    /// Flag mask: a flag is set if the bit whose index is the flag ID is set
    mask: u64,
}
557
558impl FileFlagSet {
559    /// Iterate on flags set in the mask
560    pub fn iter<'a, I: Iterator<Item=&'a FileFlagEntry<'a>>>(&self, flags_it: I) -> impl Iterator<Item=&'a str> {
561        let mask = self.mask;
562        flags_it.filter_map(move |e| {
563            if mask & (1 << e.id) == 0 {
564                None
565            } else {
566                Some(e.flag)
567            }
568        })
569    }
570}
571
572
/// Directory defined in RMAN
///
/// Use [`DirectoryEntry::path()`] to resolve the full path of the directory.
#[derive(Debug)]
pub struct DirectoryEntry<'a> {
    /// Directory ID
    pub id: u64,
    /// Parent directory, if any
    pub parent_id: Option<u64>,
    /// Directory name
    pub name: &'a str,
}
583
584impl<'a> DirectoryEntry<'a> {
585    /// Build absolute path, using list of all directories
586    pub fn path(&self, dirs: &[DirectoryEntry]) -> String {
587        let mut path = self.name.to_owned();
588        let mut parent_id = self.parent_id;
589        while parent_id.is_some() {
590            let pid = parent_id.unwrap();
591            let parent = dirs.iter().find(|e| e.id == pid).expect("RMAN parent directory ID not found");
592            path = format!("{}/{}", parent.name, path);
593            parent_id = parent.parent_id;
594        }
595        path
596    }
597
598    /// Resolve directory paths, return a map indexed by ID
599    pub fn build_path_map(entries: &[DirectoryEntry]) -> DirPaths {
600        // Note: don't process recursively. Path of intermediate directories will be formatted
601        // multiple times. There are only few directories, so that should not be an issue.
602        entries.iter().map(|e| (e.id, e.path(entries))).collect()
603    }
604}
605
606
607fn parse_flag_entry(mut cursor: BodyCursor) -> FileFlagEntry {
608    // Skip field offsets, assume fixed ones
609    cursor.skip(4);
610    cursor.skip(3);
611    let flag_id = cursor.read_u8();
612    let flag = {
613        let mut cursor = cursor.subcursor();
614        let len = cursor.read_i32();
615        let slice = cursor.read_slice(len);
616        std::str::from_utf8(slice).expect("invalid UTF-8 string for RMAN file flag")
617    };
618    FileFlagEntry { id: flag_id, flag }
619}
620
621fn parse_bundle_entry(cursor: BodyCursor) -> BundleEntry {
622    // Field offsets
623    //   0  bundle ID
624    //   1  chunks offset
625    let cursor = cursor.fields_cursor();
626
627    let bundle_id = cursor.get_u64(0).expect("missing bundle ID field");
628    let chunks_cursor = cursor.get_offset_cursor(1).expect("missing chunks offset field");
629
630    BundleEntry { id: bundle_id, cursor: chunks_cursor }
631}
632
633fn parse_chunk_entry(cursor: BodyCursor) -> ChunkEntry {
634    // Field offsets
635    //   0  chunk ID
636    //   1  bundle size, compressed
637    //   2  chunk size, uncompressed
638
639    let cursor = cursor.fields_cursor();
640
641    let chunk_id = cursor.get_u64(0).expect("missing chunk ID field");
642    let bundle_size = cursor.get_u32(1).expect("missing chunk compressed size");
643    let target_size = cursor.get_u32(2).expect("missing chunk uncompressed size");
644
645    // Note: bundle_offset is set later, by `BundleEntry::iter_chunks()`
646    ChunkEntry { id: chunk_id, bundle_size, target_size, bundle_offset: 0 }
647}
648
649fn parse_file_entry(cursor: BodyCursor) -> FileEntry {
650    // Field offsets
651    //   0  file ID
652    //   1  directory ID
653    //   2  file size
654    //   3  name (offset)
655    //   4  flags (mask)
656    //   5  ?
657    //   6  ?
658    //   7  chunks (offset)
659    //   8  ?
660    //   9  link (str, offset)
661    //  10  ?
662    //  11  ? (present and set to 1 for localized WADs)
663    //  12  file type (1: executable, 2: regular)
664    let cursor = cursor.fields_cursor();
665
666    let file_id = cursor.get_u64(0).expect("missing file ID field");
667    let directory_id = cursor.get_u64(1);
668    let filesize = cursor.get_u32(2).expect("missing file size field");
669    let name = cursor.get_str(3).expect("missing file name field");
670    let flags = cursor.get_u64(4).map(|mask| FileFlagSet { mask });
671    let chunks_cursor = cursor.get_offset_cursor(7).expect("missing chunks cursor field");
672    let link = cursor.get_str(9).filter(|v| !v.is_empty());
673
674    FileEntry {
675        id: file_id, name, link, directory_id,
676        filesize, flags, chunks_cursor,
677    }
678}
679
680fn parse_directory_entry(cursor: BodyCursor) -> DirectoryEntry {
681    let cursor = cursor.fields_cursor();
682    let directory_id = cursor.get_u64(0).unwrap_or(0);
683    let parent_id = cursor.get_u64(1);
684    let name = cursor.get_str(2).expect("missing directory name field");
685
686    DirectoryEntry { id: directory_id, parent_id, name }
687}
688
689
/// Error in an RMAN file
///
/// Returned when opening or reading an RMAN file fails.
#[allow(missing_docs)]
#[derive(Error, Debug)]
pub enum RmanError {
    /// I/O error, forwarded as is
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// Data could not be parsed
    #[error("parsing error")]
    Parsing(#[from] ParseError),
    /// Version of the file, as `(major, minor)`, is not supported
    #[error("version not supported: {0}.{1}")]
    UnsupportedVersion(u8, u8),
    /// Flags of the file are not supported
    #[error("flags not supported: {0:b}")]
    UnsupportedFlags(u16),
}
703