ds_rom/rom/
file.rs

1use std::{
2    borrow::Cow,
3    cmp::Ordering,
4    collections::{BinaryHeap, HashSet},
5    fmt::Display,
6    io::Write,
7    path::{Path, PathBuf},
8    str::FromStr,
9};
10
11use encoding_rs::SHIFT_JIS;
12use snafu::{Backtrace, Snafu};
13
14use super::raw::{self, FileAlloc, Fnt, FntDirectory, FntFile, FntSubtable, RawHeaderError};
15use crate::{
16    io::{read_dir, read_file, FileError},
17    str::BlobSize,
18};
19
20/// Contains files and directories to be placed into a ROM.
21pub struct FileSystem<'a> {
22    num_overlays: usize,
23    files: Vec<File<'a>>,
24    dirs: Vec<Dir>,
25    next_file_id: u16,
26    next_dir_id: u16,
27}
28
29/// A file for the [`FileSystem`] struct.
30#[derive(Clone)]
31pub struct File<'a> {
32    id: u16,
33    name: String,
34    original_offset: u32,
35    contents: Cow<'a, [u8]>,
36}
37
38/// A directory for the [`FileSystem`] struct.
39#[derive(Clone)]
40pub struct Dir {
41    id: u16,
42    name: String,
43    parent_id: u16,
44    children: Vec<u16>,
45}
46
47/// Errors related to [`FileSystem::parse`].
48#[derive(Debug, Snafu)]
49pub enum FileParseError {
50    /// Occurs when a file ID is missing from the raw FNT.
51    #[snafu(display("the file ID {id} is missing from the FNT:\n{backtrace}"))]
52    MissingFileId {
53        /// File ID.
54        id: u16,
55        /// Backtrace to the source of the error.
56        backtrace: Backtrace,
57    },
58    /// Occurs when a directory ID is missing from the faw FNT.
59    #[snafu(display("the directory ID {id} is missing from the FNT:\n{backtrace}"))]
60    MissingDirId {
61        /// Directory ID.
62        id: u16,
63        /// Backtrace to the source of the error.
64        backtrace: Backtrace,
65    },
66    /// See [`RawHeaderError`].
67    #[snafu(transparent)]
68    RawHeader {
69        /// Source error.
70        source: RawHeaderError,
71    },
72}
73
74/// Errors related to [`FileSystem::build_fnt`].
75#[derive(Debug, Snafu)]
76pub enum FileBuildError {
77    /// Occurs when a file name contains unmappable characters that could not be encoded to SHIFT-JIS.
78    #[snafu(display("the file name {name} contains unmappable character(s):\n{backtrace}"))]
79    EncodingFailed {
80        /// File name.
81        name: String,
82        /// Backtrace to the source of the error.
83        backtrace: Backtrace,
84    },
85}
86
87const ROOT_DIR_ID: u16 = 0xf000;
88
89impl<'a> FileSystem<'a> {
90    /// Creates a new [`FileSystem`]. The number of overlays are used to determine the first file ID, since overlays are also
91    /// located in the FAT but not the FNT.
92    pub fn new(num_overlays: usize) -> Self {
93        let root = Dir { id: ROOT_DIR_ID, name: "/".to_string(), parent_id: 0, children: vec![] };
94        Self { num_overlays, files: vec![], dirs: vec![root], next_file_id: num_overlays as u16, next_dir_id: ROOT_DIR_ID + 1 }
95    }
96
97    fn load_in<P: AsRef<Path>>(&mut self, path: P, parent_id: u16) -> Result<(), FileError> {
98        // Sort children by FNT order so the file/dir IDs become correct
99        let mut children =
100            read_dir(&path)?.collect::<Result<Vec<_>, _>>()?.into_iter().map(|entry| entry.path()).collect::<Vec<_>>();
101        children.sort_unstable_by(|a, b| {
102            Self::compare_for_fnt(a.to_string_lossy().as_ref(), a.is_dir(), b.to_string_lossy().as_ref(), b.is_dir())
103        });
104
105        for child in children.into_iter() {
106            let name = child.file_name().unwrap().to_string_lossy().to_string();
107            if child.is_dir() {
108                let child_id = self.next_dir_id;
109                let child_path = path.as_ref().join(&name);
110                self.make_child_dir(name, parent_id);
111                self.load_in(child_path, child_id)?;
112            } else {
113                let contents = read_file(child)?;
114                self.make_child_file(name, parent_id, contents);
115            }
116        }
117        Ok(())
118    }
119
120    /// Loads a file system from the given root directory. This will traverse and add all folders and files into the
121    /// [`FileSystem`] struct.
122    ///
123    /// # Errors
124    ///
125    /// This function will return an error if an I/O operation fails.
126    pub fn load<P: AsRef<Path>>(root: P, num_overlays: usize) -> Result<Self, FileError> {
127        let mut files = Self::new(num_overlays);
128        files.load_in(root, ROOT_DIR_ID)?;
129        Ok(files)
130    }
131
132    /// Returns whether the ID is a directory ID.
133    pub fn is_dir(id: u16) -> bool {
134        id >= ROOT_DIR_ID
135    }
136
137    /// Returns whether the ID is a file ID.
138    pub fn is_file(id: u16) -> bool {
139        !Self::is_dir(id)
140    }
141
142    /// Returns the name of a directory or file.
143    pub fn name(&self, id: u16) -> &str {
144        if Self::is_dir(id) {
145            &self.dir(id).name
146        } else {
147            &self.file(id).name
148        }
149    }
150
151    /// Returns a directory.
152    pub fn dir(&self, id: u16) -> &Dir {
153        &self.dirs[id as usize & 0xfff]
154    }
155
156    fn dir_mut(&mut self, id: u16) -> &mut Dir {
157        &mut self.dirs[id as usize & 0xfff]
158    }
159
160    /// Returns a file.
161    pub fn file(&self, id: u16) -> &File {
162        &self.files[id as usize - self.num_overlays]
163    }
164
165    fn parse_subtable(
166        fnt: &Fnt,
167        fat: &[FileAlloc],
168        rom: &'a raw::Rom,
169        parent: &mut Dir,
170        dirs: &mut Vec<Option<Dir>>,
171        files: &mut Vec<Option<File<'a>>>,
172    ) -> (u16, u16) {
173        let subtable_index = parent.id as usize & 0xfff;
174        let subtable = &fnt.subtables[subtable_index];
175
176        let mut max_file_id = 0;
177        let mut max_dir_id = 0;
178        for FntFile { id, name } in subtable.iter() {
179            let name = name.to_string();
180
181            if Self::is_dir(id) {
182                max_dir_id = max_dir_id.max(id);
183                let mut dir = Dir { id, name, parent_id: parent.id, children: vec![] };
184                let (max_child_dir_id, max_child_file_id) = Self::parse_subtable(fnt, fat, rom, &mut dir, dirs, files);
185                max_dir_id = max_dir_id.max(max_child_dir_id);
186                max_file_id = max_file_id.max(max_child_file_id);
187
188                dirs[id as usize & 0xfff] = Some(dir);
189                parent.children.push(id);
190            } else {
191                max_file_id = max_file_id.max(id);
192                let alloc = fat[id as usize];
193                let contents = &rom.data()[alloc.range()];
194                files[id as usize] = Some(File { id, name, original_offset: alloc.start, contents: Cow::Borrowed(contents) });
195                parent.children.push(id);
196            }
197        }
198        (max_file_id, max_dir_id)
199    }
200
201    /// Parses an FNT, FAT and ROM to create a [`FileSystem`].
202    ///
203    /// # Errors
204    ///
205    /// This function will return an error if [`raw::Rom::num_arm9_overlays`] or [`raw::Rom::num_arm7_overlays`] fails, or if
206    /// a file or directory ID is missing from the FNT.
207    pub fn parse(fnt: &Fnt, fat: &[FileAlloc], rom: &'a raw::Rom) -> Result<Self, FileParseError> {
208        let num_overlays = rom.num_arm9_overlays()? + rom.num_arm7_overlays()?;
209
210        let mut root = Dir { id: ROOT_DIR_ID, name: "/".to_string(), parent_id: 0, children: vec![] };
211        let mut dirs = vec![None; fnt.subtables.len()];
212        let mut files = vec![None; fat.len()];
213        let (max_file_id, max_dir_id) = Self::parse_subtable(fnt, fat, rom, &mut root, &mut dirs, &mut files);
214        dirs[0] = Some(root);
215
216        let files = files
217            .into_iter()
218            .skip(num_overlays)
219            .enumerate()
220            .map(|(id, f)| f.ok_or(MissingFileIdSnafu { id: id as u16 }.build()))
221            .collect::<Result<Vec<_>, _>>()?;
222        let dirs = dirs
223            .into_iter()
224            .enumerate()
225            .map(|(id, d)| d.ok_or(MissingDirIdSnafu { id: id as u16 + ROOT_DIR_ID }.build()))
226            .collect::<Result<Vec<_>, _>>()?;
227
228        Ok(FileSystem { files, dirs, num_overlays, next_file_id: max_file_id + 1, next_dir_id: max_dir_id + 1 })
229    }
230
231    fn find_first_file_id(&self, parent: &Dir) -> u16 {
232        for child in &parent.children {
233            if Self::is_file(*child) {
234                return *child;
235            } else {
236                return self.find_first_file_id(self.dir(*child));
237            }
238        }
239        panic!("No first file ID found, directory is empty");
240    }
241
242    fn build_subtable(&self, parent: &Dir) -> Result<FntSubtable, FileBuildError> {
243        let mut data = vec![];
244
245        for child in &parent.children {
246            let child = *child;
247
248            let is_dir = Self::is_dir(child);
249            let name = self.name(child);
250
251            let (sjis_name, _, had_errors) = SHIFT_JIS.encode(name);
252            if had_errors {
253                return EncodingFailedSnafu { name }.fail();
254            }
255
256            let name_length = sjis_name.len() as u8 & 0x7f;
257            let directory_bit = if is_dir { 0x80 } else { 0 };
258            data.push(name_length | directory_bit);
259
260            data.extend(sjis_name.iter().take(0x7f));
261            if is_dir {
262                data.write(&u16::to_le_bytes(child)).unwrap();
263            }
264        }
265
266        Ok(FntSubtable {
267            directory: Cow::Owned(FntDirectory {
268                subtable_offset: 0,
269                first_file_id: self.find_first_file_id(parent),
270                parent_id: parent.parent_id,
271            }),
272            data: Cow::Owned(data),
273        })
274    }
275
276    fn build_fnt_recursive(&'a self, subtables: &mut Vec<FntSubtable<'a>>, parent_id: u16) -> Result<(), FileBuildError> {
277        let parent = &self.dir(parent_id);
278        subtables.push(self.build_subtable(parent)?);
279        for child in &parent.children {
280            if Self::is_dir(*child) {
281                self.build_fnt_recursive(subtables, *child)?;
282            }
283        }
284        Ok(())
285    }
286
287    /// Builds an FNT from this [`FileSystem`].
288    ///
289    /// # Errors
290    ///
291    /// This function will return an error if a file/directory name contains non-ASCII characters.
292    pub fn build_fnt(&self) -> Result<Fnt, FileBuildError> {
293        let mut subtables = vec![];
294        self.build_fnt_recursive(&mut subtables, ROOT_DIR_ID)?;
295        Ok(Fnt { subtables: subtables.into_boxed_slice() })
296    }
297
298    fn compare_for_fnt(a: &str, a_dir: bool, b: &str, b_dir: bool) -> Ordering {
299        let files_first = a_dir.cmp(&b_dir);
300        if files_first.is_ne() {
301            return files_first;
302        }
303
304        // Convert to Shift-JIS first, *then* convert to lowercase byte-by-byte
305        // without accounting for multibyte characters like コ (83 52, but sorted
306        // as if it was actually ビ / 83 72). This strcasecmp-like behavior was
307        // observed in 999's Japanese file names.
308        let (mut a_bytes, _, _) = SHIFT_JIS.encode(&a);
309        let (mut b_bytes, _, _) = SHIFT_JIS.encode(&b);
310        let a_vec = a_bytes.to_mut();
311        let b_vec = b_bytes.to_mut();
312        a_vec.make_ascii_lowercase();
313        b_vec.make_ascii_lowercase();
314
315        // Lexicographic, case-insensitive Shift-JIS order
316        a_vec.cmp(&b_vec)
317    }
318
319    fn sort_for_fnt_in(&mut self, parent_id: u16) {
320        let mut parent = self.dir(parent_id).clone();
321        parent
322            .children
323            .sort_by(|a, b| Self::compare_for_fnt(self.name(*a), Self::is_dir(*a), self.name(*b), Self::is_dir(*b)));
324
325        for child in &mut parent.children {
326            if Self::is_dir(*child) {
327                self.sort_for_fnt_in(*child);
328            }
329        }
330
331        *self.dir_mut(parent_id) = parent;
332    }
333
334    /// Sorts the entire [`FileSystem`] so that it's laid out in the right order for the FNT.
335    pub fn sort_for_fnt(&mut self) {
336        self.sort_for_fnt_in(ROOT_DIR_ID);
337    }
338
339    fn compare_for_rom(a: &str, b: &str) -> Ordering {
340        // Lexicographic UTF-8 order
341        a.cmp(b)
342    }
343
344    fn sort_for_rom_in(&mut self, parent_id: u16) {
345        let mut parent = self.dir(parent_id).clone();
346        parent.children.sort_by(|a, b| Self::compare_for_rom(self.name(*a), self.name(*b)));
347
348        for child in &mut parent.children {
349            if Self::is_dir(*child) {
350                self.sort_for_rom_in(*child);
351            }
352        }
353
354        *self.dir_mut(parent_id) = parent;
355    }
356
357    /// Sorts the entire [`FileSystem`] so that files laid out in the right order for appending to the ROM.
358    pub fn sort_for_rom(&mut self) {
359        self.sort_for_rom_in(ROOT_DIR_ID);
360    }
361
362    fn find_path_in(&self, path: &str, parent_id: u16) -> Option<u16> {
363        let parent = &self.dir(parent_id);
364        let (child_name, next) = path.split_once('/').map(|(c, n)| (c, Some(n))).unwrap_or((path, None));
365        let child = parent.children.iter().find(|id| self.name(**id) == child_name)?;
366        if let Some(next) = next {
367            if Self::is_dir(*child) {
368                self.find_path_in(next, *child)
369            } else {
370                None
371            }
372        } else {
373            Some(*child)
374        }
375    }
376
377    fn find_path(&self, path: &str) -> Option<u16> {
378        self.find_path_in(path, ROOT_DIR_ID)
379    }
380
381    fn make_child_dir(&mut self, name: String, parent_id: u16) -> &Dir {
382        let id = self.next_dir_id;
383        self.dirs.push(Dir { id, name, parent_id, children: vec![] });
384        let parent = self.dir_mut(parent_id);
385        parent.children.push(id);
386        self.next_dir_id += 1;
387        self.dirs.last().unwrap()
388    }
389
390    fn make_child_file(&mut self, name: String, parent_id: u16, contents: Vec<u8>) -> &File {
391        let id = self.next_file_id;
392        self.files.push(File { id, name, original_offset: 0, contents: contents.into() });
393        let parent = self.dir_mut(parent_id);
394        parent.children.push(id);
395        self.next_file_id += 1;
396        self.files.last().unwrap()
397    }
398
399    fn traverse_nonvisited_files<Cb>(&self, visited: &mut HashSet<u16>, callback: &mut Cb, subdir: &Dir, path: &Path)
400    where
401        Cb: FnMut(&File, &Path) -> (),
402    {
403        if visited.contains(&subdir.id) {
404            return;
405        }
406        for child in &subdir.children {
407            if visited.contains(child) {
408                continue;
409            }
410
411            if Self::is_dir(*child) {
412                let path = path.join(self.name(*child));
413                self.traverse_nonvisited_files(visited, callback, self.dir(*child), &path);
414            } else {
415                callback(self.file(*child), path);
416                let first_time_visiting_file = visited.insert(*child);
417                assert!(first_time_visiting_file);
418            }
419        }
420        let first_time_visiting_file = visited.insert(subdir.id);
421        assert!(first_time_visiting_file);
422    }
423
424    /// Traverses the [`FileSystem`] and calls `callback` for each file found. The directories will be prioritized according to
425    /// the `path_order`.
426    pub fn traverse_files<I, Cb>(&self, path_order: I, mut callback: Cb)
427    where
428        I: IntoIterator<Item = &'a str>,
429        Cb: FnMut(&File, &Path) -> (),
430    {
431        let mut visited = HashSet::<u16>::new();
432
433        for path in path_order {
434            let path = path.strip_prefix("/").unwrap_or(path);
435            let path_buf = &PathBuf::from_str(path).unwrap();
436            let subdir = if path.trim() == "" {
437                self.dir(ROOT_DIR_ID)
438            } else {
439                let Some(child) = self.find_path(path) else { continue };
440                if visited.contains(&child) {
441                    continue;
442                }
443
444                if Self::is_dir(child) {
445                    self.dir(child)
446                } else {
447                    let file = self.file(child);
448                    callback(file, path_buf);
449                    let first_time_visiting_file = visited.insert(file.id);
450                    assert!(first_time_visiting_file);
451                    continue;
452                }
453            };
454            self.traverse_nonvisited_files(&mut visited, &mut callback, subdir, path_buf);
455        }
456    }
457
458    fn max_file_id_in(&self, parent_id: u16) -> u16 {
459        let mut max_id = 0;
460        let parent = self.dir(parent_id);
461        for child in &parent.children {
462            let id = if Self::is_dir(*child) { self.max_file_id_in(*child) } else { *child };
463            if id > max_id {
464                max_id = id;
465            }
466        }
467        max_id
468    }
469
470    /// Returns the max file ID of this [`FileSystem`].
471    pub fn max_file_id(&self) -> u16 {
472        self.max_file_id_in(ROOT_DIR_ID)
473    }
474
475    /// Creates a [`DisplayFileSystem`] which implements [`Display`].
476    pub fn display(&self, indent: usize) -> DisplayFileSystem {
477        DisplayFileSystem { files: self, parent_id: ROOT_DIR_ID, indent }
478    }
479
480    fn traverse_and_compute_path_order(&self, path: &str, path_order: &mut BinaryHeap<PathOrder>, parent: &Dir) {
481        for child in &parent.children {
482            let path = format!("{}/{}", path, self.name(*child));
483            if Self::is_dir(*child) {
484                self.traverse_and_compute_path_order(path.as_str(), path_order, self.dir(*child));
485            } else {
486                path_order.push(PathOrder {
487                    id: *child,
488                    parent_id: parent.id,
489                    path_name: path,
490                    offset: self.file(*child).original_offset,
491                });
492            }
493        }
494    }
495
496    fn are_paths_sorted(&self, paths: &[PathOrder]) -> bool {
497        paths.windows(2).all(|w| Self::compare_for_rom(self.name(w[0].id), self.name(w[1].id)).is_lt())
498    }
499
500    /// Computes the path order that the [`FileSystem`] is currently in. This can be saved and reused in
501    /// [`Self::traverse_files`] to traverse in the same order later.
502    pub fn compute_path_order(&self) -> Vec<String> {
503        let mut path_order = BinaryHeap::new();
504        self.traverse_and_compute_path_order("", &mut path_order, self.dir(ROOT_DIR_ID));
505        let mut paths = path_order.into_sorted_vec();
506
507        // Loop to simplify path order
508        let mut children_start = 0;
509        while children_start < paths.len() {
510            let parent_id = paths[children_start].parent_id;
511            if parent_id == 0 {
512                children_start += 1;
513                continue;
514            }
515
516            // Find all surrounding children with the same parent as the current child
517            let children_end = paths[children_start..]
518                .iter()
519                .position(|c| c.parent_id != parent_id)
520                .map(|pos| children_start + pos)
521                .unwrap_or(paths.len());
522            children_start = paths[..children_start]
523                .iter()
524                .enumerate()
525                .rev()
526                .find_map(|(index, child)| (child.parent_id != parent_id).then_some(index + 1))
527                .unwrap_or(0);
528            let num_children = children_end - children_start;
529
530            let parent = self.dir(parent_id);
531            let num_unvisited_children =
532                parent.children.iter().filter(|c| !paths[..children_start].iter().any(|p| p.id == **c)).count();
533
534            // Check if the child count matches the parent (excluding child paths which already exist in the path order)
535            // Also check that the children are sorted, so that simplifying the path order doesn't affect the resulting order of files
536            if num_children == num_unvisited_children && self.are_paths_sorted(&paths[children_start..children_end]) {
537                let mut path_name =
538                    paths[children_start].path_name.rsplit_once('/').map(|(parent, _)| parent).unwrap_or("/").to_string();
539                if path_name.is_empty() {
540                    path_name = "/".to_string();
541                }
542
543                // Replace the children with their parent
544                let offset = paths[children_start].offset;
545                paths.drain(children_start..children_end);
546                paths.insert(children_start, PathOrder { id: parent_id, parent_id: parent.parent_id, path_name, offset });
547            } else {
548                children_start = children_end;
549            }
550        }
551
552        paths.into_iter().map(|p| p.path_name).collect()
553    }
554}
555
556impl<'a> File<'a> {
557    /// Returns a reference to the name of this [`File`].
558    pub fn name(&self) -> &str {
559        &self.name
560    }
561
562    /// Returns the ID of this [`File`].
563    pub fn id(&self) -> u16 {
564        self.id
565    }
566
567    /// Returns a reference to the contents of this [`File`].
568    pub fn contents(&self) -> &[u8] {
569        &self.contents
570    }
571}
572
573impl Dir {
574    /// Returns whether this [`Dir`] is the root directory.
575    pub fn is_root(&self) -> bool {
576        self.id == ROOT_DIR_ID
577    }
578}
579
580#[derive(PartialEq, Eq, Clone, Debug)]
581struct PathOrder {
582    id: u16,
583    parent_id: u16,
584    path_name: String,
585    offset: u32,
586}
587
588impl PartialOrd for PathOrder {
589    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
590        self.offset.partial_cmp(&other.offset)
591    }
592}
593
594impl Ord for PathOrder {
595    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
596        self.offset.cmp(&other.offset)
597    }
598}
599
600/// Can be used to display the file hierarchy of a [`FileSystem`].
601pub struct DisplayFileSystem<'a> {
602    files: &'a FileSystem<'a>,
603    parent_id: u16,
604    indent: usize,
605}
606
607impl<'a> Display for DisplayFileSystem<'a> {
608    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
609        let i = format!("{:indent$}", "", indent = self.indent);
610        let parent = self.files.dir(self.parent_id);
611        let files = &self.files;
612        for child in &parent.children {
613            if FileSystem::is_dir(*child) {
614                write!(f, "{i}0x{:04x}: {: <32}", *child, files.name(*child))?;
615                writeln!(f)?;
616                write!(f, "{}", Self { files, parent_id: *child, indent: self.indent + 2 })?;
617            } else {
618                let file = files.file(*child);
619                let size = BlobSize(file.contents.len()).to_string();
620                write!(f, "{i}0x{:04x}: {: <48}{size: >7}", file.id, file.name)?;
621                writeln!(f)?;
622            }
623        }
624        Ok(())
625    }
626}