ds_rom/rom/
file.rs

1use std::{
2    borrow::Cow,
3    cmp::Ordering,
4    collections::{BinaryHeap, HashSet},
5    fmt::Display,
6    io::Write,
7    path::{Path, PathBuf},
8    str::FromStr,
9};
10
11use encoding_rs::SHIFT_JIS;
12use snafu::{Backtrace, Snafu};
13
14use super::raw::{self, FileAlloc, Fnt, FntDirectory, FntFile, FntSubtable, RawHeaderError};
15use crate::{
16    io::{read_dir, read_file, FileError},
17    str::BlobSize,
18};
19
20/// Contains files and directories to be placed into a ROM.
21pub struct FileSystem<'a> {
22    num_overlays: usize,
23    files: Vec<File<'a>>,
24    dirs: Vec<Dir>,
25    next_file_id: u16,
26    next_dir_id: u16,
27}
28
29/// A file for the [`FileSystem`] struct.
30#[derive(Clone)]
31pub struct File<'a> {
32    id: u16,
33    name: String,
34    original_offset: u32,
35    contents: Cow<'a, [u8]>,
36}
37
38/// A directory for the [`FileSystem`] struct.
39#[derive(Clone)]
40pub struct Dir {
41    id: u16,
42    name: String,
43    parent_id: u16,
44    children: Vec<u16>,
45}
46
47/// Errors related to [`FileSystem::parse`].
48#[derive(Debug, Snafu)]
49pub enum FileParseError {
50    /// Occurs when a file ID is missing from the raw FNT.
51    #[snafu(display("the file ID {id} is missing from the FNT:\n{backtrace}"))]
52    MissingFileId {
53        /// File ID.
54        id: u16,
55        /// Backtrace to the source of the error.
56        backtrace: Backtrace,
57    },
58    /// Occurs when a directory ID is missing from the faw FNT.
59    #[snafu(display("the directory ID {id} is missing from the FNT:\n{backtrace}"))]
60    MissingDirId {
61        /// Directory ID.
62        id: u16,
63        /// Backtrace to the source of the error.
64        backtrace: Backtrace,
65    },
66    /// See [`RawHeaderError`].
67    #[snafu(transparent)]
68    RawHeader {
69        /// Source error.
70        source: RawHeaderError,
71    },
72}
73
74/// Errors related to [`FileSystem::build_fnt`].
75#[derive(Debug, Snafu)]
76pub enum FileBuildError {
77    /// Occurs when a file name contains unmappable characters that could not be encoded to SHIFT-JIS.
78    #[snafu(display("the file name {name} contains unmappable character(s):\n{backtrace}"))]
79    EncodingFailed {
80        /// File name.
81        name: String,
82        /// Backtrace to the source of the error.
83        backtrace: Backtrace,
84    },
85}
86
87const ROOT_DIR_ID: u16 = 0xf000;
88
89impl<'a> FileSystem<'a> {
90    /// Creates a new [`FileSystem`]. The number of overlays are used to determine the first file ID, since overlays are also
91    /// located in the FAT but not the FNT.
92    pub fn new(num_overlays: usize) -> Self {
93        let root = Dir { id: ROOT_DIR_ID, name: "/".to_string(), parent_id: 0, children: vec![] };
94        Self { num_overlays, files: vec![], dirs: vec![root], next_file_id: num_overlays as u16, next_dir_id: ROOT_DIR_ID + 1 }
95    }
96
97    fn load_in<P: AsRef<Path>>(&mut self, path: P, parent_id: u16) -> Result<(), FileError> {
98        // Sort children by FNT order so the file/dir IDs become correct
99        let mut children =
100            read_dir(&path)?.collect::<Result<Vec<_>, _>>()?.into_iter().map(|entry| entry.path()).collect::<Vec<_>>();
101        children.sort_unstable_by(|a, b| {
102            Self::compare_for_fnt(a.to_string_lossy().as_ref(), a.is_dir(), b.to_string_lossy().as_ref(), b.is_dir())
103        });
104
105        for child in children.into_iter() {
106            let name = child.file_name().unwrap().to_string_lossy().to_string();
107            if child.is_dir() {
108                let child_id = self.next_dir_id;
109                let child_path = path.as_ref().join(&name);
110                self.make_child_dir(name, parent_id);
111                self.load_in(child_path, child_id)?;
112            } else {
113                let contents = read_file(child)?;
114                self.make_child_file(name, parent_id, contents);
115            }
116        }
117        Ok(())
118    }
119
120    /// Loads a file system from the given root directory. This will traverse and add all folders and files into the
121    /// [`FileSystem`] struct.
122    ///
123    /// # Errors
124    ///
125    /// This function will return an error if an I/O operation fails.
126    pub fn load<P: AsRef<Path>>(root: P, num_overlays: usize) -> Result<Self, FileError> {
127        let mut files = Self::new(num_overlays);
128        files.load_in(root, ROOT_DIR_ID)?;
129        Ok(files)
130    }
131
132    /// Returns whether the ID is a directory ID.
133    pub fn is_dir(id: u16) -> bool {
134        id >= ROOT_DIR_ID
135    }
136
137    /// Returns whether the ID is a file ID.
138    pub fn is_file(id: u16) -> bool {
139        !Self::is_dir(id)
140    }
141
142    /// Returns the name of a directory or file.
143    pub fn name(&self, id: u16) -> &str {
144        if Self::is_dir(id) {
145            &self.dir(id).name
146        } else {
147            &self.file(id).name
148        }
149    }
150
151    /// Returns a directory.
152    pub fn dir(&self, id: u16) -> &Dir {
153        &self.dirs[id as usize & 0xfff]
154    }
155
156    fn dir_mut(&mut self, id: u16) -> &mut Dir {
157        &mut self.dirs[id as usize & 0xfff]
158    }
159
160    /// Returns a file.
161    pub fn file(&self, id: u16) -> &File {
162        &self.files[id as usize - self.num_overlays]
163    }
164
165    fn parse_subtable(
166        fnt: &Fnt,
167        fat: &[FileAlloc],
168        rom: &'a raw::Rom,
169        parent: &mut Dir,
170        dirs: &mut Vec<Option<Dir>>,
171        files: &mut Vec<Option<File<'a>>>,
172    ) -> (u16, u16) {
173        let subtable_index = parent.id as usize & 0xfff;
174        let subtable = &fnt.subtables[subtable_index];
175
176        let mut max_file_id = 0;
177        let mut max_dir_id = 0;
178        for FntFile { id, name } in subtable.iter() {
179            let name = name.to_string();
180
181            if Self::is_dir(id) {
182                max_dir_id = max_dir_id.max(id);
183                let mut dir = Dir { id, name, parent_id: parent.id, children: vec![] };
184                let (max_child_dir_id, max_child_file_id) = Self::parse_subtable(fnt, fat, rom, &mut dir, dirs, files);
185                max_dir_id = max_dir_id.max(max_child_dir_id);
186                max_file_id = max_file_id.max(max_child_file_id);
187
188                dirs[id as usize & 0xfff] = Some(dir);
189                parent.children.push(id);
190            } else {
191                max_file_id = max_file_id.max(id);
192                let alloc = fat[id as usize];
193                let contents = &rom.data()[alloc.range()];
194                files[id as usize] = Some(File { id, name, original_offset: alloc.start, contents: Cow::Borrowed(contents) });
195                parent.children.push(id);
196            }
197        }
198        (max_file_id, max_dir_id)
199    }
200
201    /// Parses an FNT, FAT and ROM to create a [`FileSystem`].
202    ///
203    /// # Errors
204    ///
205    /// This function will return an error if [`raw::Rom::num_arm9_overlays`] or [`raw::Rom::num_arm7_overlays`] fails, or if
206    /// a file or directory ID is missing from the FNT.
207    pub fn parse(fnt: &Fnt, fat: &[FileAlloc], rom: &'a raw::Rom) -> Result<Self, FileParseError> {
208        let num_overlays = rom.num_arm9_overlays()? + rom.num_arm7_overlays()?;
209
210        let mut root = Dir { id: ROOT_DIR_ID, name: "/".to_string(), parent_id: 0, children: vec![] };
211        let mut dirs = vec![None; fnt.subtables.len()];
212        let mut files = vec![None; fat.len()];
213        let (max_file_id, max_dir_id) = Self::parse_subtable(fnt, fat, rom, &mut root, &mut dirs, &mut files);
214        dirs[0] = Some(root);
215
216        let files = files
217            .into_iter()
218            .skip(num_overlays)
219            .enumerate()
220            .map(|(id, f)| f.ok_or(MissingFileIdSnafu { id: id as u16 }.build()))
221            .collect::<Result<Vec<_>, _>>()?;
222        let dirs = dirs
223            .into_iter()
224            .enumerate()
225            .map(|(id, d)| d.ok_or(MissingDirIdSnafu { id: id as u16 + ROOT_DIR_ID }.build()))
226            .collect::<Result<Vec<_>, _>>()?;
227
228        Ok(FileSystem { files, dirs, num_overlays, next_file_id: max_file_id + 1, next_dir_id: max_dir_id + 1 })
229    }
230
231    fn find_first_file_id(&self, parent: &Dir) -> u16 {
232        let child = *parent.children.first().expect("No first file ID found, directory is empty");
233        if Self::is_file(child) {
234            child
235        } else {
236            self.find_first_file_id(self.dir(child))
237        }
238    }
239
240    fn build_subtable(&self, parent: &Dir) -> Result<FntSubtable, FileBuildError> {
241        let mut data = vec![];
242
243        for child in &parent.children {
244            let child = *child;
245
246            let is_dir = Self::is_dir(child);
247            let name = self.name(child);
248
249            let (sjis_name, _, had_errors) = SHIFT_JIS.encode(name);
250            if had_errors {
251                return EncodingFailedSnafu { name }.fail();
252            }
253
254            let name_length = sjis_name.len() as u8 & 0x7f;
255            let directory_bit = if is_dir { 0x80 } else { 0 };
256            data.push(name_length | directory_bit);
257
258            data.extend(sjis_name.iter().take(0x7f));
259            if is_dir {
260                data.write_all(&u16::to_le_bytes(child)).unwrap();
261            }
262        }
263
264        Ok(FntSubtable {
265            directory: Cow::Owned(FntDirectory {
266                subtable_offset: 0,
267                first_file_id: self.find_first_file_id(parent),
268                parent_id: parent.parent_id,
269            }),
270            data: Cow::Owned(data),
271        })
272    }
273
274    fn build_fnt_recursive(&'a self, subtables: &mut Vec<FntSubtable<'a>>, parent_id: u16) -> Result<(), FileBuildError> {
275        let parent = &self.dir(parent_id);
276        subtables.push(self.build_subtable(parent)?);
277        for child in &parent.children {
278            if Self::is_dir(*child) {
279                self.build_fnt_recursive(subtables, *child)?;
280            }
281        }
282        Ok(())
283    }
284
285    /// Builds an FNT from this [`FileSystem`].
286    ///
287    /// # Errors
288    ///
289    /// This function will return an error if a file/directory name contains non-ASCII characters.
290    pub fn build_fnt(&self) -> Result<Fnt, FileBuildError> {
291        let mut subtables = vec![];
292        self.build_fnt_recursive(&mut subtables, ROOT_DIR_ID)?;
293        Ok(Fnt { subtables: subtables.into_boxed_slice() })
294    }
295
296    fn compare_for_fnt(a: &str, a_dir: bool, b: &str, b_dir: bool) -> Ordering {
297        let files_first = a_dir.cmp(&b_dir);
298        if files_first.is_ne() {
299            return files_first;
300        }
301
302        // Convert to Shift-JIS first, *then* convert to lowercase byte-by-byte
303        // without accounting for multibyte characters like コ (83 52, but sorted
304        // as if it was actually ビ / 83 72). This strcasecmp-like behavior was
305        // observed in 999's Japanese file names.
306        let (mut a_bytes, _, _) = SHIFT_JIS.encode(a);
307        let (mut b_bytes, _, _) = SHIFT_JIS.encode(b);
308        let a_vec = a_bytes.to_mut();
309        let b_vec = b_bytes.to_mut();
310        a_vec.make_ascii_lowercase();
311        b_vec.make_ascii_lowercase();
312
313        // Lexicographic, case-insensitive Shift-JIS order
314        a_vec.cmp(&b_vec)
315    }
316
317    fn sort_for_fnt_in(&mut self, parent_id: u16) {
318        let mut parent = self.dir(parent_id).clone();
319        parent
320            .children
321            .sort_by(|a, b| Self::compare_for_fnt(self.name(*a), Self::is_dir(*a), self.name(*b), Self::is_dir(*b)));
322
323        for child in &mut parent.children {
324            if Self::is_dir(*child) {
325                self.sort_for_fnt_in(*child);
326            }
327        }
328
329        *self.dir_mut(parent_id) = parent;
330    }
331
332    /// Sorts the entire [`FileSystem`] so that it's laid out in the right order for the FNT.
333    pub fn sort_for_fnt(&mut self) {
334        self.sort_for_fnt_in(ROOT_DIR_ID);
335    }
336
337    fn compare_for_rom(a: &str, b: &str) -> Ordering {
338        // Lexicographic UTF-8 order
339        a.cmp(b)
340    }
341
342    fn sort_for_rom_in(&mut self, parent_id: u16) {
343        let mut parent = self.dir(parent_id).clone();
344        parent.children.sort_by(|a, b| Self::compare_for_rom(self.name(*a), self.name(*b)));
345
346        for child in &mut parent.children {
347            if Self::is_dir(*child) {
348                self.sort_for_rom_in(*child);
349            }
350        }
351
352        *self.dir_mut(parent_id) = parent;
353    }
354
355    /// Sorts the entire [`FileSystem`] so that files laid out in the right order for appending to the ROM.
356    pub fn sort_for_rom(&mut self) {
357        self.sort_for_rom_in(ROOT_DIR_ID);
358    }
359
360    fn find_path_in(&self, path: &str, parent_id: u16) -> Option<u16> {
361        let parent = &self.dir(parent_id);
362        let (child_name, next) = path.split_once('/').map(|(c, n)| (c, Some(n))).unwrap_or((path, None));
363        let child = parent.children.iter().find(|id| self.name(**id) == child_name)?;
364        if let Some(next) = next {
365            if Self::is_dir(*child) {
366                self.find_path_in(next, *child)
367            } else {
368                None
369            }
370        } else {
371            Some(*child)
372        }
373    }
374
375    fn find_path(&self, path: &str) -> Option<u16> {
376        self.find_path_in(path, ROOT_DIR_ID)
377    }
378
379    fn make_child_dir(&mut self, name: String, parent_id: u16) -> &Dir {
380        let id = self.next_dir_id;
381        self.dirs.push(Dir { id, name, parent_id, children: vec![] });
382        let parent = self.dir_mut(parent_id);
383        parent.children.push(id);
384        self.next_dir_id += 1;
385        self.dirs.last().unwrap()
386    }
387
388    fn make_child_file(&mut self, name: String, parent_id: u16, contents: Vec<u8>) -> &File {
389        let id = self.next_file_id;
390        self.files.push(File { id, name, original_offset: 0, contents: contents.into() });
391        let parent = self.dir_mut(parent_id);
392        parent.children.push(id);
393        self.next_file_id += 1;
394        self.files.last().unwrap()
395    }
396
397    fn traverse_nonvisited_files<Cb>(&self, visited: &mut HashSet<u16>, callback: &mut Cb, subdir: &Dir, path: &Path)
398    where
399        Cb: FnMut(&File, &Path),
400    {
401        if visited.contains(&subdir.id) {
402            return;
403        }
404        for child in &subdir.children {
405            if visited.contains(child) {
406                continue;
407            }
408
409            if Self::is_dir(*child) {
410                let path = path.join(self.name(*child));
411                self.traverse_nonvisited_files(visited, callback, self.dir(*child), &path);
412            } else {
413                callback(self.file(*child), path);
414                let first_time_visiting_file = visited.insert(*child);
415                assert!(first_time_visiting_file);
416            }
417        }
418        let first_time_visiting_file = visited.insert(subdir.id);
419        assert!(first_time_visiting_file);
420    }
421
422    /// Traverses the [`FileSystem`] and calls `callback` for each file found. The directories will be prioritized according to
423    /// the `path_order`.
424    pub fn traverse_files<I, Cb>(&self, path_order: I, mut callback: Cb)
425    where
426        I: IntoIterator<Item = &'a str>,
427        Cb: FnMut(&File, &Path),
428    {
429        let mut visited = HashSet::<u16>::new();
430
431        for path in path_order {
432            let path = path.strip_prefix("/").unwrap_or(path);
433            let path_buf = &PathBuf::from_str(path).unwrap();
434            let subdir = if path.trim() == "" {
435                self.dir(ROOT_DIR_ID)
436            } else {
437                let Some(child) = self.find_path(path) else { continue };
438                if visited.contains(&child) {
439                    continue;
440                }
441
442                if Self::is_dir(child) {
443                    self.dir(child)
444                } else {
445                    let file = self.file(child);
446                    callback(file, path_buf);
447                    let first_time_visiting_file = visited.insert(file.id);
448                    assert!(first_time_visiting_file);
449                    continue;
450                }
451            };
452            self.traverse_nonvisited_files(&mut visited, &mut callback, subdir, path_buf);
453        }
454    }
455
456    fn max_file_id_in(&self, parent_id: u16) -> u16 {
457        let mut max_id = 0;
458        let parent = self.dir(parent_id);
459        for child in &parent.children {
460            let id = if Self::is_dir(*child) { self.max_file_id_in(*child) } else { *child };
461            if id > max_id {
462                max_id = id;
463            }
464        }
465        max_id
466    }
467
468    /// Returns the max file ID of this [`FileSystem`].
469    pub fn max_file_id(&self) -> u16 {
470        self.max_file_id_in(ROOT_DIR_ID)
471    }
472
473    /// Creates a [`DisplayFileSystem`] which implements [`Display`].
474    pub fn display(&self, indent: usize) -> DisplayFileSystem {
475        DisplayFileSystem { files: self, parent_id: ROOT_DIR_ID, indent }
476    }
477
478    fn traverse_and_compute_path_order(&self, path: &str, path_order: &mut BinaryHeap<PathOrder>, parent: &Dir) {
479        for child in &parent.children {
480            let path = format!("{}/{}", path, self.name(*child));
481            if Self::is_dir(*child) {
482                self.traverse_and_compute_path_order(path.as_str(), path_order, self.dir(*child));
483            } else {
484                path_order.push(PathOrder {
485                    id: *child,
486                    parent_id: parent.id,
487                    path_name: path,
488                    offset: self.file(*child).original_offset,
489                });
490            }
491        }
492    }
493
494    fn are_paths_sorted(&self, paths: &[PathOrder]) -> bool {
495        paths.windows(2).all(|w| Self::compare_for_rom(self.name(w[0].id), self.name(w[1].id)).is_lt())
496    }
497
498    /// Computes the path order that the [`FileSystem`] is currently in. This can be saved and reused in
499    /// [`Self::traverse_files`] to traverse in the same order later.
500    pub fn compute_path_order(&self) -> Vec<String> {
501        let mut path_order = BinaryHeap::new();
502        self.traverse_and_compute_path_order("", &mut path_order, self.dir(ROOT_DIR_ID));
503        let mut paths = path_order.into_sorted_vec();
504
505        // Loop to simplify path order
506        let mut children_start = 0;
507        while children_start < paths.len() {
508            let parent_id = paths[children_start].parent_id;
509            if parent_id == 0 {
510                children_start += 1;
511                continue;
512            }
513
514            // Find all surrounding children with the same parent as the current child
515            let children_end = paths[children_start..]
516                .iter()
517                .position(|c| c.parent_id != parent_id)
518                .map(|pos| children_start + pos)
519                .unwrap_or(paths.len());
520            children_start = paths[..children_start]
521                .iter()
522                .enumerate()
523                .rev()
524                .find_map(|(index, child)| (child.parent_id != parent_id).then_some(index + 1))
525                .unwrap_or(0);
526            let num_children = children_end - children_start;
527
528            let parent = self.dir(parent_id);
529            let num_unvisited_children =
530                parent.children.iter().filter(|c| !paths[..children_start].iter().any(|p| p.id == **c)).count();
531
532            // Check if the child count matches the parent (excluding child paths which already exist in the path order)
533            // Also check that the children are sorted, so that simplifying the path order doesn't affect the resulting order of files
534            if num_children == num_unvisited_children && self.are_paths_sorted(&paths[children_start..children_end]) {
535                let mut path_name =
536                    paths[children_start].path_name.rsplit_once('/').map(|(parent, _)| parent).unwrap_or("/").to_string();
537                if path_name.is_empty() {
538                    path_name = "/".to_string();
539                }
540
541                // Replace the children with their parent
542                let offset = paths[children_start].offset;
543                paths.drain(children_start..children_end);
544                paths.insert(children_start, PathOrder { id: parent_id, parent_id: parent.parent_id, path_name, offset });
545            } else {
546                children_start = children_end;
547            }
548        }
549
550        paths.into_iter().map(|p| p.path_name).collect()
551    }
552}
553
554impl File<'_> {
555    /// Returns a reference to the name of this [`File`].
556    pub fn name(&self) -> &str {
557        &self.name
558    }
559
560    /// Returns the ID of this [`File`].
561    pub fn id(&self) -> u16 {
562        self.id
563    }
564
565    /// Returns a reference to the contents of this [`File`].
566    pub fn contents(&self) -> &[u8] {
567        &self.contents
568    }
569}
570
571impl Dir {
572    /// Returns whether this [`Dir`] is the root directory.
573    pub fn is_root(&self) -> bool {
574        self.id == ROOT_DIR_ID
575    }
576}
577
578#[derive(PartialEq, Eq, Clone, Debug)]
579struct PathOrder {
580    id: u16,
581    parent_id: u16,
582    path_name: String,
583    offset: u32,
584}
585
586impl PartialOrd for PathOrder {
587    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
588        Some(self.cmp(other))
589    }
590}
591
592impl Ord for PathOrder {
593    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
594        self.offset.cmp(&other.offset)
595    }
596}
597
598/// Can be used to display the file hierarchy of a [`FileSystem`].
599pub struct DisplayFileSystem<'a> {
600    files: &'a FileSystem<'a>,
601    parent_id: u16,
602    indent: usize,
603}
604
605impl Display for DisplayFileSystem<'_> {
606    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
607        let i = " ".repeat(self.indent);
608        let parent = self.files.dir(self.parent_id);
609        let files = &self.files;
610        for child in &parent.children {
611            if FileSystem::is_dir(*child) {
612                write!(f, "{i}0x{:04x}: {: <32}", *child, files.name(*child))?;
613                writeln!(f)?;
614                write!(f, "{}", Self { files, parent_id: *child, indent: self.indent + 2 })?;
615            } else {
616                let file = files.file(*child);
617                let size = BlobSize(file.contents.len()).to_string();
618                write!(f, "{i}0x{:04x}: {: <48}{size: >7}", file.id, file.name)?;
619                writeln!(f)?;
620            }
621        }
622        Ok(())
623    }
624}