// liteboxfs 0.2.0
//
// A modern POSIX filesystem in a SQLite database.
// Documentation
use std::{
    collections::{HashMap, hash_map::Entry},
    ffi::{OsStr, OsString},
    iter::FusedIterator,
    path::{Path, PathBuf},
};

use crate::{
    FileId, FileKind, FileMetadata, FilesystemId, block::FileId as StoreFileId,
    file_metadata::RawMetadata, sql::AncestorPathId,
};

/// An entry when listing a directory's children or descendants.
///
/// This is returned by the iterators [`Children`] and [`Descendants`].
///
/// Every field is stored by value, so an entry does not borrow from the iterator that produced
/// it and can be cloned freely.
#[derive(Debug, Clone)]
pub struct DirectoryEntry {
    // Full path of the file: the parent directory's path joined with `name`.
    path: PathBuf,
    // Name of the file, i.e. the part that was appended to the parent path to form `path`.
    name: OsString,
    // Store-level file ID. It is combined with `filesystem_id` to build the public `FileId`.
    file_id: StoreFileId,
    kind: FileKind,
    metadata: FileMetadata,
    // ID of the filesystem the entry belongs to, copied from the iterator that produced it.
    filesystem_id: FilesystemId,
}

impl DirectoryEntry {
    /// Returns the file's name.
    pub fn name(&self) -> &OsStr {
        self.name.as_os_str()
    }

    /// Returns the file's absolute path.
    pub fn path(&self) -> &Path {
        self.path.as_path()
    }

    /// Returns the file's [`FileKind`].
    pub fn kind(&self) -> &FileKind {
        let Self { kind, .. } = self;
        kind
    }

    /// Builds a [`FileId`] which uniquely identifies the file in the filesystem.
    ///
    /// The ID is assembled from the entry's store-level file ID and its filesystem ID.
    pub fn file_id(&self) -> FileId {
        let Self {
            file_id,
            filesystem_id,
            ..
        } = self;
        FileId::new(*file_id, *filesystem_id)
    }

    /// Returns the file's metadata.
    pub fn metadata(&self) -> &FileMetadata {
        let Self { metadata, .. } = self;
        metadata
    }
}

/// An iterator over the immediate children of a directory.
///
/// This returns entries of type [`DirectoryEntry`].
///
/// You can get a [`Children`] iterator with [`Filesystem::children`].
///
/// All rows are handed to the iterator when it is constructed, so its length is known exactly
/// at every step.
///
/// [`Filesystem::children`]: crate::Filesystem::children
#[derive(Debug)]
pub struct Children {
    // Rows that have not been yielded yet. `next` takes rows from the end of this vector, so
    // entries are yielded in the reverse of the order in which the rows were supplied.
    rows: Vec<ChildrenRow>,
    // Path of the directory being listed. Each row's name is joined onto it.
    parent_path: PathBuf,
    // Copied into every `DirectoryEntry` that is yielded.
    filesystem_id: FilesystemId,
}

impl Children {
    pub(crate) fn new(
        rows: Vec<ChildrenRow>,
        parent_path: PathBuf,
        filesystem_id: FilesystemId,
    ) -> Self {
        Self {
            rows,
            parent_path,
            filesystem_id,
        }
    }
}

impl Iterator for Children {
    type Item = DirectoryEntry;

    /// Removes the last remaining row and turns it into a [`DirectoryEntry`].
    ///
    /// Returns `None` once every row has been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        let parent = &self.parent_path;
        let filesystem_id = self.filesystem_id;

        self.rows.pop().map(|row| {
            // The full path has to be built before the name is moved into the entry.
            let path = parent.join(&row.name);

            DirectoryEntry {
                path,
                name: row.name,
                file_id: row.file_id,
                kind: row.kind,
                metadata: FileMetadata::from_raw(row.metadata),
                filesystem_id,
            }
        })
    }

    /// Reports the number of rows left as both the lower and the upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.rows.len();
        (remaining, Some(remaining))
    }
}

// `size_hint` reports `rows.len()` as both bounds, so the remaining length is always exact.
impl ExactSizeIterator for Children {}

// `next` pops from a `Vec`, which keeps returning `None` once it is empty.
impl FusedIterator for Children {}

/// An iterator over all the descendants of a directory.
///
/// This returns entries of type [`DirectoryEntry`].
///
/// You can get a [`Descendants`] iterator with [`Filesystem::descendants`].
///
/// The rows backing the iterator must be ordered so that every directory comes before its own
/// children; advancing the iterator panics if a row's parent path has not been seen yet.
///
/// [`Filesystem::descendants`]: crate::Filesystem::descendants
#[derive(Debug)]
pub struct Descendants {
    // Rows that have not been yielded yet, stored in reverse so that popping from the end yields
    // them in the order they were originally supplied.
    rows: Vec<DescendantsRow>,
    // This maps path row IDs to their full absolute paths. Once all of a path's descendants have
    // been returned by the iterator, it is removed from this map.
    //
    // The purpose of this approach is to reduce memory usage over storing the entire tree of paths
    // in memory. Instead, we only store the full tree of file names in memory and reconstruct the
    // full paths as we go.
    path_cache: HashMap<i64, PathBuf>,
    // This maps path row IDs to the number of children they have left to be iterated over. Once
    // this reaches zero for a path, it is removed from this map and `path_cache` above.
    remaining_children: HashMap<i64, usize>,
    // Copied into every `DirectoryEntry` that is yielded.
    filesystem_id: FilesystemId,
}

impl Descendants {
    pub(crate) fn new(
        mut rows: Vec<DescendantsRow>,
        ancestor_id: AncestorPathId,
        base_path: PathBuf,
        filesystem_id: FilesystemId,
    ) -> Self {
        let mut remaining_children: HashMap<i64, usize> = HashMap::new();
        for row in &rows {
            *remaining_children.entry(row.parent_path_id).or_insert(0) += 1;
        }

        let mut path_cache = HashMap::new();
        if remaining_children.contains_key(&ancestor_id.0) {
            path_cache.insert(ancestor_id.0, base_path);
        }

        rows.reverse();

        Self {
            rows,
            path_cache,
            remaining_children,
            filesystem_id,
        }
    }
}

impl Iterator for Descendants {
    type Item = DirectoryEntry;

    /// Yields the next descendant, rebuilding its full path from the cached path of its parent.
    ///
    /// Returns `None` once every row has been consumed.
    ///
    /// # Panics
    ///
    /// Panics if the parent path of the row is not in the cache, which means the rows were not
    /// ordered with parents before their children.
    fn next(&mut self) -> Option<Self::Item> {
        let row = self.rows.pop()?;
        let parent_id = row.parent_path_id;
        let own_id = row.path_id;

        let path = self
            .path_cache
            .get(&parent_id)
            .map(|parent| parent.join(&row.name))
            .expect("Parent paths should be returned before their children.");

        // One fewer child is outstanding for the parent. When none are left, the parent's
        // bookkeeping and cached path are dropped.
        if let Entry::Occupied(mut pending) = self.remaining_children.entry(parent_id) {
            let left = pending.get_mut();
            *left -= 1;
            if *left == 0 {
                pending.remove();
                self.path_cache.remove(&parent_id);
            }
        }

        // A path is only worth caching when it is a directory that later rows will name as
        // their parent.
        let has_pending_children = self.remaining_children.contains_key(&own_id);
        if has_pending_children && matches!(row.kind, FileKind::Dir) {
            self.path_cache.insert(own_id, path.clone());
        }

        let entry = DirectoryEntry {
            path,
            name: row.name,
            file_id: row.file_id,
            kind: row.kind,
            filesystem_id: self.filesystem_id,
            metadata: FileMetadata::from_raw(row.metadata),
        };
        Some(entry)
    }

    /// Reports the number of rows left as both the lower and the upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.rows.len();
        (remaining, Some(remaining))
    }
}

// `size_hint` reports `rows.len()` as both bounds, so the remaining length is always exact.
impl ExactSizeIterator for Descendants {}

// `next` returns early with `None` whenever `rows` is empty, and nothing refills `rows`.
impl FusedIterator for Descendants {}

/// A single row backing the [`Children`] iterator.
///
/// [`Children`] converts each row into a [`DirectoryEntry`].
// NOTE(review): presumably filled in from a SQL query result — confirm against the caller.
#[derive(Debug)]
pub struct ChildrenRow {
    /// Not read when a [`DirectoryEntry`] is built from the row.
    // NOTE(review): presumably the path ID of the directory being listed — confirm.
    pub ancestor_id: AncestorPathId,
    /// The file's name. It is joined onto the parent path to form the entry's full path.
    pub name: OsString,
    /// The store-level ID of the file.
    pub file_id: StoreFileId,
    /// The kind of the file.
    pub kind: FileKind,
    /// Raw metadata, converted with [`FileMetadata::from_raw`] when the entry is built.
    pub metadata: RawMetadata,
}

/// A single row backing the [`Descendants`] iterator.
///
/// [`Descendants`] converts each row into a [`DirectoryEntry`], using the two path IDs to
/// rebuild the file's full path from its parent's path.
// NOTE(review): presumably filled in from a SQL query result — confirm against the caller.
#[derive(Debug)]
pub struct DescendantsRow {
    /// Path row ID of this file. A directory's full path is cached under this key.
    pub path_id: i64,
    /// The file's name. It is joined onto the parent's path to form the entry's full path.
    pub name: OsString,
    /// Path row ID of the parent directory, used to look up the parent's cached path.
    pub parent_path_id: i64,
    /// The store-level ID of the file.
    pub file_id: StoreFileId,
    /// The kind of the file. Only rows of kind [`FileKind::Dir`] have their path cached.
    pub kind: FileKind,
    /// Raw metadata, converted with [`FileMetadata::from_raw`] when the entry is built.
    pub metadata: RawMetadata,
}

/// An entry when walking a directory tree with [`Filesystem::walk`].
///
/// Unlike [`DirectoryEntry`], a [`WalkEntry`] borrows its path, name, kind, and metadata for the
/// lifetime `'fs` rather than owning them.
///
/// [`Filesystem::walk`]: crate::Filesystem::walk
#[derive(Debug)]
pub struct WalkEntry<'fs> {
    // Borrowed absolute path of the file.
    pub(super) path: &'fs Path,
    // Borrowed name of the file.
    pub(super) name: &'fs OsStr,
    // Store-level file ID. It is combined with `filesystem_id` to build the public `FileId`.
    pub(super) file_id: StoreFileId,
    pub(super) kind: &'fs FileKind,
    pub(super) metadata: &'fs FileMetadata,
    pub(super) filesystem_id: FilesystemId,
}

impl<'fs> WalkEntry<'fs> {
    /// Returns the file's name.
    pub fn name(&self) -> &OsStr {
        let Self { name, .. } = *self;
        name
    }

    /// Returns the file's absolute path.
    pub fn path(&self) -> &Path {
        let Self { path, .. } = *self;
        path
    }

    /// Returns the file's [`FileKind`].
    pub fn kind(&self) -> &FileKind {
        let Self { kind, .. } = *self;
        kind
    }

    /// Builds a [`FileId`] which uniquely identifies the file in the filesystem.
    ///
    /// The ID is assembled from the entry's store-level file ID and its filesystem ID.
    pub fn file_id(&self) -> FileId {
        let Self {
            file_id,
            filesystem_id,
            ..
        } = *self;
        FileId::new(file_id, filesystem_id)
    }

    /// Returns the file's metadata.
    pub fn metadata(&self) -> &FileMetadata {
        let Self { metadata, .. } = *self;
        metadata
    }
}

/// A value emitted when walking a directory tree with [`Filesystem::walk`].
///
/// This is used with [`Filesystem::walk`], which accepts a callback that is passed a [`WalkVisit`]
/// for each file visited during the walk.
///
/// Each file visited emits a [`WalkVisit::File`] or [`WalkVisit::EnterDir`]. Directories
/// additionally emit a [`WalkVisit::LeaveDir`] after all of their descendants have been visited.
///
/// All data carried by the variants is borrowed for the lifetime `'fs`.
///
/// [`Filesystem::walk`]: crate::Filesystem::walk
#[derive(Debug)]
pub enum WalkVisit<'fs> {
    /// A regular or special file.
    File(WalkEntry<'fs>),

    /// A directory, before its children are visited.
    EnterDir(WalkEntry<'fs>),

    /// A directory, after its descendants are visited.
    ///
    /// Only the directory's path is provided here, not a full [`WalkEntry`].
    LeaveDir(&'fs Path),
}

/// A value that controls which files are visited by [`Filesystem::walk`].
///
/// The type parameter `R` is the type of the value carried by [`WalkPredicate::Stop`].
///
/// [`Filesystem::walk`]: crate::Filesystem::walk
#[derive(Debug)]
pub enum WalkPredicate<R> {
    /// Continue walking the tree.
    Continue,

    /// Continue without visiting the siblings of the current file or directory.
    ///
    /// When used on [`WalkVisit::EnterDir`], the directory's descendants will still be visited.
    SkipSiblings,

    /// Continue without visiting the descendants of the current directory.
    ///
    /// This is only meaningful when used on [`WalkVisit::EnterDir`].
    SkipDescendants,

    /// Stop and return with the given value.
    Stop(R),
}

/// Options for controlling the behavior of [`Filesystem::walk`].
///
/// Start from [`WalkOptions::new`] (or [`Default::default`]) and adjust individual settings with
/// the builder-style methods, each of which takes the options by value and returns them.
///
/// [`Filesystem::walk`]: crate::Filesystem::walk
#[derive(Debug)]
pub struct WalkOptions {
    // Maximum depth to walk; `None` means there is no limit.
    pub(super) max_depth: Option<usize>,
    // Whether symbolic links to directories are followed.
    pub(super) follow_symlinks: bool,
}

impl Default for WalkOptions {
    fn default() -> Self {
        Self::new()
    }
}

impl WalkOptions {
    /// Create a new [`WalkOptions`] with default values.
    ///
    /// By default there is no depth limit and symbolic links are not followed.
    #[must_use]
    pub fn new() -> Self {
        Self {
            max_depth: None,
            follow_symlinks: false,
        }
    }

    /// Set the maximum depth to walk.
    ///
    /// `None` means there is no limit. `Some(0)` means only the starting directory is visited.
    /// `Some(1)` means the starting directory and its immediate children are visited, and so on.
    ///
    /// The default is `None`.
    ///
    /// This consumes the options and returns the updated value, so the result must be used.
    #[must_use = "this method returns the updated options; the original value is consumed"]
    pub fn max_depth(mut self, depth: Option<usize>) -> Self {
        self.max_depth = depth;
        self
    }

    /// Set whether to follow symbolic links to directories.
    ///
    /// When `true`, a symlink that points to a directory is visited as [`WalkVisit::EnterDir`], but
    /// the [`WalkEntry`] still describes the symlink itself, not its target. Descendants of the
    /// target directory appear as if they are descendants of the symlink.
    ///
    /// A symlink target that is broken or not a directory is not followed and is visited as
    /// [`WalkVisit::File`].
    ///
    /// If following a symbolic link would trigger a loop, the walk returns [`Error::SymlinkLoop`].
    ///
    /// The default is `false`.
    ///
    /// This consumes the options and returns the updated value, so the result must be used.
    ///
    /// [`Error::SymlinkLoop`]: crate::Error::SymlinkLoop
    #[must_use = "this method returns the updated options; the original value is consumed"]
    pub fn follow_symlinks(mut self, follow: bool) -> Self {
        self.follow_symlinks = follow;
        self
    }
}