Skip to main content

rustledger_loader/
vfs.rs

1//! Virtual filesystem abstraction for platform-agnostic file loading.
2//!
3//! This module provides a trait for abstracting file system operations,
4//! enabling the loader to work with both real filesystems and in-memory
5//! file maps (useful for WASM environments).
6
7use crate::LoadError;
8use std::collections::HashMap;
9use std::fs;
10use std::path::{Path, PathBuf};
11use std::sync::Arc;
12
13/// Abstract file system interface for file loading.
14///
15/// This trait allows the loader to work with different file system backends:
16/// - [`DiskFileSystem`]: Reads from the actual filesystem (default)
17/// - [`VirtualFileSystem`]: Reads from an in-memory file map (for WASM)
18pub trait FileSystem: Send + Sync + std::fmt::Debug {
19    /// Read file content at the given path.
20    ///
21    /// # Errors
22    ///
23    /// Returns [`LoadError::Io`] if the file cannot be read.
24    fn read(&self, path: &Path) -> Result<Arc<str>, LoadError>;
25
26    /// Check if a file exists at the given path.
27    fn exists(&self, path: &Path) -> bool;
28
29    /// Check if a path is a GPG-encrypted file.
30    ///
31    /// For virtual filesystems, this always returns false since
32    /// encrypted files should be decrypted before being added.
33    fn is_encrypted(&self, path: &Path) -> bool;
34
35    /// Normalize a path for this filesystem.
36    ///
37    /// For disk filesystems, this makes paths absolute.
38    /// For virtual filesystems, this just cleans up the path.
39    fn normalize(&self, path: &Path) -> PathBuf;
40
41    /// Whether this filesystem supports parallel file reads.
42    ///
43    /// Disk filesystems return `true` — multiple files can be read
44    /// concurrently from different threads. Virtual filesystems return
45    /// `false` since they may use shared mutable state.
46    fn supports_parallel_read(&self) -> bool {
47        false
48    }
49
50    /// Expand a glob pattern and return matching paths.
51    ///
52    /// # Errors
53    ///
54    /// Returns an error string if the pattern is invalid.
55    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, String> {
56        let _ = pattern;
57        Err("glob is not supported by this filesystem".to_string())
58    }
59}
60
61/// Default filesystem that reads from disk.
62///
63/// This is the standard implementation used by the CLI and other
64/// filesystem-based tools.
65#[derive(Debug, Default, Clone)]
66pub struct DiskFileSystem;
67
68impl FileSystem for DiskFileSystem {
69    fn read(&self, path: &Path) -> Result<Arc<str>, LoadError> {
70        let bytes = fs::read(path).map_err(|e| LoadError::Io {
71            path: path.to_path_buf(),
72            source: e,
73        })?;
74
75        // Try zero-copy conversion first (common case), fall back to lossy
76        let content = match String::from_utf8(bytes) {
77            Ok(s) => s,
78            Err(e) => String::from_utf8_lossy(e.as_bytes()).into_owned(),
79        };
80
81        Ok(content.into())
82    }
83
84    fn exists(&self, path: &Path) -> bool {
85        path.exists()
86    }
87
88    fn is_encrypted(&self, path: &Path) -> bool {
89        match path.extension().and_then(|e| e.to_str()) {
90            Some("gpg") => true,
91            Some("asc") => {
92                // Check for PGP header in the first 1024 bytes.
93                // Only read what we need instead of the entire file.
94                use std::io::Read;
95                let Ok(file) = std::fs::File::open(path) else {
96                    return false;
97                };
98                let mut buf = [0u8; 1024];
99                let n = file.take(1024).read(&mut buf).unwrap_or(0);
100                let header = String::from_utf8_lossy(&buf[..n]);
101                header.contains("-----BEGIN PGP MESSAGE-----")
102            }
103            _ => false,
104        }
105    }
106
107    fn normalize(&self, path: &Path) -> PathBuf {
108        // Try canonicalize first (works on most platforms, resolves symlinks)
109        if let Ok(canonical) = path.canonicalize() {
110            return canonical;
111        }
112
113        // Fallback: make absolute without resolving symlinks (WASI-compatible)
114        if path.is_absolute() {
115            path.to_path_buf()
116        } else if let Ok(cwd) = std::env::current_dir() {
117            // Join with current directory and clean up the path
118            let mut result = cwd;
119            for component in path.components() {
120                match component {
121                    std::path::Component::ParentDir => {
122                        result.pop();
123                    }
124                    std::path::Component::Normal(s) => {
125                        result.push(s);
126                    }
127                    std::path::Component::CurDir => {}
128                    std::path::Component::RootDir => {
129                        result = PathBuf::from("/");
130                    }
131                    std::path::Component::Prefix(p) => {
132                        result = PathBuf::from(p.as_os_str());
133                    }
134                }
135            }
136            result
137        } else {
138            // Last resort: just return the path as-is
139            path.to_path_buf()
140        }
141    }
142
143    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, String> {
144        let entries = glob::glob(pattern).map_err(|e| e.to_string())?;
145        // Skip entries that error (e.g., permission denied) rather than
146        // failing the entire glob. The loader will catch missing/unreadable
147        // files later when it tries to read them.
148        let mut matched: Vec<PathBuf> = entries.filter_map(Result::ok).collect();
149        matched.sort();
150        Ok(matched)
151    }
152
153    fn supports_parallel_read(&self) -> bool {
154        true
155    }
156}
157
158/// In-memory virtual filesystem for WASM and testing.
159///
160/// This implementation stores files in a `HashMap`, allowing the loader
161/// to resolve includes without actual filesystem access. This is essential
162/// for WASM environments where filesystem access is not available.
163///
164/// # Example
165///
166/// ```
167/// use rustledger_loader::VirtualFileSystem;
168/// use std::path::PathBuf;
169///
170/// let mut vfs = VirtualFileSystem::new();
171/// vfs.add_file("main.beancount", "include \"accounts.beancount\"");
172/// vfs.add_file("accounts.beancount", "2024-01-01 open Assets:Bank USD");
173/// ```
174#[derive(Debug, Default, Clone)]
175pub struct VirtualFileSystem {
176    files: HashMap<PathBuf, Arc<str>>,
177}
178
179impl VirtualFileSystem {
180    /// Create a new empty virtual filesystem.
181    #[must_use]
182    pub fn new() -> Self {
183        Self::default()
184    }
185
186    /// Add a file to the virtual filesystem.
187    ///
188    /// The path is normalized to handle different path separators
189    /// and relative paths consistently.
190    pub fn add_file(&mut self, path: impl AsRef<Path>, content: impl Into<String>) {
191        let normalized = normalize_vfs_path(path.as_ref());
192        self.files.insert(normalized, content.into().into());
193    }
194
195    /// Add multiple files from a map.
196    ///
197    /// This is a convenience method for adding many files at once.
198    pub fn add_files(
199        &mut self,
200        files: impl IntoIterator<Item = (impl AsRef<Path>, impl Into<String>)>,
201    ) {
202        for (path, content) in files {
203            self.add_file(path, content);
204        }
205    }
206
207    /// Create a virtual filesystem from a map of files.
208    #[must_use]
209    pub fn from_files(
210        files: impl IntoIterator<Item = (impl AsRef<Path>, impl Into<String>)>,
211    ) -> Self {
212        let mut vfs = Self::new();
213        vfs.add_files(files);
214        vfs
215    }
216
217    /// Get the number of files in the virtual filesystem.
218    #[must_use]
219    pub fn len(&self) -> usize {
220        self.files.len()
221    }
222
223    /// Check if the virtual filesystem is empty.
224    #[must_use]
225    pub fn is_empty(&self) -> bool {
226        self.files.is_empty()
227    }
228}
229
230impl FileSystem for VirtualFileSystem {
231    fn read(&self, path: &Path) -> Result<Arc<str>, LoadError> {
232        let normalized = normalize_vfs_path(path);
233
234        self.files
235            .get(&normalized)
236            .cloned()
237            .ok_or_else(|| LoadError::Io {
238                path: path.to_path_buf(),
239                source: std::io::Error::new(
240                    std::io::ErrorKind::NotFound,
241                    format!("file not found in virtual filesystem: {}", path.display()),
242                ),
243            })
244    }
245
246    fn exists(&self, path: &Path) -> bool {
247        let normalized = normalize_vfs_path(path);
248        self.files.contains_key(&normalized)
249    }
250
251    fn is_encrypted(&self, _path: &Path) -> bool {
252        // Virtual filesystem doesn't support encrypted files
253        // Users should decrypt before adding to VFS
254        false
255    }
256
257    fn normalize(&self, path: &Path) -> PathBuf {
258        // For virtual filesystem, just clean up the path without making it absolute
259        normalize_vfs_path(path)
260    }
261
262    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, String> {
263        // Normalize the pattern the same way stored keys are normalized,
264        // so that backslashes or leading "./" in the pattern still match.
265        let normalized = pattern.replace('\\', "/");
266        let normalized = normalized.strip_prefix("./").unwrap_or(&normalized);
267        let glob_pattern = glob::Pattern::new(normalized).map_err(|e| e.to_string())?;
268        let mut matched: Vec<PathBuf> = self
269            .files
270            .keys()
271            .filter(|path| glob_pattern.matches_path(path))
272            .cloned()
273            .collect();
274        matched.sort();
275        Ok(matched)
276    }
277}
278
/// Produce the canonical key form of a path for the virtual filesystem.
///
/// The cleanup is purely lexical:
/// - backslashes become forward slashes
/// - empty segments and `.` segments (including a leading `./`) are dropped
/// - a `..` segment cancels the preceding ordinary segment; when there is
///   nothing left to cancel it is kept, so `../x` stays `../x`
///
/// A path that reduces to nothing is returned as `.`.
fn normalize_vfs_path(path: &Path) -> PathBuf {
    let unified = path.to_string_lossy().replace('\\', "/");

    let mut stack: Vec<&str> = Vec::new();
    for segment in unified.split('/') {
        if segment.is_empty() || segment == "." {
            continue;
        }
        if segment != ".." {
            stack.push(segment);
            continue;
        }
        // `..`: cancel the previous ordinary segment if there is one,
        // otherwise keep it so leading parent references survive.
        match stack.last() {
            Some(&top) if top != ".." => {
                stack.pop();
            }
            _ => stack.push(".."),
        }
    }

    if stack.is_empty() {
        return PathBuf::from(".");
    }
    PathBuf::from(stack.join("/"))
}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// Each lexical cleanup rule maps its input to the expected key.
    #[test]
    fn test_normalize_vfs_path() {
        let cases = [
            ("foo/bar", "foo/bar"),
            ("./foo/bar", "foo/bar"),
            ("foo/../bar", "bar"),
            ("foo/./bar", "foo/bar"),
            ("foo\\bar", "foo/bar"),
        ];

        for (input, expected) in cases {
            assert_eq!(
                normalize_vfs_path(Path::new(input)),
                PathBuf::from(expected)
            );
        }
    }

    /// A stored file is visible and readable; an unknown one is not.
    #[test]
    fn test_virtual_filesystem_basic() {
        let body = "2024-01-01 open Assets:Bank USD";
        let mut vfs = VirtualFileSystem::new();
        vfs.add_file("test.beancount", body);

        assert!(vfs.exists(Path::new("test.beancount")));
        assert!(!vfs.exists(Path::new("nonexistent.beancount")));

        let content = vfs.read(Path::new("test.beancount")).unwrap();
        assert_eq!(&*content, body);
    }

    /// Lookups succeed through any spelling that normalizes to the key.
    #[test]
    fn test_virtual_filesystem_path_normalization() {
        let mut vfs = VirtualFileSystem::new();
        vfs.add_file("foo/bar.beancount", "content");

        // Both the plain and the "./"-prefixed spelling must resolve.
        for spelling in ["foo/bar.beancount", "./foo/bar.beancount"] {
            assert!(vfs.exists(Path::new(spelling)));
        }

        // Reading through the prefixed spelling returns the stored content.
        let content = vfs.read(Path::new("./foo/bar.beancount")).unwrap();
        assert_eq!(&*content, "content");
    }

    /// The virtual filesystem never reports a file as encrypted.
    #[test]
    fn test_virtual_filesystem_not_encrypted() {
        let vfs = VirtualFileSystem::new();

        for name in ["test.gpg", "test.asc"] {
            assert!(!vfs.is_encrypted(Path::new(name)));
        }
    }

    /// `from_files` stores every pair it is given.
    #[test]
    fn test_virtual_filesystem_from_files() {
        let vfs = VirtualFileSystem::from_files([
            ("a.beancount", "content a"),
            ("b.beancount", "content b"),
        ]);

        assert_eq!(vfs.len(), 2);
        for name in ["a.beancount", "b.beancount"] {
            assert!(vfs.exists(Path::new(name)));
        }
    }
}