Skip to main content

chainsaw/
vfs.rs

1//! Virtual filesystem abstraction for dependency graph construction.
2//!
3//! The [`Vfs`] trait abstracts filesystem access so the graph-building pipeline
4//! can operate transparently on the real filesystem ([`OsVfs`]) or on git tree
5//! objects ([`GitTreeVfs`]). This enables in-process git ref diffs without
6//! spawning worktrees.
7
8use std::collections::{HashMap, HashSet};
9use std::io;
10use std::path::{Path, PathBuf};
11use std::sync::Arc;
12use std::time::SystemTime;
13
/// File attributes reported by [`Vfs::metadata`] and [`Vfs::read_with_metadata`].
///
/// This is a small plain-data snapshot, so it derives the usual value-type
/// traits (`Debug` for diagnostics, `Copy`/`Eq` for cheap caching and
/// comparison).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct VfsMetadata {
    /// Size in bytes. Git-backed directories report `0`.
    pub len: u64,
    /// `true` when the path refers to a regular file (or a git blob).
    pub is_file: bool,
    /// `true` when the path refers to a directory (or a git tree).
    pub is_dir: bool,
    /// File modification time (nanos since epoch). `None` for non-OS sources
    /// like git tree objects that have no mtime.
    pub mtime_nanos: Option<u128>,
}
23
24/// Filesystem abstraction for the graph-building pipeline.
25///
26/// All methods mirror their `std::fs` counterparts. Implementations must be
27/// safe to call from multiple threads concurrently.
28pub trait Vfs: Send + Sync {
29    fn read_to_string(&self, path: &Path) -> io::Result<String>;
30    fn read(&self, path: &Path) -> io::Result<Vec<u8>>;
31    fn metadata(&self, path: &Path) -> io::Result<VfsMetadata>;
32    fn exists(&self, path: &Path) -> bool;
33    fn is_dir(&self, path: &Path) -> bool;
34    fn is_file(&self, path: &Path) -> bool;
35    fn read_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>>;
36    fn canonicalize(&self, path: &Path) -> io::Result<PathBuf>;
37
38    /// Read file content and metadata in one operation. The default calls
39    /// `read_to_string` + `metadata` separately; `OsVfs` overrides this to
40    /// reuse the file descriptor (open + fstat + read = 3 syscalls, not 4).
41    fn read_with_metadata(&self, path: &Path) -> io::Result<(String, VfsMetadata)> {
42        let content = self.read_to_string(path)?;
43        let meta = self.metadata(path)?;
44        Ok((content, meta))
45    }
46}
47
/// Pass-through to `std::fs`. Zero overhead for normal (non-git) operation.
///
/// The type is a stateless unit struct, so it is freely copyable and can be
/// produced with `OsVfs` or `OsVfs::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct OsVfs;
50
51fn fs_meta_to_vfs(meta: &std::fs::Metadata) -> VfsMetadata {
52    let mtime_nanos = meta
53        .modified()
54        .ok()
55        .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
56        .map(|d| d.as_nanos());
57    VfsMetadata {
58        len: meta.len(),
59        is_file: meta.is_file(),
60        is_dir: meta.is_dir(),
61        mtime_nanos,
62    }
63}
64
65impl Vfs for OsVfs {
66    fn read_to_string(&self, path: &Path) -> io::Result<String> {
67        std::fs::read_to_string(path)
68    }
69
70    fn read(&self, path: &Path) -> io::Result<Vec<u8>> {
71        std::fs::read(path)
72    }
73
74    fn metadata(&self, path: &Path) -> io::Result<VfsMetadata> {
75        std::fs::metadata(path).map(|m| fs_meta_to_vfs(&m))
76    }
77
78    fn exists(&self, path: &Path) -> bool {
79        path.exists()
80    }
81
82    fn is_dir(&self, path: &Path) -> bool {
83        path.is_dir()
84    }
85
86    fn is_file(&self, path: &Path) -> bool {
87        path.is_file()
88    }
89
90    fn read_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>> {
91        let mut entries = Vec::new();
92        for entry in std::fs::read_dir(path)? {
93            entries.push(entry?.path());
94        }
95        Ok(entries)
96    }
97
98    fn canonicalize(&self, path: &Path) -> io::Result<PathBuf> {
99        std::fs::canonicalize(path)
100    }
101
102    /// Open + fstat + read in one pass (3 syscalls, not 4).
103    fn read_with_metadata(&self, path: &Path) -> io::Result<(String, VfsMetadata)> {
104        use std::io::Read;
105
106        let mut file = std::fs::File::open(path)?;
107        let meta = fs_meta_to_vfs(&file.metadata()?);
108        #[allow(clippy::cast_possible_truncation)]
109        let mut content = String::with_capacity(meta.len as usize + 1);
110        file.read_to_string(&mut content)?;
111        Ok((content, meta))
112    }
113}
114
115/// Adapter that wraps our [`Vfs`] trait to satisfy [`oxc_resolver::FileSystem`].
116///
117/// Used to inject a VFS into `ResolverGeneric::new_with_file_system` so the
118/// TypeScript resolver can transparently operate on git tree objects.
119pub struct OxcVfsAdapter(pub Arc<dyn Vfs>);
120
121impl oxc_resolver::FileSystem for OxcVfsAdapter {
122    fn new() -> Self {
123        // Never called — we always construct via new_with_file_system.
124        unreachable!("OxcVfsAdapter must be constructed with a Vfs instance")
125    }
126
127    fn read(&self, path: &Path) -> io::Result<Vec<u8>> {
128        self.0.read(path)
129    }
130
131    fn read_to_string(&self, path: &Path) -> io::Result<String> {
132        self.0.read_to_string(path)
133    }
134
135    fn metadata(&self, path: &Path) -> io::Result<oxc_resolver::FileMetadata> {
136        // Single stat syscall via Vfs::metadata, then map to oxc_resolver's type.
137        // oxc_resolver has its own per-path OnceLock cache, so this is called at
138        // most once per unique CachedPath.
139        let meta = self.0.metadata(path)?;
140        Ok(oxc_resolver::FileMetadata::new(
141            meta.is_file,
142            meta.is_dir,
143            false,
144        ))
145    }
146
147    fn symlink_metadata(&self, path: &Path) -> io::Result<oxc_resolver::FileMetadata> {
148        // We always follow symlinks (stat, not lstat) and never report
149        // is_symlink=true. Combined with symlinks=false in ResolveOptions,
150        // this means the resolver skips canonicalize entirely.
151        self.metadata(path)
152    }
153
154    fn read_link(&self, path: &Path) -> Result<PathBuf, oxc_resolver::ResolveError> {
155        Err(io::Error::new(
156            io::ErrorKind::Unsupported,
157            format!("read_link not supported: {}", path.display()),
158        )
159        .into())
160    }
161
162    fn canonicalize(&self, path: &Path) -> io::Result<PathBuf> {
163        self.0.canonicalize(path)
164    }
165}
166
167/// Reads files from a git tree object, enabling in-process git ref diffs
168/// without spawning worktrees. Construction walks the tree once to build
169/// an in-memory index; subsequent reads decompress blobs from the pack.
170pub struct GitTreeVfs {
171    repo: gix::ThreadSafeRepository,
172    /// Relative path -> (blob `ObjectId`, uncompressed size).
173    blobs: HashMap<PathBuf, (gix::ObjectId, u64)>,
174    /// Set of directories present in the tree (relative paths).
175    dirs: HashSet<PathBuf>,
176    /// Directory (relative) -> direct children as absolute paths.
177    children: HashMap<PathBuf, Vec<PathBuf>>,
178    /// Absolute prefix joined to relative paths for external consumers.
179    root: PathBuf,
180}
181
182impl GitTreeVfs {
183    /// Open the repository, resolve `git_ref` to a commit, walk its tree,
184    /// and build an in-memory index of all blobs and directories.
185    pub fn new(repo_path: &Path, git_ref: &str, root: &Path) -> io::Result<Self> {
186        use gix::prelude::FindExt;
187
188        // Resolve ref to SHA via git CLI (avoids pulling in gix revision feature).
189        let sha = resolve_ref_to_sha(repo_path, git_ref)?;
190        let commit_id = gix::ObjectId::from_hex(sha.as_bytes())
191            .map_err(|e| io::Error::other(format!("parse oid: {e}")))?;
192
193        let ts_repo = gix::ThreadSafeRepository::open(repo_path)
194            .map_err(|e| io::Error::other(format!("open repo: {e}")))?;
195        let repo = ts_repo.to_thread_local();
196
197        // commit -> tree
198        let commit = repo
199            .find_object(commit_id)
200            .map_err(|e| io::Error::other(format!("find commit: {e}")))?
201            .try_into_commit()
202            .map_err(|e| io::Error::other(format!("not a commit: {e}")))?;
203        let tree_id = commit
204            .tree_id()
205            .map_err(|e| io::Error::other(format!("tree id: {e}")))?;
206
207        // Walk tree with Recorder
208        let mut buf = Vec::new();
209        let tree_iter = repo
210            .objects
211            .find_tree_iter(&tree_id, &mut buf)
212            .map_err(|e| io::Error::other(format!("find tree: {e}")))?;
213
214        let mut recorder = gix::traverse::tree::Recorder::default()
215            .track_location(Some(gix::traverse::tree::recorder::Location::Path));
216        gix::traverse::tree::breadthfirst(
217            tree_iter,
218            gix::traverse::tree::breadthfirst::State::default(),
219            &repo.objects,
220            &mut recorder,
221        )
222        .map_err(|e| io::Error::other(format!("traverse: {e}")))?;
223
224        let mut blobs = HashMap::new();
225        let mut dirs = HashSet::new();
226        let mut children: HashMap<PathBuf, Vec<PathBuf>> = HashMap::new();
227
228        dirs.insert(PathBuf::new()); // root dir
229
230        for entry in &recorder.records {
231            let rel = PathBuf::from(
232                std::str::from_utf8(entry.filepath.as_ref())
233                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
234            );
235            let abs = root.join(&rel);
236            let parent_rel = rel.parent().unwrap_or(Path::new("")).to_path_buf();
237
238            if entry.mode.is_tree() {
239                dirs.insert(rel);
240                children.entry(parent_rel).or_default().push(abs);
241            } else if entry.mode.is_blob() {
242                let size = repo
243                    .find_header(entry.oid)
244                    .map_err(|e| io::Error::other(format!("header: {e}")))?
245                    .size();
246                blobs.insert(rel, (entry.oid, size));
247                children.entry(parent_rel).or_default().push(abs);
248            }
249            // Skip symlinks, submodules
250        }
251
252        Ok(Self {
253            repo: ts_repo,
254            blobs,
255            dirs,
256            children,
257            root: root.to_path_buf(),
258        })
259    }
260
261    /// Strip the root prefix to get the relative tree path.
262    fn relative(&self, path: &Path) -> Option<PathBuf> {
263        path.strip_prefix(&self.root).ok().map(Path::to_path_buf)
264    }
265
266    fn not_found(path: &Path) -> io::Error {
267        io::Error::new(
268            io::ErrorKind::NotFound,
269            format!("{} not in git tree", path.display()),
270        )
271    }
272}
273
/// Resolves `git_ref` to an object id (hex string) by running
/// `git rev-parse --verify` inside `repo_path`.
///
/// The ref is first tried as `<git_ref>^{commit}` so that refs which do not
/// point directly at a commit — annotated tags in particular — are peeled to
/// the commit they refer to. A plain `rev-parse` on an annotated tag yields
/// the tag object's id, which the caller would then reject as "not a commit".
/// If the peeled spelling is rejected (for example a `:/text` search, where
/// the suffix would become part of the pattern), the ref is retried verbatim,
/// so every ref that resolved before still resolves.
///
/// # Errors
///
/// Returns an error when `git` cannot be spawned (missing binary, bad
/// `repo_path`) or when `git rev-parse` rejects the ref; in the latter case
/// the message carries git's trimmed stderr.
fn resolve_ref_to_sha(repo_path: &Path, git_ref: &str) -> io::Result<String> {
    // One `git rev-parse --verify <spec>` invocation; only spawn failures
    // are turned into errors here, the exit status is inspected by the caller.
    let rev_parse = |spec: &str| -> io::Result<std::process::Output> {
        std::process::Command::new("git")
            .args(["rev-parse", "--verify", spec])
            .current_dir(repo_path)
            .output()
            .map_err(|e| io::Error::other(format!("git rev-parse: {e}")))
    };

    let peeled = rev_parse(&format!("{git_ref}^{{commit}}"))?;
    let output = if peeled.status.success() {
        peeled
    } else {
        rev_parse(git_ref)?
    };

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(io::Error::other(format!(
            "git rev-parse failed for '{git_ref}': {}",
            stderr.trim()
        )));
    }
    Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
}
289
290impl Vfs for GitTreeVfs {
291    fn read_to_string(&self, path: &Path) -> io::Result<String> {
292        let rel = self.relative(path).ok_or_else(|| Self::not_found(path))?;
293        let &(oid, _) = self.blobs.get(&rel).ok_or_else(|| Self::not_found(path))?;
294        let repo = self.repo.to_thread_local();
295        let obj = repo
296            .find_object(oid)
297            .map_err(|e| io::Error::other(format!("read object: {e}")))?;
298        String::from_utf8(obj.data.clone())
299            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
300    }
301
302    fn read(&self, path: &Path) -> io::Result<Vec<u8>> {
303        let rel = self.relative(path).ok_or_else(|| Self::not_found(path))?;
304        let &(oid, _) = self.blobs.get(&rel).ok_or_else(|| Self::not_found(path))?;
305        let repo = self.repo.to_thread_local();
306        let obj = repo
307            .find_object(oid)
308            .map_err(|e| io::Error::other(format!("read object: {e}")))?;
309        Ok(obj.data.clone())
310    }
311
312    fn metadata(&self, path: &Path) -> io::Result<VfsMetadata> {
313        let rel = self.relative(path).ok_or_else(|| Self::not_found(path))?;
314        if let Some(&(_, size)) = self.blobs.get(&rel) {
315            Ok(VfsMetadata {
316                len: size,
317                is_file: true,
318                is_dir: false,
319                mtime_nanos: None,
320            })
321        } else if self.dirs.contains(&rel) {
322            Ok(VfsMetadata {
323                len: 0,
324                is_file: false,
325                is_dir: true,
326                mtime_nanos: None,
327            })
328        } else {
329            Err(Self::not_found(path))
330        }
331    }
332
333    fn exists(&self, path: &Path) -> bool {
334        let Some(rel) = self.relative(path) else {
335            return false;
336        };
337        self.blobs.contains_key(&rel) || self.dirs.contains(&rel)
338    }
339
340    fn is_dir(&self, path: &Path) -> bool {
341        let Some(rel) = self.relative(path) else {
342            return false;
343        };
344        self.dirs.contains(&rel)
345    }
346
347    fn is_file(&self, path: &Path) -> bool {
348        let Some(rel) = self.relative(path) else {
349            return false;
350        };
351        self.blobs.contains_key(&rel)
352    }
353
354    fn read_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>> {
355        let rel = self.relative(path).ok_or_else(|| Self::not_found(path))?;
356        self.children
357            .get(&rel)
358            .cloned()
359            .ok_or_else(|| Self::not_found(path))
360    }
361
362    fn canonicalize(&self, path: &Path) -> io::Result<PathBuf> {
363        // Git trees have no symlinks; verify existence and return normalized path.
364        let rel = self.relative(path).ok_or_else(|| Self::not_found(path))?;
365        if self.blobs.contains_key(&rel) || self.dirs.contains(&rel) {
366            Ok(self.root.join(&rel))
367        } else {
368            Err(Self::not_found(path))
369        }
370    }
371
372    /// Single lookup + blob decompress for both content and metadata.
373    fn read_with_metadata(&self, path: &Path) -> io::Result<(String, VfsMetadata)> {
374        let rel = self.relative(path).ok_or_else(|| Self::not_found(path))?;
375        let &(oid, size) = self.blobs.get(&rel).ok_or_else(|| Self::not_found(path))?;
376        let repo = self.repo.to_thread_local();
377        let obj = repo
378            .find_object(oid)
379            .map_err(|e| io::Error::other(format!("read object: {e}")))?;
380        let content = String::from_utf8(obj.data.clone())
381            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
382        let meta = VfsMetadata {
383            len: size,
384            is_file: true,
385            is_dir: false,
386            mtime_nanos: None,
387        };
388        Ok((content, meta))
389    }
390}
391
#[cfg(test)]
mod tests {
    use super::*;

    /// Crate directory. The git-backed tests pass it as both the repository
    /// path and the VFS root, so they presumably expect it to be the
    /// repository root as well.
    fn crate_root() -> &'static Path {
        Path::new(env!("CARGO_MANIFEST_DIR"))
    }

    /// Absolute path of this crate's `Cargo.toml`.
    fn manifest() -> PathBuf {
        crate_root().join("Cargo.toml")
    }

    /// Absolute path of this crate's `src` directory.
    fn src_dir() -> PathBuf {
        crate_root().join("src")
    }

    /// VFS over the `HEAD` commit, rooted at the crate directory.
    fn head_vfs() -> GitTreeVfs {
        GitTreeVfs::new(crate_root(), "HEAD", crate_root()).unwrap()
    }

    #[test]
    fn os_vfs_reads_file() {
        let text = OsVfs.read_to_string(&manifest()).unwrap();
        assert!(text.contains("chainsaw-cli"));
    }

    #[test]
    fn os_vfs_metadata() {
        let info = OsVfs.metadata(&manifest()).unwrap();
        assert!(info.is_file);
        assert!(!info.is_dir);
        assert!(info.len > 0);
        assert!(info.mtime_nanos.is_some());
    }

    #[test]
    fn os_vfs_read_with_metadata() {
        let (text, info) = OsVfs.read_with_metadata(&manifest()).unwrap();
        assert!(text.contains("chainsaw-cli"));
        assert!(info.is_file);
        assert!(info.len > 0);
        assert!(info.mtime_nanos.is_some());
    }

    #[test]
    fn os_vfs_dir_operations() {
        let fs = OsVfs;
        let dir = src_dir();
        assert!(fs.is_dir(&dir));
        assert!(!fs.is_file(&dir));
        let listing = fs.read_dir(&dir).unwrap();
        assert!(listing.iter().any(|p| p.ends_with("main.rs")));
    }

    #[test]
    fn os_vfs_nonexistent() {
        let fs = OsVfs;
        let ghost = crate_root().join("nonexistent.xyz");
        assert!(!fs.exists(&ghost));
        assert!(fs.read_to_string(&ghost).is_err());
        assert!(fs.metadata(&ghost).is_err());
    }

    #[test]
    fn git_tree_vfs_reads_head() {
        let tree = head_vfs();
        let cargo_toml = manifest();

        // A tracked file is visible as a file and readable.
        assert!(tree.is_file(&cargo_toml));
        assert!(!tree.is_dir(&cargo_toml));
        let text = tree.read_to_string(&cargo_toml).unwrap();
        assert!(text.contains("chainsaw-cli"));

        // A tracked directory is visible as a directory only.
        let dir = src_dir();
        assert!(tree.is_dir(&dir));
        assert!(!tree.is_file(&dir));

        // An untracked path is neither present nor readable.
        let ghost = crate_root().join("nonexistent.rs");
        assert!(!tree.exists(&ghost));
        assert!(tree.read_to_string(&ghost).is_err());
    }

    #[test]
    fn git_tree_vfs_metadata() {
        let info = head_vfs().metadata(&manifest()).unwrap();
        assert!(info.is_file);
        assert!(!info.is_dir);
        assert!(info.len > 0);
        assert!(info.mtime_nanos.is_none(), "git blobs have no mtime");
    }

    #[test]
    fn git_tree_vfs_read_dir() {
        let listing = head_vfs().read_dir(&src_dir()).unwrap();
        assert!(listing.iter().any(|p| p.ends_with("main.rs")));
        assert!(listing.iter().any(|p| p.ends_with("lib.rs")));
    }

    #[test]
    fn git_tree_vfs_canonicalize() {
        let tree = head_vfs();

        let resolved = tree.canonicalize(&manifest()).unwrap();
        assert_eq!(resolved, manifest());

        assert!(tree.canonicalize(&crate_root().join("nonexistent")).is_err());
    }

    #[test]
    fn oxc_adapter_metadata_single_stat() {
        use oxc_resolver::FileSystem;

        let adapter = OxcVfsAdapter(Arc::new(OsVfs));
        let dir = src_dir();

        // A file is reported as a file.
        let info = adapter.metadata(&dir.join("main.rs")).unwrap();
        assert!(info.is_file());
        assert!(!info.is_dir());

        // A directory is reported as a directory.
        let info = adapter.metadata(&dir.join("lang")).unwrap();
        assert!(!info.is_file());
        assert!(info.is_dir());

        // A missing path is an error.
        assert!(adapter.metadata(&dir.join("nonexistent.xyz")).is_err());
    }
}