Skip to main content

matrixcode_core/
workspace.rs

1//! Workspace root enforcement for filesystem-touching tools.
2//!
3//! A [`Workspace`] carries a canonical root directory plus a policy flag.
4//! Every filesystem tool resolves caller-supplied paths through
5//! [`Workspace::resolve`] (for paths that must already exist) or
6//! [`Workspace::resolve_for_create`] (for paths that may not yet exist,
7//! like `write`'s target file). Both refuse paths that escape the root
8//! after symlink resolution.
9//!
10//! The [`Workspace::unrestricted`] constructor disables the check and
11//! exists solely so legacy callers and the existing unit tests keep
12//! working without change. Production entry points (`main.rs`) build a
13//! restricted workspace via [`Workspace::detect`].
14
15use std::path::{Component, Path, PathBuf};
16use std::sync::Arc;
17
18use anyhow::{Context, Result};
19
20/// Shared handle passed into every filesystem-touching tool.
21#[derive(Debug, Clone)]
22pub struct Workspace {
23    inner: Arc<Inner>,
24}
25
/// State shared by all clones of a [`Workspace`].
#[derive(Debug)]
struct Inner {
    /// Workspace root directory. A restricted workspace stores the
    /// canonicalized root; an unrestricted one stores the current
    /// directory as reported by the OS (not canonicalized).
    root: PathBuf,
    /// When `false`, boundary checks against `root` are skipped.
    restricted: bool,
}
34
35impl Workspace {
36    /// Build a restricted workspace. If `override_root` is `Some`, use
37    /// it; otherwise walk up from cwd looking for a `.git` directory,
38    /// and fall back to cwd when none is found.
39    pub fn detect(override_root: Option<&Path>) -> Result<Self> {
40        let root = match override_root {
41            Some(p) => p.to_path_buf(),
42            None => find_git_root().unwrap_or(std::env::current_dir()?),
43        };
44        let root = std::fs::canonicalize(&root)
45            .with_context(|| format!("canonicalizing workspace root {}", root.display()))?;
46        if !root.is_dir() {
47            anyhow::bail!("workspace root is not a directory: {}", root.display());
48        }
49        Ok(Self {
50            inner: Arc::new(Inner { root, restricted: true }),
51        })
52    }
53
54    /// Build an unrestricted workspace rooted at cwd. No path checks
55    /// are performed. Kept for backward compatibility with callers and
56    /// tests that predate workspace enforcement; new production code
57    /// should use [`Workspace::detect`].
58    pub fn unrestricted() -> Self {
59        let root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
60        Self {
61            inner: Arc::new(Inner { root, restricted: false }),
62        }
63    }
64
65    /// Canonical workspace root (always absolute).
66    pub fn root(&self) -> &Path {
67        &self.inner.root
68    }
69
70    /// Whether this workspace enforces the root boundary.
71    pub fn is_restricted(&self) -> bool {
72        self.inner.restricted
73    }
74
75    /// Resolve a caller-provided path that is expected to already exist.
76    /// Returns the canonical absolute path. On a restricted workspace,
77    /// the result must live under `root` after symlink resolution.
78    pub fn resolve(&self, input: &str) -> Result<PathBuf> {
79        let joined = self.join(input);
80        // `canonicalize` requires the path to exist. We want a clean
81        // error for non-existent paths rather than "escapes workspace",
82        // so let the caller handle the not-found case via their normal
83        // I/O error path. Here we only enforce the boundary when the
84        // path resolves successfully.
85        let canonical = std::fs::canonicalize(&joined)
86            .with_context(|| format!("resolving path {}", joined.display()))?;
87        self.ensure_within(&canonical, input)?;
88        Ok(canonical)
89    }
90
91    /// Resolve a caller-provided path that does not need to exist yet
92    /// (e.g. a `write` target). The parent directory must exist *or*
93    /// be creatable inside the workspace. We canonicalize the nearest
94    /// existing ancestor and confirm it lives under `root`, then
95    /// re-attach the non-existent tail.
96    pub fn resolve_for_create(&self, input: &str) -> Result<PathBuf> {
97        let joined = self.join(input);
98        let (existing, tail) = split_existing(&joined);
99        let canonical_existing = std::fs::canonicalize(&existing).with_context(|| {
100            format!(
101                "resolving nearest existing ancestor {} for {}",
102                existing.display(),
103                joined.display()
104            )
105        })?;
106        self.ensure_within(&canonical_existing, input)?;
107        let mut out = canonical_existing;
108        for c in tail.components() {
109            match c {
110                Component::Normal(s) => out.push(s),
111                // split_existing only keeps Normal components in the tail,
112                // so anything else is a bug on our side.
113                other => anyhow::bail!("unexpected path component in tail: {:?}", other),
114            }
115        }
116        // The tail couldn't have introduced a symlink (it doesn't exist
117        // yet), so a lexical check is sufficient here.
118        if self.inner.restricted && !out.starts_with(&self.inner.root) {
119            anyhow::bail!(
120                "path {} escapes workspace root {}",
121                out.display(),
122                self.inner.root.display()
123            );
124        }
125        Ok(out)
126    }
127
128    fn join(&self, input: &str) -> PathBuf {
129        let p = Path::new(input);
130        if p.is_absolute() {
131            p.to_path_buf()
132        } else {
133            self.inner.root.join(p)
134        }
135    }
136
137    fn ensure_within(&self, canonical: &Path, original: &str) -> Result<()> {
138        if !self.inner.restricted {
139            return Ok(());
140        }
141        if canonical.starts_with(&self.inner.root) {
142            return Ok(());
143        }
144        anyhow::bail!(
145            "path {} (resolved to {}) is outside workspace root {}",
146            original,
147            canonical.display(),
148            self.inner.root.display()
149        );
150    }
151}
152
/// Search the current directory and each of its ancestors, nearest
/// first, for one that contains a `.git` entry.
///
/// Returns `None` when the current directory cannot be determined or
/// when no ancestor up to the filesystem root contains `.git`.
fn find_git_root() -> Option<PathBuf> {
    let cwd = std::env::current_dir().ok()?;
    cwd.ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(Path::to_path_buf)
}
166
/// Split a path into (nearest existing ancestor, remaining tail).
///
/// For `/a/b/c/d.txt` where `/a/b` exists but `/a/b/c` doesn't, returns
/// (`/a/b`, `c/d.txt`). The tail only ever contains plain file names.
///
/// Existence is tested without following the final symlink, so a
/// dangling symlink counts as existing and stays in the first half of
/// the result instead of being treated as a name to be created.
///
/// The first half normally exists. It may not when the walk has to stop
/// early: at a component without a file name (such as a trailing `..`)
/// or when there is no parent left to pop.
fn split_existing(p: &Path) -> (PathBuf, PathBuf) {
    let mut existing = p.to_path_buf();
    // Missing names, collected leaf-first.
    let mut tail_parts = Vec::new();
    // `symlink_metadata` (lstat) sees the directory entry itself, whereas
    // `exists()` would follow a symlink and report a broken one as absent.
    while existing.symlink_metadata().is_err() {
        match existing.file_name() {
            Some(name) => tail_parts.push(name.to_os_string()),
            None => break, // root, or a path ending in `..`
        }
        if !existing.pop() {
            break;
        }
    }
    // Restore root-to-leaf order.
    let tail: PathBuf = tail_parts.into_iter().rev().collect();
    (existing, tail)
}
189
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Restricted workspace rooted at `dir`; panics if detection fails.
    fn ws(dir: &Path) -> Workspace {
        Workspace::detect(Some(dir)).unwrap()
    }

    /// A relative path to an existing file resolves to its canonical path.
    #[test]
    fn resolve_existing_file_inside_root() {
        let root_dir = TempDir::new().unwrap();
        let file = root_dir.path().join("a.txt");
        std::fs::write(&file, "x").unwrap();

        let resolved = ws(root_dir.path()).resolve("a.txt").unwrap();

        let expected = std::fs::canonicalize(&file).unwrap();
        assert_eq!(resolved, expected);
    }

    /// An absolute path that points inside the root is accepted.
    #[test]
    fn resolve_absolute_inside_root() {
        let root_dir = TempDir::new().unwrap();
        let file = root_dir.path().join("a.txt");
        std::fs::write(&file, "x").unwrap();

        let workspace = ws(root_dir.path());
        let resolved = workspace.resolve(file.to_str().unwrap()).unwrap();

        let expected = std::fs::canonicalize(&file).unwrap();
        assert_eq!(resolved, expected);
    }

    /// `..` must not be able to climb out of the workspace root.
    #[test]
    fn resolve_rejects_parent_escape() {
        let parent = TempDir::new().unwrap();
        let root_dir = parent.path().join("sub");
        std::fs::create_dir(&root_dir).unwrap();
        let sibling = parent.path().join("outside.txt");
        std::fs::write(&sibling, "x").unwrap();

        let workspace = ws(&root_dir);
        let err = workspace.resolve("../outside.txt").unwrap_err().to_string();

        assert!(err.contains("outside workspace root"), "got: {err}");
    }

    /// An absolute path outside the root is refused one way or another.
    #[test]
    fn resolve_rejects_absolute_outside() {
        let root_dir = TempDir::new().unwrap();
        let workspace = ws(root_dir.path());

        let err = workspace.resolve("/etc/hostname").unwrap_err().to_string();

        // If the file exists we expect the boundary error; if it does
        // not, the resolve error. Either counts as a rejection.
        let rejected =
            err.contains("outside workspace root") || err.contains("resolving path");
        assert!(rejected, "got: {err}");
    }

    /// A symlink inside the root that points outside must be refused.
    #[test]
    #[cfg(unix)]
    fn resolve_rejects_symlink_escape() {
        let root_dir = TempDir::new().unwrap();
        let elsewhere = TempDir::new().unwrap();
        let target = elsewhere.path().join("secret.txt");
        std::fs::write(&target, "top-secret").unwrap();

        let link_path = root_dir.path().join("escape");
        std::os::unix::fs::symlink(&target, &link_path).unwrap();

        let workspace = ws(root_dir.path());
        let err = workspace.resolve("escape").unwrap_err().to_string();

        assert!(err.contains("outside workspace root"), "got: {err}");
    }

    /// A not-yet-existing nested path resolves to a location under the root.
    #[test]
    fn resolve_for_create_new_file_inside_root() {
        let root_dir = TempDir::new().unwrap();
        let workspace = ws(root_dir.path());

        let resolved = workspace.resolve_for_create("newdir/new.txt").unwrap();

        let canonical_root = std::fs::canonicalize(root_dir.path()).unwrap();
        assert!(resolved.starts_with(canonical_root));
        assert!(resolved.ends_with("newdir/new.txt"));
    }

    /// A create target that climbs above the root is refused.
    #[test]
    fn resolve_for_create_rejects_outside() {
        let root_dir = TempDir::new().unwrap();
        let workspace = ws(root_dir.path());

        let err = workspace
            .resolve_for_create("../evil.txt")
            .unwrap_err()
            .to_string();

        let rejected =
            err.contains("escapes workspace root") || err.contains("outside workspace root");
        assert!(rejected, "got: {err}");
    }

    /// The unrestricted workspace performs no boundary checks.
    #[test]
    fn unrestricted_accepts_anything() {
        let workspace = Workspace::unrestricted();
        // At least one of these paths is expected to exist.
        assert!(workspace.resolve("/").is_ok() || workspace.resolve(".").is_ok());
        assert!(!workspace.is_restricted());
    }
}