Skip to main content

matrixcode_core/
workspace.rs

1//! Workspace root enforcement for filesystem-touching tools.
2//!
3//! A [`Workspace`] carries a canonical root directory plus a policy flag.
4//! Every filesystem tool resolves caller-supplied paths through
5//! [`Workspace::resolve`] (for paths that must already exist) or
6//! [`Workspace::resolve_for_create`] (for paths that may not yet exist,
7//! like `write`'s target file). Both refuse paths that escape the root
8//! after symlink resolution.
9//!
10//! The [`Workspace::unrestricted`] constructor disables the check and
11//! exists solely so legacy callers and the existing unit tests keep
12//! working without change. Production entry points (`main.rs`) build a
13//! restricted workspace via [`Workspace::detect`].
14
15use std::path::{Component, Path, PathBuf};
16use std::sync::Arc;
17
18use anyhow::{Context, Result};
19
20/// Shared handle passed into every filesystem-touching tool.
21#[derive(Debug, Clone)]
22pub struct Workspace {
23    inner: Arc<Inner>,
24}
25
/// Shared, immutable state behind a [`Workspace`] handle.
#[derive(Debug)]
struct Inner {
    /// Root directory of the workspace. `Workspace::detect` stores the
    /// canonicalized root here; `Workspace::unrestricted` stores cwd, and
    /// the value is then informational only because no checks run.
    root: PathBuf,
    /// `true` when resolved paths must stay under `root`; `false` turns
    /// every boundary check into a no-op.
    restricted: bool,
}
34
35impl Workspace {
36    /// Build a restricted workspace. If `override_root` is `Some`, use
37    /// it; otherwise walk up from cwd looking for a `.git` directory,
38    /// and fall back to cwd when none is found.
39    pub fn detect(override_root: Option<&Path>) -> Result<Self> {
40        let root = match override_root {
41            Some(p) => p.to_path_buf(),
42            None => find_git_root().unwrap_or(std::env::current_dir()?),
43        };
44        let root = std::fs::canonicalize(&root)
45            .with_context(|| format!("canonicalizing workspace root {}", root.display()))?;
46        if !root.is_dir() {
47            anyhow::bail!("workspace root is not a directory: {}", root.display());
48        }
49        Ok(Self {
50            inner: Arc::new(Inner {
51                root,
52                restricted: true,
53            }),
54        })
55    }
56
57    /// Build an unrestricted workspace rooted at cwd. No path checks
58    /// are performed. Kept for backward compatibility with callers and
59    /// tests that predate workspace enforcement; new production code
60    /// should use [`Workspace::detect`].
61    pub fn unrestricted() -> Self {
62        let root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
63        Self {
64            inner: Arc::new(Inner {
65                root,
66                restricted: false,
67            }),
68        }
69    }
70
71    /// Canonical workspace root (always absolute).
72    pub fn root(&self) -> &Path {
73        &self.inner.root
74    }
75
76    /// Whether this workspace enforces the root boundary.
77    pub fn is_restricted(&self) -> bool {
78        self.inner.restricted
79    }
80
81    /// Resolve a caller-provided path that is expected to already exist.
82    /// Returns the canonical absolute path. On a restricted workspace,
83    /// the result must live under `root` after symlink resolution.
84    pub fn resolve(&self, input: &str) -> Result<PathBuf> {
85        let joined = self.join(input);
86        // `canonicalize` requires the path to exist. We want a clean
87        // error for non-existent paths rather than "escapes workspace",
88        // so let the caller handle the not-found case via their normal
89        // I/O error path. Here we only enforce the boundary when the
90        // path resolves successfully.
91        let canonical = std::fs::canonicalize(&joined)
92            .with_context(|| format!("resolving path {}", joined.display()))?;
93        self.ensure_within(&canonical, input)?;
94        Ok(canonical)
95    }
96
97    /// Resolve a caller-provided path that does not need to exist yet
98    /// (e.g. a `write` target). The parent directory must exist *or*
99    /// be creatable inside the workspace. We canonicalize the nearest
100    /// existing ancestor and confirm it lives under `root`, then
101    /// re-attach the non-existent tail.
102    pub fn resolve_for_create(&self, input: &str) -> Result<PathBuf> {
103        let joined = self.join(input);
104        let (existing, tail) = split_existing(&joined);
105        let canonical_existing = std::fs::canonicalize(&existing).with_context(|| {
106            format!(
107                "resolving nearest existing ancestor {} for {}",
108                existing.display(),
109                joined.display()
110            )
111        })?;
112        self.ensure_within(&canonical_existing, input)?;
113        let mut out = canonical_existing;
114        for c in tail.components() {
115            match c {
116                Component::Normal(s) => out.push(s),
117                // split_existing only keeps Normal components in the tail,
118                // so anything else is a bug on our side.
119                other => anyhow::bail!("unexpected path component in tail: {:?}", other),
120            }
121        }
122        // The tail couldn't have introduced a symlink (it doesn't exist
123        // yet), so a lexical check is sufficient here.
124        if self.inner.restricted && !out.starts_with(&self.inner.root) {
125            anyhow::bail!(
126                "path {} escapes workspace root {}",
127                out.display(),
128                self.inner.root.display()
129            );
130        }
131        Ok(out)
132    }
133
134    fn join(&self, input: &str) -> PathBuf {
135        let p = Path::new(input);
136        if p.is_absolute() {
137            p.to_path_buf()
138        } else {
139            self.inner.root.join(p)
140        }
141    }
142
143    fn ensure_within(&self, canonical: &Path, original: &str) -> Result<()> {
144        if !self.inner.restricted {
145            return Ok(());
146        }
147        if canonical.starts_with(&self.inner.root) {
148            return Ok(());
149        }
150        anyhow::bail!(
151            "path {} (resolved to {}) is outside workspace root {}",
152            original,
153            canonical.display(),
154            self.inner.root.display()
155        );
156    }
157}
158
/// Find the closest directory at or above cwd that holds a `.git` entry.
///
/// Candidates are visited from cwd outward, so the innermost match wins.
/// Returns `None` when cwd cannot be read or when no ancestor up to the
/// filesystem root contains `.git`.
fn find_git_root() -> Option<PathBuf> {
    let start = std::env::current_dir().ok()?;
    start
        .ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(|dir| dir.to_path_buf())
}
172
/// Break `p` into the deepest ancestor that exists on disk and the
/// components below it that do not.
///
/// Example: for `/a/b/c/d.txt` where `/a/b` exists but `/a/b/c` does not,
/// the result is (`/a/b`, `c/d.txt`). When `p` itself exists the tail is
/// empty.
///
/// Existence is decided by `Path::exists`, which follows symlinks, so a
/// dangling symlink is treated as missing and ends up in the tail. The
/// walk also stops early, leaving a non-existent first element, when it
/// reaches a path with no final name (for instance one ending in `..`).
fn split_existing(p: &Path) -> (PathBuf, PathBuf) {
    let mut ancestor = p.to_path_buf();
    // Missing component names, collected leaf-first.
    let mut missing = Vec::new();
    loop {
        if ancestor.exists() {
            break;
        }
        let name = match ancestor.file_name() {
            Some(n) => n.to_os_string(),
            None => break, // nothing left to peel off
        };
        missing.push(name);
        if !ancestor.pop() {
            break;
        }
    }
    // Reverse so the tail reads root-to-leaf again.
    let tail: PathBuf = missing.into_iter().rev().collect();
    (ancestor, tail)
}
195
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Restricted workspace rooted at `dir`; panics if it cannot be built.
    fn restricted_at(dir: &Path) -> Workspace {
        Workspace::detect(Some(dir)).unwrap()
    }

    // A relative path to an existing file resolves to its canonical form.
    #[test]
    fn resolve_existing_file_inside_root() {
        let root = TempDir::new().unwrap();
        let file = root.path().join("a.txt");
        std::fs::write(&file, "x").unwrap();

        let resolved = restricted_at(root.path()).resolve("a.txt").unwrap();

        assert_eq!(resolved, std::fs::canonicalize(&file).unwrap());
    }

    // An absolute path that points inside the root is accepted as well.
    #[test]
    fn resolve_absolute_inside_root() {
        let root = TempDir::new().unwrap();
        let file = root.path().join("a.txt");
        std::fs::write(&file, "x").unwrap();

        let resolved = restricted_at(root.path())
            .resolve(file.to_str().unwrap())
            .unwrap();

        assert_eq!(resolved, std::fs::canonicalize(&file).unwrap());
    }

    // `..` must not be able to climb out of the root.
    #[test]
    fn resolve_rejects_parent_escape() {
        let parent = TempDir::new().unwrap();
        let root = parent.path().join("sub");
        std::fs::create_dir(&root).unwrap();
        std::fs::write(parent.path().join("outside.txt"), "x").unwrap();

        let err = restricted_at(&root)
            .resolve("../outside.txt")
            .unwrap_err()
            .to_string();

        assert!(err.contains("outside workspace root"), "got: {err}");
    }

    // An absolute path outside the root is rejected one way or another.
    #[test]
    fn resolve_rejects_absolute_outside() {
        let root = TempDir::new().unwrap();

        let err = restricted_at(root.path())
            .resolve("/etc/hostname")
            .unwrap_err()
            .to_string();

        // Either "outside workspace root" (if it exists) or a resolve
        // error (if it doesn't) — both are acceptable rejections.
        let rejected =
            err.contains("outside workspace root") || err.contains("resolving path");
        assert!(rejected, "got: {err}");
    }

    // A symlink inside the root whose target is outside must be rejected.
    #[test]
    #[cfg(unix)]
    fn resolve_rejects_symlink_escape() {
        let root = TempDir::new().unwrap();
        let elsewhere = TempDir::new().unwrap();
        let target = elsewhere.path().join("secret.txt");
        std::fs::write(&target, "top-secret").unwrap();
        std::os::unix::fs::symlink(&target, root.path().join("escape")).unwrap();

        let err = restricted_at(root.path())
            .resolve("escape")
            .unwrap_err()
            .to_string();

        assert!(err.contains("outside workspace root"), "got: {err}");
    }

    // A not-yet-existing nested target resolves to a path under the root.
    #[test]
    fn resolve_for_create_new_file_inside_root() {
        let root = TempDir::new().unwrap();

        let resolved = restricted_at(root.path())
            .resolve_for_create("newdir/new.txt")
            .unwrap();

        let canonical_root = std::fs::canonicalize(root.path()).unwrap();
        assert!(resolved.starts_with(canonical_root));
        assert!(resolved.ends_with("newdir/new.txt"));
    }

    // Creating a file above the root is refused.
    #[test]
    fn resolve_for_create_rejects_outside() {
        let root = TempDir::new().unwrap();

        let err = restricted_at(root.path())
            .resolve_for_create("../evil.txt")
            .unwrap_err()
            .to_string();

        let rejected =
            err.contains("escapes workspace root") || err.contains("outside workspace root");
        assert!(rejected, "got: {err}");
    }

    // Without restriction, resolution is not subject to the boundary check.
    #[test]
    fn unrestricted_accepts_anything() {
        let open = Workspace::unrestricted();
        // Just pick a path that definitely exists.
        assert!(open.resolve("/").is_ok() || open.resolve(".").is_ok());
        assert!(!open.is_restricted());
    }
}