Skip to main content

solid_pod_rs_git/
guard.rs

//! Path-traversal guard — mirrors JSS `src/handlers/git.js` lines
//! 31-62 (`extractRepoPath` + `isPathWithinDataRoot`).
//!
//! JSS iteratively strips `..` sequences (multi-pass, to defeat the
//! `....//` bypass) and then asserts that the resolved absolute path
//! still starts with the data-root prefix. We do the same, plus an
//! explicit rejection of absolute paths and of any `..` component
//! still present before the request is joined onto the canonicalised
//! root. The joined path itself is not canonicalised.
9
10use std::path::{Component, Path, PathBuf};
11
12use crate::error::GitError;
13
/// Recover the repository-relative slug from an incoming Git smart-HTTP
/// URL path.
///
/// Processing order (keep in sync with JSS `extractRepoPath`):
/// 1. drop everything from the first `?` onwards,
/// 2. strip at most one Git service suffix (`/info/refs`,
///    `/git-upload-pack`, `/git-receive-pack`) — only when it is the
///    very end of the path; a trailing `/` after the suffix prevents
///    the match,
/// 3. strip every leading `/`,
/// 4. delete every `..` substring until none remains.
///
/// Returns `"."` when nothing is left after these steps.
///
/// Step 4 is a purely textual pass: it also removes `..` from inside
/// otherwise harmless names (`my..repo` becomes `myrepo`), and because
/// it runs after step 3 it can re-expose leading slashes
/// (`/../../etc` becomes `//etc`). The result is therefore not safe to
/// use on its own and must still go through [`path_safe`].
#[must_use]
pub fn extract_repo_slug(url_path: &str) -> String {
    const SERVICE_SUFFIXES: [&str; 3] = ["/info/refs", "/git-upload-pack", "/git-receive-pack"];

    // Belt-and-braces: callers should already have split off the query
    // string, but doing it again here is cheap.
    let path = url_path
        .split_once('?')
        .map_or(url_path, |(before_query, _query)| before_query);

    // The first suffix (in declaration order) that terminates the path
    // is removed; the others are then not considered.
    let path = SERVICE_SUFFIXES
        .iter()
        .find_map(|&suffix| path.strip_suffix(suffix))
        .unwrap_or(path);

    let mut clean = path.trim_start_matches('/').to_owned();

    // Multi-pass `..` removal, mirroring JSS's do/while loop. This is a
    // string-level guard on top of the component-level guard in
    // `path_safe`.
    while clean.contains("..") {
        clean = clean.replace("..", "");
    }

    if clean.is_empty() {
        ".".to_owned()
    } else {
        clean
    }
}
58
59/// Resolve `requested` against `repo_root` and assert the result
60/// stays inside the root. Rejects:
61/// - absolute `requested` paths,
62/// - any `Component::ParentDir` (`..`),
63/// - resolved paths that don't share the root prefix.
64///
65/// Does **not** require the path to exist on disk — callers handle the
66/// existence check separately so a missing repo becomes a `404`
67/// rather than a `400`.
68pub fn path_safe(repo_root: &Path, requested: &str) -> Result<PathBuf, GitError> {
69    let req = Path::new(requested);
70    if req.is_absolute() {
71        return Err(GitError::PathTraversal(format!(
72            "absolute path rejected: {requested}"
73        )));
74    }
75
76    // Component-level check — reject any ParentDir segment. Belt and
77    // braces with the string-level pass in `extract_repo_slug`.
78    for component in req.components() {
79        if matches!(component, Component::ParentDir) {
80            return Err(GitError::PathTraversal(format!(
81                "`..` component rejected: {requested}"
82            )));
83        }
84    }
85
86    // Canonicalise the root (must exist), then join. We deliberately
87    // do NOT canonicalise the full path (the leaf may not yet exist),
88    // so we rely on component-level filtering above.
89    let root_canon = repo_root
90        .canonicalize()
91        .map_err(|e| GitError::PathTraversal(format!("root canonicalize: {e}")))?;
92    let candidate = root_canon.join(req);
93
94    // Final prefix check. Use starts_with on canonical root; the join
95    // result is guaranteed to start with root_canon unless req had an
96    // absolute component, which we've already rejected.
97    if !candidate.starts_with(&root_canon) {
98        return Err(GitError::PathTraversal(format!(
99            "resolved path escapes root: {}",
100            candidate.display()
101        )));
102    }
103
104    Ok(candidate)
105}
106
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Call `path_safe` with `requested` against a fresh temporary root
    /// and assert that it is refused as a traversal attempt.
    fn assert_traversal_rejected(requested: &str) {
        let root = TempDir::new().unwrap();
        let failure = path_safe(root.path(), requested).unwrap_err();
        assert!(matches!(failure, GitError::PathTraversal(_)));
    }

    #[test]
    fn extract_slug_strips_info_refs() {
        let slug = extract_repo_slug("/alice/repo/info/refs");
        assert_eq!(slug, "alice/repo");
    }

    #[test]
    fn extract_slug_strips_upload_pack() {
        let slug = extract_repo_slug("/alice/repo/git-upload-pack");
        assert_eq!(slug, "alice/repo");
    }

    #[test]
    fn extract_slug_strips_receive_pack() {
        let slug = extract_repo_slug("/alice/repo/git-receive-pack");
        assert_eq!(slug, "alice/repo");
    }

    #[test]
    fn extract_slug_empty_returns_dot() {
        let slug = extract_repo_slug("/info/refs");
        assert_eq!(slug, ".");
    }

    #[test]
    fn extract_slug_removes_parent_dirs() {
        // Raw `..` segments must be gone from the slug. What remains may
        // still look absolute; refusing that is `path_safe`'s job.
        let slug2 = extract_repo_slug("/../../etc/info/refs");
        assert!(!slug2.contains(".."), "slug still has `..`: {slug2}");

        // Percent-escapes are opaque at this layer: the HTTP layer is
        // expected to hand us an already-decoded path, so `%2F` is kept
        // verbatim.
        let slug = extract_repo_slug("/..%2F..%2Fetc/info/refs");
        assert!(slug.contains('%'), "slug={slug}");
    }

    #[test]
    fn path_safe_accepts_child() {
        let root = TempDir::new().unwrap();
        let canonical_root = root.path().canonicalize().unwrap();
        let resolved = path_safe(root.path(), "alice/repo").unwrap();
        assert!(resolved.starts_with(canonical_root));
    }

    #[test]
    fn path_safe_rejects_absolute() {
        assert_traversal_rejected("/etc/passwd");
    }

    #[test]
    fn path_safe_rejects_parent_dir() {
        assert_traversal_rejected("../etc");
    }

    #[test]
    fn path_safe_rejects_nested_parent() {
        assert_traversal_rejected("alice/../../etc");
    }
}