Skip to main content

git_paw/mcp/query/
source.rs

1//! Source-tree reads for the MCP server.
2//!
3//! Browses and reads the repository's working tree via git plumbing so that
4//! gitignore handling and the tracked-set semantics come for free and stay
5//! consistent with the git-context tools. The "working tree" is defined as
6//! tracked files **plus** untracked-but-not-ignored files; gitignored paths
7//! (build artifacts, secrets) are excluded throughout.
8//!
9//! [`read_file`] is confined to the repository root: the requested path is
10//! resolved under the root, canonicalised, and verified to still lie within
11//! it, so `..`/absolute/symlink escapes are refused before any file outside
12//! the root is read (the same guard as [`super::docs::read_doc`]). It
13//! additionally refuses gitignored paths via `git check-ignore`.
14//!
15//! Degradation contract (design D4): a non-git directory or a search with no
16//! matches yields an empty result, never a transport error.
17
18use std::path::Path;
19use std::process::Command;
20
21use rmcp::schemars;
22use serde::Serialize;
23
24use crate::error::PawError;
25
26use super::resolve_under_root;
27
/// Upper bound on the number of matches [`search_code`] returns.
///
/// Matches beyond this cap are dropped and `search_code` reports the result
/// as truncated, so a broad query cannot produce an unbounded response.
const SEARCH_MATCH_CAP: usize = 200;
32
/// One match returned by [`search_code`].
///
/// Each value corresponds to one `<path>:<line_number>:<content>` line of
/// `git grep -n` output.
#[derive(Debug, Clone, Serialize, schemars::JsonSchema, PartialEq, Eq)]
pub struct CodeMatch {
    /// Path relative to the repository root (forward slashes, as git emits).
    pub path: String,
    /// 1-based line number of the match.
    pub line_number: u64,
    /// The matching line's content (trailing newline stripped).
    pub line: String,
}
43
/// Outcome of a [`read_file`] call: either the file's working-tree content, or
/// a refusal/absence carrying a human-readable reason.
///
/// `read_file` populates exactly one of the two fields: `content` on a
/// successful read, `message` otherwise.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadOutcome {
    /// File content from the local working tree, or `None` when refused or
    /// absent.
    pub content: Option<String>,
    /// Human-readable note when `content` is `None` (refused traversal, a
    /// gitignored path, or a missing file). `None` when `content` is present.
    pub message: Option<String>,
}
55
/// Invokes `git <args>` with `repo_root` as the working directory.
///
/// Returns the captured stdout (invalid UTF-8 is replaced lossily) when the
/// process could be run and exited successfully. Returns `None` when git
/// could not be spawned or exited with a non-zero status.
fn git(repo_root: &Path, args: &[&str]) -> Option<String> {
    let mut command = Command::new("git");
    command.current_dir(repo_root).args(args);
    match command.output() {
        Ok(output) if output.status.success() => {
            Some(String::from_utf8_lossy(&output.stdout).into_owned())
        }
        // Spawn failure and non-zero exit are deliberately indistinguishable
        // to callers: both mean "no usable output".
        _ => None,
    }
}
69
70/// Lists the repository's working-tree files (tracked plus
71/// untracked-but-not-ignored), optionally scoped to `subpath`.
72///
73/// Runs `git ls-files --cached --others --exclude-standard [-- <subpath>]` in
74/// `repo_root`, so gitignored paths are excluded and untracked-not-ignored
75/// files are included. Returns paths relative to the repository root. Yields an
76/// empty list when the directory is not a git repository or git fails
77/// (graceful degradation).
78#[must_use]
79pub fn list_files(repo_root: &Path, subpath: Option<&str>) -> Vec<String> {
80    let mut args = vec!["ls-files", "--cached", "--others", "--exclude-standard"];
81    if let Some(sub) = subpath {
82        args.push("--");
83        args.push(sub);
84    }
85    let Some(raw) = git(repo_root, &args) else {
86        return Vec::new();
87    };
88    raw.lines()
89        .filter(|l| !l.is_empty())
90        .map(ToString::to_string)
91        .collect()
92}
93
/// Reports whether `path` (interpreted relative to `repo_root`) is gitignored.
///
/// Runs `git check-ignore -q -- <path>` in `repo_root` and returns `true`
/// only when the command exits successfully (exit 0, i.e. the path is
/// ignored). Exit 1 (not ignored), any other exit status, and a failure to
/// spawn git all yield `false`, so this check fails open and the confinement
/// guard in the caller remains the primary gate.
fn is_gitignored(repo_root: &Path, path: &str) -> bool {
    let mut check = Command::new("git");
    check
        .current_dir(repo_root)
        .args(["check-ignore", "-q", "--", path]);
    match check.output() {
        Ok(result) => result.status.success(),
        Err(_) => false,
    }
}
107
108/// Reads one file from the local working tree, confined to the repository
109/// root and refusing gitignored paths.
110///
111/// Steps, in order:
112/// 1. Resolve `path` under `repo_root`, canonicalise it, and verify it still
113///    lies within the canonical repository root. Any escape (`..`, an absolute
114///    path, a symlink target outside the root) is **refused** — no file
115///    outside the root is read.
116/// 2. Refuse gitignored paths (`git check-ignore`), so secrets/build artifacts
117///    are never returned even when they sit inside the root.
118/// 3. Read the on-disk working-tree content (so uncommitted/branch state is
119///    reflected).
120///
121/// Returns:
122/// - refused traversal/escape → `Ok(ReadOutcome { content: None, message })`.
123/// - gitignored path → `Ok(ReadOutcome { content: None, message })`.
124/// - missing file → `Ok(ReadOutcome { content: None, message })`.
125/// - readable file → `Ok(ReadOutcome { content: Some(..), message: None })`.
126/// - present-but-unreadable (e.g. a permission error) → `Err`, so the tool
127///   layer can surface the misconfiguration.
128pub fn read_file(repo_root: &Path, path: &str) -> Result<ReadOutcome, PawError> {
129    let refused = |reason: &str| {
130        Ok(ReadOutcome {
131            content: None,
132            message: Some(reason.to_string()),
133        })
134    };
135
136    // Confinement: the canonical repository root must exist for the guard to
137    // be meaningful.
138    let Ok(canonical_root) = repo_root.canonicalize() else {
139        return refused("repository root could not be resolved");
140    };
141
142    let requested = resolve_under_root(repo_root, Path::new(path));
143    // Canonicalise the requested path; a non-existent file (or a broken
144    // traversal target) cannot be confirmed inside the root, so it is treated
145    // as absent.
146    let Ok(canonical) = requested.canonicalize() else {
147        return refused(&format!("file not found within the repository: {path:?}"));
148    };
149    // Confinement check: the canonical target must stay within the canonical
150    // repository root. This rejects `..`, absolute paths, and symlink escapes
151    // alike.
152    if !canonical.starts_with(&canonical_root) {
153        return refused(&format!(
154            "path {path:?} resolves outside the repository root and was refused"
155        ));
156    }
157
158    // Refuse gitignored paths even when confined to the root.
159    if is_gitignored(repo_root, path) {
160        return refused(&format!("path {path:?} is gitignored and was refused"));
161    }
162
163    match std::fs::read_to_string(&canonical) {
164        Ok(content) => Ok(ReadOutcome {
165            content: Some(content),
166            message: None,
167        }),
168        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
169            refused(&format!("file not found within the repository: {path:?}"))
170        }
171        Err(e) => Err(PawError::McpError(format!(
172            "file {} could not be read: {e}",
173            canonical.display()
174        ))),
175    }
176}
177
178/// Searches file contents across the repository's working tree (tracked plus
179/// untracked-but-not-ignored), optionally scoped to `subpath`.
180///
181/// Runs `git grep -n -I --untracked -e <query> [-- <subpath>]` in `repo_root`.
182/// `-I` skips binary files, `--untracked` extends the search to
183/// untracked-not-ignored files. Returns matches as `{ path, line_number, line }`,
184/// capped at [`SEARCH_MATCH_CAP`]; the returned flag reports whether the result
185/// was truncated. `git grep` exits 1 when there are no matches — that and a
186/// non-git directory both degrade to an empty list (never an error).
187#[must_use]
188pub fn search_code(repo_root: &Path, query: &str, subpath: Option<&str>) -> (Vec<CodeMatch>, bool) {
189    let mut args = vec!["grep", "-n", "-I", "--untracked", "-e", query];
190    if let Some(sub) = subpath {
191        args.push("--");
192        args.push(sub);
193    }
194    // `git grep` exits 1 on no-match (treated as empty by `git()` returning
195    // None), and 0 with output on match.
196    let Some(raw) = git(repo_root, &args) else {
197        return (Vec::new(), false);
198    };
199
200    let mut matches = Vec::new();
201    let mut truncated = false;
202    for line in raw.lines() {
203        // Format: "<path>:<line_number>:<content>".
204        let mut parts = line.splitn(3, ':');
205        let (Some(path), Some(num), Some(content)) = (parts.next(), parts.next(), parts.next())
206        else {
207            continue;
208        };
209        let Ok(line_number) = num.parse::<u64>() else {
210            continue;
211        };
212        if matches.len() >= SEARCH_MATCH_CAP {
213            truncated = true;
214            break;
215        }
216        matches.push(CodeMatch {
217            path: path.to_string(),
218            line_number,
219            line: content.to_string(),
220        });
221    }
222    (matches, truncated)
223}
224
#[cfg(test)]
mod tests {
    use super::*;
    use std::process::Command;

    /// Runs `git <args>` in `dir`, panicking (and naming the command) when
    /// git cannot be spawned or exits non-zero.
    fn git_run(dir: &Path, args: &[&str]) {
        assert!(
            Command::new("git")
                .current_dir(dir)
                .args(args)
                .status()
                .unwrap()
                .success(),
            "git {args:?} failed"
        );
    }

    /// Initialises an empty repository (branch `main`, test identity) in the
    /// existing directory `dir`.
    fn init_repo_at(dir: &Path) {
        git_run(dir, &["init", "-q", "-b", "main"]);
        git_run(dir, &["config", "user.email", "t@example.com"]);
        git_run(dir, &["config", "user.name", "Test"]);
    }

    /// Creates a fresh temp dir and initialises an empty repository in it.
    fn init_repo() -> tempfile::TempDir {
        let tmp = tempfile::tempdir().unwrap();
        init_repo_at(tmp.path());
        tmp
    }

    /// Populates an initialised repo at `dir` with a tracked file, an
    /// untracked-but-not-ignored file, a `.gitignore`, and a gitignored path.
    fn populate_fixture(dir: &Path) {
        std::fs::create_dir_all(dir.join("src")).unwrap();
        std::fs::write(
            dir.join("src/main.rs"),
            "fn main() {\n    register_watch_target_http();\n}\n",
        )
        .unwrap();
        std::fs::write(dir.join(".gitignore"), "target/\n").unwrap();
        git_run(dir, &["add", "src/main.rs", ".gitignore"]);
        git_run(dir, &["commit", "-q", "-m", "first"]);
        // Untracked-but-not-ignored.
        std::fs::write(dir.join("notes.txt"), "loose notes\n").unwrap();
        // Gitignored path.
        std::fs::create_dir_all(dir.join("target/debug")).unwrap();
        std::fs::write(dir.join("target/debug/foo"), "build artifact\n").unwrap();
    }

    /// Builds the fixture repo at the root of a fresh temp dir.
    fn fixture() -> tempfile::TempDir {
        let tmp = init_repo();
        populate_fixture(tmp.path());
        tmp
    }

    /// Builds the fixture repo in a `repo` subdirectory of a fresh temp dir.
    ///
    /// Tests that need a file *outside* the repository root place it in the
    /// returned temp dir, so it is removed together with the fixture instead
    /// of being left behind in the shared system temp directory.
    fn nested_fixture() -> (tempfile::TempDir, std::path::PathBuf) {
        let outer = tempfile::tempdir().unwrap();
        let repo = outer.path().join("repo");
        std::fs::create_dir(&repo).unwrap();
        init_repo_at(&repo);
        populate_fixture(&repo);
        (outer, repo)
    }

    // Scenario: list_files returns the working tree excluding gitignored paths.
    #[test]
    fn list_files_includes_tracked_and_untracked_excludes_gitignored() {
        let tmp = fixture();
        let files = list_files(tmp.path(), None);
        assert!(files.iter().any(|f| f == "src/main.rs"), "tracked listed");
        assert!(
            files.iter().any(|f| f == "notes.txt"),
            "untracked-not-ignored listed"
        );
        assert!(
            !files.iter().any(|f| f.starts_with("target/")),
            "gitignored excluded: {files:?}"
        );
    }

    // Scenario: list_files scopes to a subpath.
    #[test]
    fn list_files_scopes_to_subpath() {
        let tmp = fixture();
        let files = list_files(tmp.path(), Some("src"));
        assert_eq!(files, vec!["src/main.rs".to_string()]);
    }

    // Scenario: list_files degrades to empty when not a git repository.
    #[test]
    fn list_files_empty_when_not_a_git_repo() {
        let tmp = tempfile::tempdir().unwrap();
        assert!(list_files(tmp.path(), None).is_empty());
    }

    // Scenario: read_file returns a file's content from the local working tree.
    #[test]
    fn read_file_happy_path_returns_working_tree_content() {
        let tmp = fixture();
        let out = read_file(tmp.path(), "src/main.rs").unwrap();
        assert!(
            out.content
                .as_deref()
                .unwrap()
                .contains("register_watch_target_http")
        );
        assert!(out.message.is_none());
    }

    // Scenario: read_file refuses path traversal outside the repository root.
    #[test]
    fn read_file_refuses_dotdot_traversal() {
        let (outer, repo) = nested_fixture();
        // A secret outside the repo root, but inside the auto-cleaned temp dir.
        std::fs::write(outer.path().join("paw-secret.txt"), "TOPSECRET").unwrap();
        let out = read_file(&repo, "../paw-secret.txt").unwrap();
        assert!(out.content.is_none(), "traversal must be refused");
        assert!(out.message.is_some());
    }

    // Scenario: read_file refuses path traversal outside the repository root
    // (absolute form).
    #[test]
    fn read_file_refuses_absolute_path_outside_root() {
        let (outer, repo) = nested_fixture();
        let secret = outer.path().join("paw-secret-abs.txt");
        std::fs::write(&secret, "TOPSECRET").unwrap();
        let abs = secret.to_string_lossy().into_owned();
        let out = read_file(&repo, &abs).unwrap();
        assert!(out.content.is_none(), "absolute escape must be refused");
        assert!(out.message.is_some());
    }

    // Scenario: read_file refuses a gitignored path.
    #[test]
    fn read_file_refuses_gitignored_path() {
        let tmp = fixture();
        let out = read_file(tmp.path(), "target/debug/foo").unwrap();
        assert!(out.content.is_none(), "gitignored path must be refused");
        assert!(
            out.message.as_deref().unwrap().contains("gitignored"),
            "message: {:?}",
            out.message
        );
    }

    // Scenario: read_file reports a missing file as absent, not as an error.
    #[test]
    fn read_file_missing_file_yields_none() {
        let tmp = fixture();
        let out = read_file(tmp.path(), "src/does-not-exist.rs").unwrap();
        assert!(out.content.is_none());
        assert!(out.message.is_some());
    }

    // Scenario: search_code returns matches across the working tree.
    #[test]
    fn search_code_finds_known_string() {
        let tmp = fixture();
        let (matches, truncated) = search_code(tmp.path(), "register_watch_target_http", None);
        assert!(!truncated);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].path, "src/main.rs");
        assert_eq!(matches[0].line_number, 2);
        assert!(matches[0].line.contains("register_watch_target_http"));
    }

    // Scenario: search_code degrades to empty when there are no matches.
    #[test]
    fn search_code_empty_when_no_match() {
        let tmp = fixture();
        let (matches, truncated) = search_code(tmp.path(), "a-string-that-appears-nowhere", None);
        assert!(matches.is_empty());
        assert!(!truncated);
    }

    // Scenario: search_code degrades to empty when not a git repository.
    #[test]
    fn search_code_empty_when_not_a_git_repo() {
        let tmp = tempfile::tempdir().unwrap();
        let (matches, truncated) = search_code(tmp.path(), "anything", None);
        assert!(matches.is_empty());
        assert!(!truncated);
    }

    // Scenario: search_code caps the result and reports truncation.
    #[test]
    fn search_code_truncates_beyond_cap() {
        let tmp = init_repo();
        let dir = tmp.path();
        let mut body = String::new();
        for _ in 0..(SEARCH_MATCH_CAP + 50) {
            body.push_str("needle\n");
        }
        std::fs::write(dir.join("big.txt"), body).unwrap();
        git_run(dir, &["add", "big.txt"]);
        let (matches, truncated) = search_code(dir, "needle", None);
        assert_eq!(matches.len(), SEARCH_MATCH_CAP);
        assert!(truncated);
    }
}