Skip to main content

mcp_methods/server/
source.rs

1//! Source-file tooling: ``read_source`` / ``grep`` / ``list_source``.
2//!
3//! Operates on a *dynamic* source root provider — a closure returning
4//! the active list of allowed dirs at the moment of each tool call.
5//! GitHub-workspace mode wires this to the active repo's path; local-
6//! workspace mode wires it to the bound root (re-routed on each
7//! `set_root_dir` call); `--source-root` and `--watch` modes wire it
8//! to a fixed root. An empty list signals "no active source" and the
9//! tools return a friendly error.
10//!
11//! All path traversal protection is done by canonicalising the
12//! resolved path against the allowed dirs before any I/O happens.
13//!
14//! Design: stay close to the existing Python `mcp_methods` semantics
15//! (line numbers, header format, "showing N of M matches", etc.) so a
16//! manifest written for the legacy Python server returns visually
17//! similar output.
18
19#![allow(dead_code)]
20
21use std::fs;
22use std::path::{Path, PathBuf};
23use std::sync::Arc;
24
25use grep_matcher::Matcher;
26use grep_regex::RegexMatcherBuilder;
27use grep_searcher::sinks::UTF8;
28use grep_searcher::SearcherBuilder;
29use ignore::overrides::OverrideBuilder;
30use ignore::WalkBuilder;
31use regex::Regex;
32
/// Shared, thread-safe callback that yields the currently allowed source
/// directories.
///
/// The closure is invoked on demand, so the returned list may differ between
/// calls.
pub type SourceRootsProvider = Arc<dyn Fn() -> Vec<String> + Send + Sync>;
35
36// ---------------------------------------------------------------------------
37// read_source
38// ---------------------------------------------------------------------------
39
/// Optional knobs for [`read_source`]; every field defaults to `None`.
#[derive(Clone, Debug, Default)]
pub struct ReadOpts {
    /// First line to return (1-based); values below 1 are treated as 1.
    pub start_line: Option<usize>,
    /// Last line to return (1-based, inclusive); clamped to the file length.
    pub end_line: Option<usize>,
    /// Regex applied to the selected lines; when set, only matching lines
    /// and their surrounding context are returned.
    pub grep: Option<String>,
    /// Context lines shown around each grep match (2 when unset).
    pub grep_context: Option<usize>,
    /// Upper bound on the number of grep matches shown.
    pub max_matches: Option<usize>,
    /// Upper bound on the size of the returned text before it is truncated.
    pub max_chars: Option<usize>,
}
49
50/// Read a file from one of the allowed source dirs.
51///
52/// Returns a user-facing string. Path traversal attempts and missing
53/// files surface as ``Error: …`` strings rather than panics, mirroring
54/// the existing Python-server behaviour so the agent sees a clean
55/// error in tool output rather than an MCP error envelope.
56pub fn read_source(file_path: &str, allowed_dirs: &[String], opts: &ReadOpts) -> String {
57    let resolved = match resolve_under_roots(file_path, allowed_dirs) {
58        Some(p) => p,
59        None => return format!("Error: file not found or access denied: {file_path}"),
60    };
61    let raw = match fs::read_to_string(&resolved) {
62        Ok(s) => s,
63        Err(e) => return format!("Error reading file: {e}"),
64    };
65    apply_read_options(file_path, &raw, opts)
66}
67
68fn apply_read_options(file_path: &str, raw: &str, opts: &ReadOpts) -> String {
69    let all_lines: Vec<&str> = raw.lines().collect();
70    let total = all_lines.len();
71
72    let (selected, start) = if opts.start_line.is_some() || opts.end_line.is_some() {
73        let s = opts.start_line.unwrap_or(1).max(1);
74        let e = opts.end_line.unwrap_or(total).min(total);
75        let sel: Vec<&str> = all_lines
76            .get(s.saturating_sub(1)..e.min(all_lines.len()))
77            .unwrap_or(&[])
78            .to_vec();
79        (sel, s)
80    } else {
81        (all_lines.clone(), 1usize)
82    };
83
84    if let Some(pattern) = opts.grep.as_deref() {
85        let re = match Regex::new(pattern) {
86            Ok(r) => r,
87            Err(e) => return format!("Error: invalid grep pattern: {e}"),
88        };
89        let ctx = opts.grep_context.unwrap_or(2);
90        let numbered: Vec<(usize, &str)> = selected
91            .iter()
92            .enumerate()
93            .map(|(i, line)| (start + i, *line))
94            .collect();
95        let gr = grep_lines(&numbered, &re, ctx, opts.max_matches);
96        let match_label = if gr.shown < gr.total {
97            format!("showing {} of {} matches", gr.shown, gr.total)
98        } else {
99            format!("{} matches", gr.total)
100        };
101        let header = format!("{file_path}  ({match_label} in {total} lines)");
102        if gr.lines.is_empty() {
103            return header;
104        }
105        let mut text = format!("{header}\n{}", gr.lines.join("\n"));
106        truncate_at_max_chars(&mut text, opts.max_chars, gr.total);
107        return text;
108    }
109
110    let body = selected.join("\n");
111    let mut text = if opts.start_line.is_some() || opts.end_line.is_some() {
112        let s = opts.start_line.unwrap_or(1).max(1);
113        let e = opts.end_line.unwrap_or(total).min(total);
114        format!("{file_path}  (lines {s}-{e} of {total})\n{body}")
115    } else {
116        format!("{file_path}  ({total} lines)\n{body}")
117    };
118    truncate_at_max_chars(&mut text, opts.max_chars, 0);
119    text
120}
121
122struct GrepResult {
123    total: usize,
124    shown: usize,
125    lines: Vec<String>,
126}
127
128/// In-memory grep over (line_number, line_text) pairs.
129fn grep_lines(
130    lines: &[(usize, &str)],
131    re: &Regex,
132    context: usize,
133    max_matches: Option<usize>,
134) -> GrepResult {
135    let mut match_idx: Vec<usize> = Vec::new();
136    for (i, (_, content)) in lines.iter().enumerate() {
137        if re.is_match(content) {
138            match_idx.push(i);
139        }
140    }
141    let total = match_idx.len();
142    let shown_idx = if let Some(cap) = max_matches {
143        match_idx.into_iter().take(cap).collect::<Vec<_>>()
144    } else {
145        match_idx
146    };
147    let shown = shown_idx.len();
148
149    if shown_idx.is_empty() {
150        return GrepResult {
151            total,
152            shown: 0,
153            lines: Vec::new(),
154        };
155    }
156
157    // Build inclusive (start, end) windows for each match, then merge overlapping.
158    let mut windows: Vec<(usize, usize)> = shown_idx
159        .iter()
160        .map(|&i| {
161            (
162                i.saturating_sub(context),
163                (i + context).min(lines.len() - 1),
164            )
165        })
166        .collect();
167    windows.sort_by_key(|w| w.0);
168
169    let mut merged: Vec<(usize, usize)> = Vec::new();
170    for w in windows {
171        if let Some(last) = merged.last_mut() {
172            if w.0 <= last.1 + 1 {
173                last.1 = last.1.max(w.1);
174                continue;
175            }
176        }
177        merged.push(w);
178    }
179
180    let mut out: Vec<String> = Vec::new();
181    for (k, (s, e)) in merged.iter().enumerate() {
182        if k > 0 {
183            out.push("--".to_string());
184        }
185        for &(lineno, text) in lines.iter().take(*e + 1).skip(*s) {
186            out.push(format!("{lineno:>6}: {text}"));
187        }
188    }
189
190    GrepResult {
191        total,
192        shown,
193        lines: out,
194    }
195}
196
/// Cut `text` down to at most `max_chars` characters and append a notice.
///
/// Does nothing when `max_chars` is `None` or the text already fits. The
/// limit is counted in Unicode scalar values (not bytes), matching the
/// "chars" wording of the notice, so multi-byte text is neither cut early
/// nor split inside a character. When `total_matches` is non-zero the
/// notice also reports it, so a truncated grep result still tells the
/// reader how many matches existed.
fn truncate_at_max_chars(text: &mut String, max_chars: Option<usize>, total_matches: usize) {
    let Some(mc) = max_chars else { return };
    // Fast path: byte length is an upper bound on the character count.
    if text.len() <= mc {
        return;
    }
    // Byte offset of the first character beyond the limit; `None` means the
    // text has at most `mc` characters and fits as is.
    let Some((cut, _)) = text.char_indices().nth(mc) else {
        return;
    };
    text.truncate(cut);
    if total_matches > 0 {
        text.push_str(&format!(
            "\n\n[... truncated at {mc} chars — {total_matches} matches total]"
        ));
    } else {
        text.push_str(&format!("\n\n[... truncated at {mc} chars]"));
    }
}
215
216// ---------------------------------------------------------------------------
217// grep — ripgrep across files
218// ---------------------------------------------------------------------------
219
/// Options for the cross-file [`grep`].
#[derive(Clone, Debug, Default)]
pub struct GrepOpts {
    /// File-name glob restricting which files are searched; `None`, an
    /// empty string, or `"*"` searches every file.
    pub glob: Option<String>,
    /// Number of lines requested from the searcher as both before- and
    /// after-context.
    pub context: usize,
    /// Stop once this many matches have been collected.
    pub max_results: Option<usize>,
    /// Match the pattern without regard to case.
    pub case_insensitive: bool,
}
227
228pub fn grep(allowed_dirs: &[String], pattern: &str, opts: &GrepOpts) -> String {
229    if allowed_dirs.is_empty() {
230        return "Error: no source roots configured.".to_string();
231    }
232    let matcher = match RegexMatcherBuilder::new()
233        .case_insensitive(opts.case_insensitive)
234        .build(pattern)
235    {
236        Ok(m) => m,
237        Err(e) => return format!("Error: invalid regex pattern: {e}"),
238    };
239
240    let primary = PathBuf::from(&allowed_dirs[0]);
241    let mut walker = WalkBuilder::new(&primary);
242    for d in allowed_dirs.iter().skip(1) {
243        walker.add(d);
244    }
245    walker
246        .standard_filters(true)
247        .git_ignore(true)
248        .git_global(true)
249        .git_exclude(true)
250        .hidden(true);
251
252    if let Some(g) = &opts.glob {
253        if !g.is_empty() && g != "*" {
254            let mut overrides = OverrideBuilder::new(&primary);
255            if let Err(e) = overrides.add(g) {
256                return format!("Error: invalid glob pattern '{g}': {e}");
257            }
258            match overrides.build() {
259                Ok(ov) => {
260                    walker.overrides(ov);
261                }
262                Err(e) => return format!("Error: failed to compile glob '{g}': {e}"),
263            }
264        }
265    }
266
267    let mut searcher = SearcherBuilder::new()
268        .before_context(opts.context)
269        .after_context(opts.context)
270        .build();
271
272    let mut output: Vec<String> = Vec::new();
273    let mut total_matches: usize = 0;
274    let cap = opts.max_results;
275
276    'walk: for result in walker.build() {
277        let entry = match result {
278            Ok(e) => e,
279            Err(_) => continue,
280        };
281        if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
282            continue;
283        }
284        let path = entry.path();
285        let mut path_matches: Vec<(u64, String, bool)> = Vec::new();
286        let sink_result = searcher.search_path(
287            &matcher,
288            path,
289            UTF8(|lnum, line| {
290                let hit = matcher.find(line.as_bytes()).ok().flatten().is_some();
291                path_matches.push((lnum, line.trim_end().to_string(), hit));
292                Ok(true)
293            }),
294        );
295        if sink_result.is_err() {
296            continue;
297        }
298        if path_matches.is_empty() {
299            continue;
300        }
301        let rel = path.strip_prefix(&primary).unwrap_or(path);
302        let prefix = rel.display().to_string();
303        for (lnum, content, is_match) in path_matches {
304            let sep = if is_match { ":" } else { "-" };
305            if is_match {
306                total_matches += 1;
307            }
308            output.push(format!("{prefix}{sep}{lnum}{sep}{content}"));
309            if let Some(c) = cap {
310                if total_matches >= c {
311                    break 'walk;
312                }
313            }
314        }
315    }
316
317    if output.is_empty() {
318        return format!("No matches for pattern '{pattern}'.");
319    }
320    let mut text = output.join("\n");
321    if let Some(c) = cap {
322        if total_matches >= c {
323            text.push_str(&format!(
324                "\n\n(showing first {c} matches — pass max_results=None for all)"
325            ));
326        }
327    }
328    text
329}
330
331// ---------------------------------------------------------------------------
332// list_source — directory listing
333// ---------------------------------------------------------------------------
334
/// Options for [`list_source`].
#[derive(Clone, Debug, Default)]
pub struct ListOpts {
    /// How many directory levels to descend; `0` is treated as `1`.
    pub depth: usize,
    /// File-name glob; files whose name does not match are omitted, while
    /// directories are always listed.
    pub glob: Option<String>,
    /// List directories only, skipping every non-directory entry.
    pub dirs_only: bool,
}
341
342pub fn list_source(target: &Path, primary_root: &Path, opts: &ListOpts) -> String {
343    if !target.exists() {
344        return format!("Error: path '{}' does not exist.", target.display());
345    }
346    if !target.is_dir() {
347        return format!("Error: path '{}' is not a directory.", target.display());
348    }
349
350    let depth = if opts.depth == 0 { 1 } else { opts.depth };
351    let glob_re = opts
352        .glob
353        .as_deref()
354        .map(glob_to_regex)
355        .transpose()
356        .unwrap_or_else(|e| {
357            tracing::warn!("ignoring invalid glob: {e}");
358            None
359        });
360
361    let mut entries: Vec<String> = Vec::new();
362    walk_listing(
363        target,
364        primary_root,
365        opts,
366        glob_re.as_ref(),
367        0,
368        depth,
369        &mut entries,
370    );
371
372    if entries.is_empty() {
373        return format!("No entries in '{}'.", target.display());
374    }
375    entries.join("\n")
376}
377
378fn walk_listing(
379    dir: &Path,
380    primary_root: &Path,
381    opts: &ListOpts,
382    glob_re: Option<&Regex>,
383    current_depth: usize,
384    max_depth: usize,
385    out: &mut Vec<String>,
386) {
387    let read = match fs::read_dir(dir) {
388        Ok(r) => r,
389        Err(_) => return,
390    };
391    let mut children: Vec<_> = read.filter_map(|e| e.ok()).collect();
392    children.sort_by_key(|e| e.file_name());
393
394    for entry in children {
395        let path = entry.path();
396        let is_dir = entry.file_type().map(|t| t.is_dir()).unwrap_or(false);
397        if opts.dirs_only && !is_dir {
398            continue;
399        }
400        if let Some(re) = glob_re {
401            let name = entry.file_name().to_string_lossy().into_owned();
402            if !is_dir && !re.is_match(&name) {
403                continue;
404            }
405        }
406        let rel = path
407            .strip_prefix(primary_root)
408            .unwrap_or(&path)
409            .display()
410            .to_string();
411        let indent = "  ".repeat(current_depth);
412        let suffix = if is_dir { "/" } else { "" };
413        out.push(format!("{indent}{rel}{suffix}"));
414        if is_dir && current_depth + 1 < max_depth {
415            walk_listing(
416                &path,
417                primary_root,
418                opts,
419                glob_re,
420                current_depth + 1,
421                max_depth,
422                out,
423            );
424        }
425    }
426}
427
428/// Translate a shell glob to a regex anchored at start/end.
429fn glob_to_regex(glob: &str) -> Result<Regex, regex::Error> {
430    let mut out = String::with_capacity(glob.len() * 2 + 4);
431    out.push('^');
432    let mut chars = glob.chars().peekable();
433    for c in &mut chars {
434        match c {
435            '*' => out.push_str(".*"),
436            '?' => out.push('.'),
437            '.' | '+' | '(' | ')' | '|' | '^' | '$' | '\\' => {
438                out.push('\\');
439                out.push(c);
440            }
441            other => out.push(other),
442        }
443    }
444    out.push('$');
445    Regex::new(&out)
446}
447
448// ---------------------------------------------------------------------------
449// Path resolution
450// ---------------------------------------------------------------------------
451
/// Resolve ``file_path`` against the allowed dirs and verify the canonical
/// path lives under one of them.
///
/// Resolution order:
/// 1. `file_path` joined onto each allowed dir in turn; the canonical
///    result must stay inside that same dir.
/// 2. `file_path` taken as given (an absolute path, or relative to the
///    process working directory); the canonical result must lie inside any
///    allowed dir.
///
/// Allowed dirs that cannot be canonicalised (e.g. they do not exist) are
/// ignored and do not affect how the remaining dirs are checked.
///
/// Returns ``None`` when there are no allowed dirs, the file is missing,
/// or the path lands outside the sandbox.
pub fn resolve_under_roots(file_path: &str, allowed_dirs: &[String]) -> Option<PathBuf> {
    if allowed_dirs.is_empty() {
        return None;
    }

    // Keep every root next to its own canonical form. Pairing by position
    // in two separate lists breaks as soon as one root fails to
    // canonicalise.
    let roots: Vec<(PathBuf, PathBuf)> = allowed_dirs
        .iter()
        .filter_map(|d| {
            let raw = PathBuf::from(d);
            let canon = raw.canonicalize().ok()?;
            Some((raw, canon))
        })
        .collect();

    // `canonicalize` only succeeds for paths that exist, so no separate
    // existence check is needed.
    for (raw, canon_root) in &roots {
        if let Ok(canon) = raw.join(file_path).canonicalize() {
            if canon.starts_with(canon_root) {
                return Some(canon);
            }
        }
    }

    let canon = PathBuf::from(file_path).canonicalize().ok()?;
    roots
        .iter()
        .any(|(_, canon_root)| canon.starts_with(canon_root))
        .then_some(canon)
}
485
/// Resolve `path` relative to the first allowed dir, for directory listing.
///
/// `"."` maps to the canonical first dir itself; any other value is joined
/// onto the first dir and canonicalised, so it must exist. The result is
/// accepted when it lies inside any allowed dir that can be canonicalised.
/// Unlike [`resolve_under_roots`], the other allowed dirs are never used as
/// a base for joining.
///
/// Returns `None` for an empty `allowed_dirs`, a first dir or target that
/// cannot be canonicalised, or a target outside every allowed dir.
pub fn resolve_dir_under_roots(path: &str, allowed_dirs: &[String]) -> Option<PathBuf> {
    let first = allowed_dirs.first()?;
    let root = PathBuf::from(first);
    let root_canon = root.canonicalize().ok()?;

    let target = match path {
        "." => root_canon,
        other => root.join(other).canonicalize().ok()?,
    };

    let inside_sandbox = allowed_dirs
        .iter()
        .filter_map(|d| PathBuf::from(d).canonicalize().ok())
        .any(|allowed| target.starts_with(&allowed));

    if inside_sandbox {
        Some(target)
    } else {
        None
    }
}
511
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a scratch tree:
    ///
    /// ```text
    /// hello.txt        three lines, the second contains "marker"
    /// data.json        one JSON object mentioning "Alice"
    /// sub/nested.txt   one line
    /// ```
    fn make_tree() -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        std::fs::write(
            root.join("hello.txt"),
            "line one\nline two with marker\nline three\n",
        )
        .unwrap();
        std::fs::write(root.join("data.json"), "{\"name\": \"Alice\"}\n").unwrap();
        let sub = root.join("sub");
        std::fs::create_dir_all(&sub).unwrap();
        std::fs::write(sub.join("nested.txt"), "nested file\n").unwrap();
        dir
    }

    /// The scratch tree as a single-element allowed-dirs list.
    fn roots_of(dir: &tempfile::TempDir) -> Vec<String> {
        vec![dir.path().to_string_lossy().into_owned()]
    }

    #[test]
    fn read_source_full_file() {
        let dir = make_tree();
        let text = read_source("hello.txt", &roots_of(&dir), &ReadOpts::default());
        assert!(text.contains("line one"));
        assert!(text.contains("line three"));
    }

    #[test]
    fn read_source_grep_filter() {
        let dir = make_tree();
        let opts = ReadOpts {
            grep: Some("marker".to_string()),
            ..Default::default()
        };
        let text = read_source("hello.txt", &roots_of(&dir), &opts);
        assert!(text.contains("marker"));
        assert!(text.contains("matches"));
    }

    #[test]
    fn read_source_blocks_traversal() {
        let dir = make_tree();
        let text = read_source("../escape.txt", &roots_of(&dir), &ReadOpts::default());
        assert!(text.starts_with("Error:"));
    }

    #[test]
    fn read_source_line_range() {
        let dir = make_tree();
        let opts = ReadOpts {
            start_line: Some(2),
            end_line: Some(2),
            ..Default::default()
        };
        let text = read_source("hello.txt", &roots_of(&dir), &opts);
        assert!(text.contains("line two with marker"));
        assert!(!text.contains("line one"));
        assert!(!text.contains("line three"));
    }

    #[test]
    fn grep_finds_pattern() {
        let dir = make_tree();
        let found = grep(&roots_of(&dir), "Alice", &GrepOpts::default());
        assert!(found.contains("data.json"));
    }

    #[test]
    fn grep_glob_filter() {
        let dir = make_tree();
        std::fs::write(dir.path().join("extra.json"), "marker in json\n").unwrap();
        let opts = GrepOpts {
            glob: Some("*.txt".to_string()),
            ..Default::default()
        };
        let found = grep(&roots_of(&dir), "marker", &opts);
        assert!(found.contains("hello.txt"));
        assert!(!found.contains("extra.json"));
    }

    #[test]
    fn grep_no_matches() {
        let dir = make_tree();
        let found = grep(&roots_of(&dir), "xyznotfound", &GrepOpts::default());
        assert!(found.contains("No matches"));
    }

    #[test]
    fn list_source_root() {
        let dir = make_tree();
        let root = dir.path();
        let listing = list_source(root, root, &ListOpts::default());
        assert!(listing.contains("hello.txt"));
        assert!(listing.contains("data.json"));
    }

    #[test]
    fn list_source_dirs_only() {
        let dir = make_tree();
        let root = dir.path();
        let opts = ListOpts {
            dirs_only: true,
            depth: 1,
            ..Default::default()
        };
        let listing = list_source(root, root, &opts);
        assert!(listing.contains("sub"));
        assert!(!listing.contains("hello.txt"));
    }

    #[test]
    fn list_source_subdir() {
        let dir = make_tree();
        let sub = dir.path().join("sub");
        let listing = list_source(&sub, dir.path(), &ListOpts::default());
        assert!(listing.contains("nested.txt"));
    }

    #[test]
    fn glob_translation() {
        let suffix = glob_to_regex("*.py").unwrap();
        assert!(suffix.is_match("foo.py"));
        assert!(!suffix.is_match("foo.rs"));

        let prefix = glob_to_regex("test_*").unwrap();
        assert!(prefix.is_match("test_x"));
        assert!(!prefix.is_match("xtest"));
    }

    #[test]
    fn resolve_blocks_escape() {
        let dir = make_tree();
        // A sibling temp dir holding a file the sandbox must not reach.
        let outside = tempfile::tempdir().unwrap();
        std::fs::write(outside.path().join("secret.txt"), "x").unwrap();

        let sibling = outside.path().file_name().unwrap().to_string_lossy();
        let escape = format!("../{}/secret.txt", sibling);
        assert!(resolve_under_roots(&escape, &roots_of(&dir)).is_none());
    }
}
657}