Skip to main content

mcp_methods/
files.rs

1//! Safe file reading with allowed-dir sandbox + optional grep / section /
2//! row / line-range slicing.
3//!
4//! Pure Rust — Python bindings are in the sibling `mcp-methods-py`
5//! crate and wrap [`read_file`] with the legacy `transform=…` /
6//! `section=…` keyword arguments. The wrapper translates a Python
7//! callable into the `&dyn Fn(&str) -> String` slot on
8//! [`ReadFileOpts::transform`].
9
10use regex::Regex;
11use std::path::PathBuf;
12
/// Optional knobs for [`read_file`]. Default-constructible: every field is
/// `None` / `false` by default, which makes `read_file` return the whole
/// file with line numbers.
#[derive(Default)]
pub struct ReadFileOpts<'a> {
    /// Extract an HTML element by `id` attribute (returns the balanced
    /// open/close fragment). When set, line-range and row slicing are not
    /// applied; `grep` and `max_chars` still are.
    pub section: Option<&'a str>,
    /// Slice the file to lines `start_line..=end_line` (1-indexed).
    /// A missing `start_line` means "from the first line".
    pub start_line: Option<usize>,
    /// Last line of the slice (1-indexed, inclusive). A missing `end_line`
    /// means "to the last line"; values past the end are clamped.
    pub end_line: Option<usize>,
    /// CSV-style row slicing: `(start, end)` zero-indexed against the
    /// data rows (after the header). `end` is inclusive. The header line is
    /// always emitted first.
    pub rows: Option<(usize, usize)>,
    /// Cap the output at this many characters. The cap is measured with
    /// `String::len`, i.e. in UTF-8 bytes, and is rounded down to the
    /// nearest character boundary.
    pub max_chars: Option<usize>,
    /// Apply the built-in HTML → markdown transform via
    /// [`crate::html::html_to_text_impl`].
    pub html_transform: bool,
    /// Apply a caller-supplied transform to the raw file content (run
    /// before section/grep). Used by the Python wrapper to bridge
    /// `transform=callable` to a Rust closure that re-enters Python.
    pub transform: Option<&'a dyn Fn(&str) -> String>,
    /// Filter selected lines to those matching the regex (within the
    /// selected line range / section).
    pub grep: Option<&'a str>,
    /// Lines of context around each grep match (default 2).
    pub grep_context: Option<usize>,
    /// Cap the number of matches returned.
    pub max_matches: Option<usize>,
}

// `Debug` cannot be derived because `dyn Fn` is not `Debug`; the closure is
// rendered as a placeholder so the rest of the options stay inspectable.
impl std::fmt::Debug for ReadFileOpts<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ReadFileOpts")
            .field("section", &self.section)
            .field("start_line", &self.start_line)
            .field("end_line", &self.end_line)
            .field("rows", &self.rows)
            .field("max_chars", &self.max_chars)
            .field("html_transform", &self.html_transform)
            .field("transform", &self.transform.map(|_| "<closure>"))
            .field("grep", &self.grep)
            .field("grep_context", &self.grep_context)
            .field("max_matches", &self.max_matches)
            .finish()
    }
}
42
/// Return type for grep_lines: total matches found, matches shown, formatted lines.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct GrepResult {
    /// Number of lines that matched the pattern, before any match cap.
    total: usize,
    /// Number of matches actually rendered (at most `total`).
    shown: usize,
    /// Rendered output: numbered match/context lines, with `--` between
    /// non-contiguous windows.
    lines: Vec<String>,
}
49
50fn grep_lines(
51    lines: &[(usize, &str)],
52    re: &Regex,
53    context: usize,
54    max_matches: Option<usize>,
55) -> GrepResult {
56    let match_indices: Vec<usize> = lines
57        .iter()
58        .enumerate()
59        .filter(|(_, (_, content))| re.is_match(content))
60        .map(|(i, _)| i)
61        .collect();
62
63    let total = match_indices.len();
64
65    if match_indices.is_empty() {
66        return GrepResult {
67            total: 0,
68            shown: 0,
69            lines: Vec::new(),
70        };
71    }
72
73    let used = match max_matches {
74        Some(limit) => &match_indices[..limit.min(total)],
75        None => &match_indices[..],
76    };
77    let shown = used.len();
78
79    let mut windows: Vec<(usize, usize)> = Vec::new();
80    for &mi in used {
81        let start = mi.saturating_sub(context);
82        let end = (mi + context + 1).min(lines.len());
83        if let Some(last) = windows.last_mut() {
84            if start <= last.1 {
85                last.1 = last.1.max(end);
86                continue;
87            }
88        }
89        windows.push((start, end));
90    }
91
92    let mut output: Vec<String> = Vec::new();
93    for (wi, (start, end)) in windows.iter().enumerate() {
94        if wi > 0 {
95            output.push("--".to_string());
96        }
97        for &(line_num, content) in &lines[*start..*end] {
98            output.push(format!("{:>5}  {}", line_num, content));
99        }
100    }
101
102    GrepResult {
103        total,
104        shown,
105        lines: output,
106    }
107}
108
/// Extract an HTML element by its `id` attribute, returning the full element
/// from opening tag to its balanced closing tag.
///
/// This is a lightweight scanner, not an HTML parser. It:
///
/// * looks for `id="<section_id>"` (double-quoted) as a whole attribute —
///   `data-id="…"` and similar suffix matches are skipped;
/// * takes the nearest preceding `<` as the start of the element;
/// * returns just the opening tag for void elements (`<img>`, `<br>`, …) and
///   self-closing tags (`<g …/>`);
/// * otherwise balances nested tags of the same name (ASCII
///   case-insensitively) until the matching `</name>`.
///
/// If the element is never closed, everything from the opening tag to the
/// end of the input is returned.
///
/// Returns `None` if no element with the given id is found.
fn extract_section(html: &str, section_id: &str) -> Option<String> {
    // Elements that never have a closing tag.
    const VOID_ELEMENTS: [&str; 13] = [
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "source",
        "track", "wbr",
    ];

    // Bytes accepted as part of a tag name by this scanner.
    fn is_name_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'-' || b == b'_'
    }

    // Offset of the `>` closing the tag that starts at `tag_start`, ignoring
    // any `>` inside quoted attribute values.
    fn find_tag_end(bytes: &[u8], tag_start: usize) -> Option<usize> {
        let mut quote: Option<u8> = None;
        for (offset, &b) in bytes[tag_start..].iter().enumerate() {
            if let Some(q) = quote {
                if b == q {
                    quote = None;
                }
            } else if b == b'"' || b == b'\'' {
                quote = Some(b);
            } else if b == b'>' {
                return Some(tag_start + offset);
            }
        }
        None
    }

    fn starts_with_ignore_case(haystack: &[u8], prefix: &[u8]) -> bool {
        haystack.len() >= prefix.len() && haystack[..prefix.len()].eq_ignore_ascii_case(prefix)
    }

    let bytes = html.as_bytes();
    let len = bytes.len();
    let id_attr = format!("id=\"{}\"", section_id);

    // First occurrence that is a standalone `id` attribute sitting after a
    // recognisable opening tag.
    let (tag_start, tag_name) = html.match_indices(id_attr.as_str()).find_map(|(pos, _)| {
        if pos > 0 {
            let prev = bytes[pos - 1];
            // Part of a longer attribute name such as `data-id` or `xml:id`.
            if is_name_byte(prev) || prev == b':' {
                return None;
            }
        }
        let tag_start = html[..pos].rfind('<')?;
        let name: String = html[tag_start + 1..]
            .chars()
            .take_while(|c| c.is_ascii_alphanumeric() || *c == '-' || *c == '_')
            .collect();
        if name.is_empty() {
            None
        } else {
            Some((tag_start, name))
        }
    })?;

    // An opening tag that never terminates: hand back whatever is there.
    let open_end = match find_tag_end(bytes, tag_start) {
        Some(end) => end,
        None => return Some(html[tag_start..].to_string()),
    };

    let is_void = VOID_ELEMENTS
        .iter()
        .any(|v| v.eq_ignore_ascii_case(&tag_name));
    // `open_end > tag_start` because `bytes[tag_start]` is `<`.
    let self_closing = bytes[open_end - 1] == b'/';
    if is_void || self_closing {
        return Some(html[tag_start..=open_end].to_string());
    }

    let open_tag = format!("<{}", tag_name);
    let close_tag = format!("</{}>", tag_name);
    let open_bytes = open_tag.as_bytes();
    let close_bytes = close_tag.as_bytes();

    // The element's own opening tag has been consumed already.
    let mut depth: usize = 1;
    let mut i = open_end + 1;

    while i < len {
        if bytes[i] != b'<' {
            i += 1;
            continue;
        }
        let rest = &bytes[i..];
        if starts_with_ignore_case(rest, close_bytes) {
            depth -= 1;
            if depth == 0 {
                return Some(html[tag_start..i + close_bytes.len()].to_string());
            }
            i += close_bytes.len();
        } else if starts_with_ignore_case(rest, open_bytes)
            && !rest.get(open_bytes.len()).copied().map_or(false, is_name_byte)
        {
            // A nested element with the same name; self-closing ones do not
            // add a level because they have no closing tag to balance.
            match find_tag_end(bytes, i) {
                Some(end) => {
                    if bytes[end - 1] != b'/' {
                        depth += 1;
                    }
                    i = end + 1;
                }
                None => break,
            }
        } else {
            i += 1;
        }
    }

    // Unbalanced: return everything from the opening tag onwards.
    Some(html[tag_start..].to_string())
}
157
158/// Read a file with path-traversal protection.
159///
160/// Returns the file content as a formatted string with line numbers.
161/// Every code path returns a status string — invalid path, read failure,
162/// successful content — all surface as `String`; pyo3 wrappers convert
163/// to `Py<str>` automatically.
164pub fn read_file(file_path: &str, allowed_dirs: &[String], opts: &ReadFileOpts) -> String {
165    let canon_dirs: Vec<PathBuf> = allowed_dirs
166        .iter()
167        .filter_map(|d| PathBuf::from(d).canonicalize().ok())
168        .collect();
169
170    let mut resolved: Option<PathBuf> = None;
171
172    for (i, d) in allowed_dirs.iter().enumerate() {
173        let candidate = PathBuf::from(d).join(file_path);
174        if let Ok(canon) = candidate.canonicalize() {
175            if let Some(dir_canon) = canon_dirs.get(i) {
176                if canon.starts_with(dir_canon) && canon.exists() {
177                    resolved = Some(canon);
178                    break;
179                }
180            }
181        }
182    }
183
184    if resolved.is_none() {
185        let abs_path = PathBuf::from(file_path);
186        if let Ok(canon) = abs_path.canonicalize() {
187            for dir_canon in &canon_dirs {
188                if canon.starts_with(dir_canon) && canon.exists() {
189                    resolved = Some(canon);
190                    break;
191                }
192            }
193        }
194    }
195
196    let resolved = match resolved {
197        Some(p) => p,
198        None => {
199            return format!("Error: file not found or access denied: {}", file_path);
200        }
201    };
202
203    let raw = match std::fs::read_to_string(&resolved) {
204        Ok(s) => s,
205        Err(e) => return format!("Error reading file: {}", e),
206    };
207
208    // Apply caller-supplied transform first (e.g. Python callable via the
209    // wrapper crate). HTML transform is a flag, applied below.
210    let raw = if let Some(tf) = opts.transform {
211        tf(&raw)
212    } else {
213        raw
214    };
215
216    // HTML section extraction by id
217    if let Some(sid) = opts.section {
218        return match extract_section(&raw, sid) {
219            Some(fragment) => {
220                let fragment = if opts.html_transform {
221                    crate::html::html_to_text_impl(&fragment)
222                } else {
223                    fragment
224                };
225
226                if let Some(pattern) = opts.grep {
227                    let re = match Regex::new(pattern) {
228                        Ok(r) => r,
229                        Err(e) => return format!("Error: invalid grep pattern: {}", e),
230                    };
231                    let ctx = opts.grep_context.unwrap_or(2);
232                    let section_lines: Vec<&str> = fragment.lines().collect();
233                    let section_total = section_lines.len();
234                    let numbered: Vec<(usize, &str)> = section_lines
235                        .iter()
236                        .enumerate()
237                        .map(|(i, line)| (i + 1, *line))
238                        .collect();
239
240                    let gr = grep_lines(&numbered, &re, ctx, opts.max_matches);
241
242                    let match_label = if gr.shown < gr.total {
243                        format!("showing {} of {} matches", gr.shown, gr.total)
244                    } else {
245                        format!("{} matches", gr.total)
246                    };
247                    let header = format!(
248                        "{}  section '{}'  ({} in {} lines)",
249                        file_path, sid, match_label, section_total
250                    );
251
252                    if gr.lines.is_empty() {
253                        return header;
254                    }
255
256                    let mut text = format!("{}\n{}", header, gr.lines.join("\n"));
257
258                    if let Some(mc) = opts.max_chars {
259                        if text.len() > mc {
260                            let mut end = mc;
261                            while end > 0 && !text.is_char_boundary(end) {
262                                end -= 1;
263                            }
264                            text.truncate(end);
265                            text.push_str(&format!(
266                                "\n\n[... truncated at {} chars — {} matches total]",
267                                mc, gr.total
268                            ));
269                        }
270                    }
271
272                    return text;
273                }
274
275                let mut fragment = fragment;
276                if let Some(mc) = opts.max_chars {
277                    if fragment.len() > mc {
278                        let mut end = mc;
279                        while end > 0 && !fragment.is_char_boundary(end) {
280                            end -= 1;
281                        }
282                        fragment.truncate(end);
283                        fragment.push_str(&format!("\n\n[... truncated at {} chars]", mc));
284                    }
285                }
286                fragment
287            }
288            None => format!("Error: section '{}' not found in {}", sid, file_path),
289        };
290    }
291
292    if let Some((row_start, row_end)) = opts.rows {
293        let all_lines: Vec<&str> = raw.lines().collect();
294        let header = all_lines.first().copied().unwrap_or("");
295        let start = row_start + 1;
296        let end = row_end + 2;
297        let selected: Vec<&str> = all_lines
298            .get(start..end.min(all_lines.len()))
299            .unwrap_or(&[])
300            .to_vec();
301        let mut text = format!("{}\n{}", header, selected.join("\n"));
302        let total_data_rows = if all_lines.is_empty() {
303            0
304        } else {
305            all_lines.len() - 1
306        };
307        text.push_str(&format!(
308            "\n\n[rows {}-{} of {} total]",
309            row_start, row_end, total_data_rows
310        ));
311        if let Some(mc) = opts.max_chars {
312            if text.len() > mc {
313                let mut end = mc;
314                while end > 0 && !text.is_char_boundary(end) {
315                    end -= 1;
316                }
317                text.truncate(end);
318                text.push_str(&format!("\n\n[... truncated at {} chars]", mc));
319            }
320        }
321        return text;
322    }
323
324    let raw = if opts.html_transform {
325        crate::html::html_to_text_impl(&raw)
326    } else {
327        raw
328    };
329
330    let all_lines: Vec<&str> = raw.lines().collect();
331    let total = all_lines.len();
332
333    let (selected, s, e) = if opts.start_line.is_some() || opts.end_line.is_some() {
334        let s = opts.start_line.unwrap_or(1).max(1);
335        let e = opts.end_line.unwrap_or(total).min(total);
336        let sel: Vec<&str> = all_lines
337            .get(s.saturating_sub(1)..e.min(all_lines.len()))
338            .unwrap_or(&[])
339            .to_vec();
340        (sel, s, e)
341    } else {
342        (all_lines.clone(), 1, total)
343    };
344
345    if let Some(pattern) = opts.grep {
346        let re = match Regex::new(pattern) {
347            Ok(r) => r,
348            Err(e) => return format!("Error: invalid grep pattern: {}", e),
349        };
350        let ctx = opts.grep_context.unwrap_or(2);
351
352        let numbered_lines: Vec<(usize, &str)> = selected
353            .iter()
354            .enumerate()
355            .map(|(i, line)| (s + i, *line))
356            .collect();
357
358        let gr = grep_lines(&numbered_lines, &re, ctx, opts.max_matches);
359
360        let match_label = if gr.shown < gr.total {
361            format!("showing {} of {} matches", gr.shown, gr.total)
362        } else {
363            format!("{} matches", gr.total)
364        };
365        let header = format!("{}  ({} in {} lines)", file_path, match_label, total);
366
367        if gr.lines.is_empty() {
368            return header;
369        }
370
371        let mut text = format!("{}\n{}", header, gr.lines.join("\n"));
372
373        if let Some(mc) = opts.max_chars {
374            if text.len() > mc {
375                let mut end = mc;
376                while end > 0 && !text.is_char_boundary(end) {
377                    end -= 1;
378                }
379                text.truncate(end);
380                text.push_str(&format!(
381                    "\n\n[... truncated at {} chars — {} matches, {} chars total]",
382                    mc,
383                    gr.total,
384                    raw.len()
385                ));
386            }
387        }
388
389        return text;
390    }
391
392    let numbered: Vec<String> = selected
393        .iter()
394        .enumerate()
395        .map(|(i, line)| format!("{:>5}  {}", s + i, line))
396        .collect();
397
398    let header = if opts.start_line.is_some() || opts.end_line.is_some() {
399        format!(
400            "{}:{}-{}  ({} of {} lines)",
401            file_path,
402            s,
403            e,
404            e - s + 1,
405            total
406        )
407    } else {
408        format!("{}  ({} lines)", file_path, total)
409    };
410
411    let mut text = format!("{}\n{}", header, numbered.join("\n"));
412
413    if let Some(mc) = opts.max_chars {
414        if text.len() > mc {
415            let mut end = mc;
416            while end > 0 && !text.is_char_boundary(end) {
417                end -= 1;
418            }
419            text.truncate(end);
420            text.push_str(&format!(
421                "\n\n[... truncated at {} chars — {} total]",
422                mc,
423                raw.len()
424            ));
425        }
426    }
427
428    text
429}