Skip to main content

harness_grep/
schema.rs

1use serde::{Deserialize, Serialize};
2use serde_json::Value;
3use std::collections::HashSet;
4
5use crate::types::GrepOutputMode;
6
7/// Parsed + validated input params. Mirrors `GrepParams` in TS — all
8/// fields optional except `pattern`. The camelCase→snake_case rename at
9/// the wire boundary is handled by serde `rename_all`.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11#[serde(deny_unknown_fields)]
12pub struct GrepParams {
13    pub pattern: String,
14
15    #[serde(default, skip_serializing_if = "Option::is_none")]
16    pub path: Option<String>,
17
18    #[serde(default, skip_serializing_if = "Option::is_none")]
19    pub glob: Option<String>,
20
21    #[serde(default, skip_serializing_if = "Option::is_none")]
22    pub r#type: Option<String>,
23
24    #[serde(default, skip_serializing_if = "Option::is_none")]
25    pub output_mode: Option<GrepOutputMode>,
26
27    #[serde(default, skip_serializing_if = "Option::is_none")]
28    pub case_insensitive: Option<bool>,
29
30    #[serde(default, skip_serializing_if = "Option::is_none")]
31    pub multiline: Option<bool>,
32
33    #[serde(default, skip_serializing_if = "Option::is_none")]
34    pub context_before: Option<usize>,
35
36    #[serde(default, skip_serializing_if = "Option::is_none")]
37    pub context_after: Option<usize>,
38
39    #[serde(default, skip_serializing_if = "Option::is_none")]
40    pub context: Option<usize>,
41
42    #[serde(default, skip_serializing_if = "Option::is_none")]
43    pub head_limit: Option<usize>,
44
45    #[serde(default, skip_serializing_if = "Option::is_none")]
46    pub offset: Option<usize>,
47}
48
49/// Errors produced by the parsing/validation layer. These always end up
50/// as `INVALID_PARAM` with the `.message()` rendered into the tool error.
51#[derive(Debug, Clone, thiserror::Error)]
52pub enum GrepParseError {
53    #[error("{0}")]
54    Message(String),
55}
56
/// Looks up the targeted hint for a parameter name that models have been
/// observed to send when they meant a different, canonical parameter.
///
/// Mirrors the TS `KNOWN_PARAM_ALIASES` table: same coverage and same
/// wording, so the LLM sees identical hints regardless of language
/// binding. The match on `key` is exact and case-sensitive; any name not
/// in the table yields `None`.
fn known_alias_hint(key: &str) -> Option<&'static str> {
    // (alias, hint) pairs. Keep the wording in sync with the TS table.
    const ALIAS_HINTS: &[(&str, &str)] = &[
        (
            "content",
            "unknown parameter 'content'. Did you mean 'context' (lines around a match)? If you wanted matching lines back, set output_mode: 'content' instead.",
        ),
        ("regex", "unknown parameter 'regex'. Use 'pattern' instead."),
        ("query", "unknown parameter 'query'. Use 'pattern' instead."),
        ("mode", "unknown parameter 'mode'. Use 'output_mode' instead."),
        ("output", "unknown parameter 'output'. Use 'output_mode' instead."),
        ("filter", "unknown parameter 'filter'. Use 'glob' or 'type' instead."),
        ("file_type", "unknown parameter 'file_type'. Use 'type' instead."),
        ("glob_pattern", "unknown parameter 'glob_pattern'. Use 'glob' instead."),
        ("pattern_glob", "unknown parameter 'pattern_glob'. Use 'glob' instead."),
        (
            "ignore_case",
            "unknown parameter 'ignore_case'. Use 'case_insensitive' instead.",
        ),
        (
            "insensitive",
            "unknown parameter 'insensitive'. Use 'case_insensitive' instead.",
        ),
        ("cwd", "unknown parameter 'cwd'. Use 'path' instead."),
        ("dir", "unknown parameter 'dir'. Use 'path' instead."),
        ("directory", "unknown parameter 'directory'. Use 'path' instead."),
        (
            "max_results",
            "unknown parameter 'max_results'. Use 'head_limit' instead (default 250).",
        ),
        (
            "max_count",
            "unknown parameter 'max_count'. Use 'head_limit' instead (default 250).",
        ),
        (
            "limit",
            "unknown parameter 'limit'. Use 'head_limit' instead (default 250).",
        ),
        ("skip", "unknown parameter 'skip'. Use 'offset' instead."),
        ("before", "unknown parameter 'before'. Use 'context_before' instead."),
        ("after", "unknown parameter 'after'. Use 'context_after' instead."),
    ];

    ALIAS_HINTS
        .iter()
        .find(|(alias, _)| *alias == key)
        .map(|&(_, hint)| hint)
}
92
/// Returns the set of accepted (canonical) wire-level parameter names.
///
/// `safe_parse_grep_params` rejects every input key that is not in this
/// set, using a targeted alias hint when one exists and a generic
/// `unknown parameter` message otherwise. The names must stay in sync
/// with the fields of `GrepParams` (note `type`, not `r#type`).
fn canonical_fields() -> HashSet<&'static str> {
    const FIELD_NAMES: [&str; 12] = [
        "pattern",
        "path",
        "glob",
        "type",
        "output_mode",
        "case_insensitive",
        "multiline",
        "context_before",
        "context_after",
        "context",
        "head_limit",
        "offset",
    ];

    FIELD_NAMES.iter().copied().collect()
}
114
115pub fn safe_parse_grep_params(
116    input: &Value,
117) -> Result<GrepParams, GrepParseError> {
118    // Pre-check: scan the input object for known aliases and return a
119    // targeted hint rather than the generic `unknown field` serde error.
120    if let Some(obj) = input.as_object() {
121        let canonical = canonical_fields();
122        let mut alias_hints: Vec<String> = Vec::new();
123        let mut unknown_unhinted: Vec<String> = Vec::new();
124        for key in obj.keys() {
125            if canonical.contains(key.as_str()) {
126                continue;
127            }
128            if let Some(hint) = known_alias_hint(key.as_str()) {
129                alias_hints.push(hint.to_string());
130            } else {
131                unknown_unhinted.push(format!("unknown parameter '{}'.", key));
132            }
133        }
134        if !alias_hints.is_empty() || !unknown_unhinted.is_empty() {
135            let mut msgs = alias_hints;
136            msgs.extend(unknown_unhinted);
137            return Err(GrepParseError::Message(msgs.join("; ")));
138        }
139    }
140
141    // Delegate to serde for type/value validation.
142    let parsed: GrepParams = serde_json::from_value(input.clone())
143        .map_err(|e| GrepParseError::Message(normalize_serde_error(&e.to_string())))?;
144
145    // Minimal post-parse invariants.
146    if parsed.pattern.is_empty() {
147        return Err(GrepParseError::Message("pattern is required".to_string()));
148    }
149    if let Some(hl) = parsed.head_limit {
150        if hl == 0 {
151            return Err(GrepParseError::Message(
152                "head_limit must be >= 1".to_string(),
153            ));
154        }
155    }
156    Ok(parsed)
157}
158
159pub fn parse_grep_params(input: &Value) -> Result<GrepParams, GrepParseError> {
160    safe_parse_grep_params(input)
161}
162
/// Rewrites serde's enum-mismatch error for `output_mode` into the
/// wording the TS tool emits, so models don't have to learn a second
/// dialect. Every other message is returned unchanged.
///
/// serde reports a bad enum value as ``unknown variant `foo`, expected
/// one of `files_with_matches`, ...``. That text names the offending
/// value and the allowed variants but NOT the field, so the check keys on
/// the `files_with_matches` variant name rather than on `output_mode`.
/// Messages that do mention `output_mode` still match.
fn normalize_serde_error(msg: &str) -> String {
    let is_output_mode_mismatch =
        msg.contains("unknown variant") && msg.contains("files_with_matches");
    if is_output_mode_mismatch {
        "output_mode must be one of: files_with_matches, content, count".to_string()
    } else {
        msg.to_string()
    }
}
175
// ---- Tool definition strings (for MCP / OpenAI function-schema wiring) ----

/// Name under which the tool is registered.
pub const GREP_TOOL_NAME: &str = "grep";

/// Model-facing usage text for the tool: one summary line, a blank line,
/// a `Usage:` header, then one `- ` bullet per line. Lines are joined
/// with `\n` and there is no trailing newline.
pub const GREP_TOOL_DESCRIPTION: &str = concat!(
    "Search file contents with a ripgrep-compatible regex and return structured results.\n",
    "\n",
    "Usage:\n",
    "- pattern is required. Regex syntax is ripgrep's (Rust regex). Escape literal metacharacters: use 'interface\\\\{\\\\}' to match 'interface{}'. '.' does not match newlines unless multiline: true.\n",
    "- path defaults to the session cwd. Absolute paths preferred; relative paths resolve against cwd.\n",
    "- Filter by the 'glob' parameter (e.g. '*.ts', '*.{js,tsx}') or by 'type' (e.g. 'js', 'py', 'rust'). 'type' takes ONE name only — for multiple extensions, use 'glob' with a brace list like '*.{ts,tsx,js}'. 'type' is more efficient for standard languages.\n",
    "- Default output_mode is 'files_with_matches' — cheap path-only results. Use this first to decide whether to pay for content.\n",
    "- output_mode 'content' returns matching lines grouped by file, newest-first. Context lines come from context_before / context_after / context (-C sets both). Context is only valid with content mode.\n",
    "- output_mode 'count' returns per-file match counts, alphabetical path order.\n",
    "- Results are capped at head_limit (default 250). Use offset to page: next_offset = previous_offset + returned_count.\n",
    "- .gitignore, .ignore, and .rgignore are respected. Hidden files are skipped. node_modules, .git, and other ignored paths will not appear.\n",
    "- Binary files are skipped. Files larger than 5 MB are skipped.\n",
    "- Call in parallel for independent searches. Prefer this tool over Bash(grep/rg).",
);