vtcode_core/tools/
srgn.rs

1//! Srgn (code surgeon) tool integration for VTCode
2//!
3//! This tool provides access to srgn, a grep-like tool that understands source code
4//! syntax and allows for manipulation in addition to search. It supports various
5//! programming languages and provides precise code modification capabilities.
6//!
7//! ## Supported Languages and Prepared Queries
8//!
9//! ### Rust
10//! - `comments` - Comments (line and block styles; excluding doc comments)
11//! - `doc-comments` - Doc comments (comment chars included)
12//! - `uses` - Use statements (paths only; excl. `use`/`as`/`*`)
13//! - `strings` - Strings (regular, raw, byte; includes interpolation parts)
14//! - `attribute` - Attributes like `#[attr]`
15//! - `struct` - `struct` definitions
16//! - `struct~<PATTERN>` - Structs whose name matches PATTERN
17//! - `enum` - `enum` definitions
18//! - `enum~<PATTERN>` - Enums whose name matches PATTERN
19//! - `fn` - Function definitions
20//! - `fn~<PATTERN>` - Functions whose name matches PATTERN
21//! - `unsafe` - `unsafe` keyword usages
22//!
23//! ### Python
24//! - `comments` - Comments
25//! - `strings` - Strings (raw, byte, f-strings; interpolation not included)
26//! - `imports` - Module names in imports
27//! - `doc-strings` - Docstrings
28//! - `function-names` - Function names at definition site
29//! - `function-calls` - Function calls
30//! - `class` - Class definitions
31//! - `def` - All function definitions
32//! - `methods` - Function definitions inside classes
33//!
34//! ### JavaScript/TypeScript
35//! - `comments` - Comments
36//! - `strings` - Strings (literal, template)
37//! - `imports` - Imports (module specifiers)
38//! - `function` - Function definitions
39//! - `class` - Class definitions
40//! - `interface` - Interface definitions
41//!
42//! ### Go
43//! - `comments` - Comments
44//! - `strings` - Strings (interpreted and raw)
45//! - `imports` - Imports
46//! - `struct` - Struct type definitions
47//! - `struct~<PATTERN>` - Structs whose name matches PATTERN
48//! - `func` - Function definitions
49//! - `func~<PATTERN>` - Functions whose name matches PATTERN
50//!
51//! ### C/C++/C#
52//! - `comments` - Comments
53//! - `strings` - Strings
54//! - `function` - Function definitions
55//! - `struct` - Struct definitions
56//! - `class` - Class definitions
57//!
58//! ### HCL (Terraform)
59//! - `comments` - Comments
60//! - `strings` - Literal strings
61//! - `variable` - Variable blocks
62//! - `resource` - Resource blocks
63//! - `data` - Data blocks
64//!
65//! ## Usage Examples
66//!
67//! ```rust,ignore
68//! // Replace println with eprintln in Rust functions
69//! SrgnInput {
70//!     path: "*.rs".to_string(),
71//!     language_scope: Some("rust fn".to_string()),
72//!     scope: Some("println".to_string()),
73//!     replacement: Some("eprintln".to_string()),
74//!     action: SrgnAction::Replace,
75//!     dry_run: true,
76//!     ..Default::default()
77//! }
78//!
79//! // Find all unsafe Rust code
80//! SrgnInput {
81//!     path: "*.rs".to_string(),
82//!     language_scope: Some("rust unsafe".to_string()),
83//!     fail_any: true,
84//!     ..Default::default()
85//! }
86//! ```
87
88use super::traits::{FileTool, Tool};
89use crate::utils::vtcodegitignore::should_exclude_file;
90use anyhow::{Context, Result, anyhow};
91use async_trait::async_trait;
92use serde::{Deserialize, Serialize};
93use serde_json::{Value, json};
94use std::path::{Path, PathBuf};
95use std::process::Stdio;
96use std::time::SystemTime;
97use tokio::process::Command;
98
99/// Input structure for srgn operations
100#[derive(Debug, Deserialize)]
101pub struct SrgnInput {
102    /// File path or glob pattern to operate on
103    pub path: String,
104    /// Scope pattern (regex or literal string)
105    pub scope: Option<String>,
106    /// Replacement string (for replace operations)
107    pub replacement: Option<String>,
108    /// Language-specific scope (e.g., "rust fn", "python class")
109    pub language_scope: Option<String>,
110    /// Action to perform
111    pub action: SrgnAction,
112    /// Whether to use literal string matching instead of regex
113    #[serde(default)]
114    pub literal_string: bool,
115    /// Whether to perform a dry run (show changes without applying)
116    #[serde(default)]
117    pub dry_run: bool,
118    /// Whether to invert the operation (where applicable)
119    #[serde(default)]
120    pub invert: bool,
121    /// Custom tree-sitter query (for advanced users)
122    pub custom_query: Option<String>,
123    /// Custom tree-sitter query from file
124    pub custom_query_file: Option<String>,
125    /// Additional srgn flags
126    pub flags: Option<Vec<String>>,
127    /// Fail if anything matches (for linting/checking)
128    #[serde(default)]
129    pub fail_any: bool,
130    /// Fail if nothing matches
131    #[serde(default)]
132    pub fail_none: bool,
133    /// Join multiple language scopes with OR instead of AND
134    #[serde(default)]
135    pub join_language_scopes: bool,
136    /// Ignore hidden files and directories
137    #[serde(default)]
138    pub hidden: bool,
139    /// Don't ignore .gitignored files
140    #[serde(default)]
141    pub gitignored: bool,
142    /// Process files in sorted order
143    #[serde(default)]
144    pub sorted: bool,
145    /// Number of threads to use (0 = auto)
146    pub threads: Option<usize>,
147    /// Whether to fail if no files are found
148    #[serde(default)]
149    pub fail_no_files: bool,
150    /// German-specific options
151    pub german_options: Option<GermanOptions>,
152}
153
154/// German-specific options for srgn
155#[derive(Debug, Deserialize)]
156pub struct GermanOptions {
157    /// Prefer original spelling when multiple are valid
158    #[serde(default)]
159    pub prefer_original: bool,
160    /// Use naive replacement (don't check word validity)
161    #[serde(default)]
162    pub naive: bool,
163}
164
165/// Available srgn actions
166#[derive(Debug, Deserialize, Serialize)]
167#[serde(rename_all = "snake_case")]
168pub enum SrgnAction {
169    /// Replace content in scope
170    Replace,
171    /// Delete content in scope
172    Delete,
173    /// Convert to uppercase
174    Upper,
175    /// Convert to lowercase
176    Lower,
177    /// Convert to titlecase
178    Titlecase,
179    /// Normalize Unicode
180    Normalize,
181    /// German umlaut substitutions
182    German,
183    /// Symbol substitutions (ASCII art to Unicode)
184    Symbols,
185    /// Squeeze consecutive occurrences
186    Squeeze,
187}
188
/// Tool that shells out to the `srgn` binary on behalf of the agent.
#[derive(Clone)]
pub struct SrgnTool {
    /// Directory srgn is run from and against which input paths are resolved.
    workspace_root: PathBuf,
}
194
195impl SrgnTool {
196    /// Create a new SrgnTool instance
197    pub fn new(workspace_root: PathBuf) -> Self {
198        Self { workspace_root }
199    }
200
201    /// Build srgn command arguments from input
202    fn build_command_args(&self, input: &SrgnInput) -> Result<Vec<String>> {
203        let mut args = Vec::new();
204
205        // Add global flags first
206        if input.dry_run {
207            args.push("--dry-run".to_string());
208        }
209
210        if input.invert {
211            args.push("--invert".to_string());
212        }
213
214        if input.fail_any {
215            args.push("--fail-any".to_string());
216        }
217
218        if input.fail_none {
219            args.push("--fail-none".to_string());
220        }
221
222        if input.join_language_scopes {
223            args.push("--join-language-scopes".to_string());
224        }
225
226        if input.hidden {
227            args.push("--hidden".to_string());
228        }
229
230        if input.gitignored {
231            args.push("--gitignored".to_string());
232        }
233
234        if input.sorted {
235            args.push("--sorted".to_string());
236        }
237
238        if input.fail_no_files {
239            args.push("--fail-no-files".to_string());
240        }
241
242        if let Some(threads) = input.threads
243            && threads > 0
244        {
245            args.push("--threads".to_string());
246            args.push(threads.to_string());
247        }
248
249        // Add German-specific options
250        if let Some(german_opts) = &input.german_options {
251            if german_opts.prefer_original {
252                args.push("--german-prefer-original".to_string());
253            }
254            if german_opts.naive {
255                args.push("--german-naive".to_string());
256            }
257        }
258
259        // Add file path/glob
260        args.push("--glob".to_string());
261        args.push(input.path.clone());
262
263        // Handle different input combinations for scope
264        match (
265            &input.scope,
266            &input.language_scope,
267            &input.custom_query,
268            &input.custom_query_file,
269        ) {
270            // Custom query from file takes highest precedence
271            (_, _, _, Some(query_file)) => {
272                // Determine language from scope or default to rust
273                let lang = if let Some(lang_scope) = &input.language_scope {
274                    let parts: Vec<String> = lang_scope
275                        .split_whitespace()
276                        .map(|s| s.to_string())
277                        .collect();
278                    parts.first().unwrap_or(&"rust".to_string()).clone()
279                } else {
280                    "rust".to_string()
281                };
282
283                let query_flag = match lang.as_str() {
284                    "rust" | "rs" => "--rust-query-file",
285                    "python" | "py" => "--python-query-file",
286                    "javascript" | "js" | "typescript" | "ts" => "--typescript-query-file",
287                    "go" => "--go-query-file",
288                    "c" => "--c-query-file",
289                    "csharp" | "cs" | "c#" => "--csharp-query-file",
290                    "hcl" => "--hcl-query-file",
291                    _ => {
292                        return Err(anyhow!(
293                            "Unsupported language for custom query file: {}",
294                            lang
295                        ));
296                    }
297                };
298
299                args.push(query_flag.to_string());
300                args.push(query_file.clone());
301            }
302            // Custom query takes precedence
303            (_, _, Some(query), None) => {
304                // Determine language from scope or default to rust
305                let lang = if let Some(lang_scope) = &input.language_scope {
306                    let parts: Vec<String> = lang_scope
307                        .split_whitespace()
308                        .map(|s| s.to_string())
309                        .collect();
310                    parts.first().unwrap_or(&"rust".to_string()).clone()
311                } else {
312                    "rust".to_string()
313                };
314
315                let query_flag = match lang.as_str() {
316                    "rust" | "rs" => "--rust-query",
317                    "python" | "py" => "--python-query",
318                    "javascript" | "js" | "typescript" | "ts" => "--typescript-query",
319                    "go" => "--go-query",
320                    "c" => "--c-query",
321                    "csharp" | "cs" | "c#" => "--csharp-query",
322                    "hcl" => "--hcl-query",
323                    _ => return Err(anyhow!("Unsupported language for custom query: {}", lang)),
324                };
325
326                args.push(query_flag.to_string());
327                args.push(query.clone());
328            }
329            // Language scope takes precedence
330            (_, Some(lang_scope), None, None) => {
331                // Parse language and scope (e.g., "rust fn", "python class", "go struct~Test")
332                let parts: Vec<&str> = lang_scope.split_whitespace().collect();
333                if parts.len() >= 2 {
334                    let lang = parts[0];
335                    let scope = parts[1];
336
337                    // Map language to srgn flag
338                    let lang_flag = match lang {
339                        "rust" | "rs" => "--rust",
340                        "python" | "py" => "--python",
341                        "javascript" | "js" => "--typescript", // srgn uses typescript for js
342                        "typescript" | "ts" => "--typescript",
343                        "go" => "--go",
344                        "c" => "--c",
345                        "csharp" | "cs" | "c#" => "--csharp",
346                        "hcl" => "--hcl",
347                        _ => return Err(anyhow!("Unsupported language: {}", lang)),
348                    };
349
350                    args.push(lang_flag.to_string());
351                    args.push(scope.to_string());
352
353                    // Add additional scope parts if present (for dynamic patterns like "struct~Test")
354                    // The "~" separator is used by srgn for dynamic patterns (e.g., "struct~Test" matches only structs named "Test")
355                    if parts.len() > 2 {
356                        for part in &parts[2..] {
357                            args.push(part.to_string());
358                        }
359                    }
360                } else {
361                    return Err(anyhow!(
362                        "Invalid language scope format. Expected 'language scope' or 'language scope~pattern', got: {}",
363                        lang_scope
364                    ));
365                }
366            }
367            // Regular scope
368            (Some(scope), None, None, None) => {
369                if input.literal_string {
370                    args.push("--literal-string".to_string());
371                }
372                args.push(scope.clone());
373            }
374            // No scope specified
375            (None, None, None, None) => {
376                // Use global scope (empty string)
377                args.push(".*".to_string());
378            }
379        }
380
381        // Add action-specific flags
382        match &input.action {
383            SrgnAction::Replace => {
384                if let Some(replacement) = &input.replacement {
385                    args.push("--".to_string());
386                    args.push(replacement.clone());
387                } else {
388                    return Err(anyhow!("Replacement string required for replace action"));
389                }
390            }
391            SrgnAction::Delete => {
392                args.push("--delete".to_string());
393            }
394            SrgnAction::Upper => {
395                args.push("--upper".to_string());
396            }
397            SrgnAction::Lower => {
398                args.push("--lower".to_string());
399            }
400            SrgnAction::Titlecase => {
401                args.push("--titlecase".to_string());
402            }
403            SrgnAction::Normalize => {
404                args.push("--normalize".to_string());
405            }
406            SrgnAction::German => {
407                args.push("--german".to_string());
408            }
409            SrgnAction::Symbols => {
410                args.push("--symbols".to_string());
411            }
412            SrgnAction::Squeeze => {
413                args.push("--squeeze".to_string());
414            }
415        }
416
417        // Add any additional flags
418        if let Some(flags) = &input.flags {
419            args.extend(flags.clone());
420        }
421
422        Ok(args)
423    }
424
425    /// Sanitize and validate file path within workspace
426    fn validate_path(&self, path: &str) -> Result<PathBuf> {
427        let full_path = self.workspace_root.join(path);
428        let canonical =
429            std::fs::canonicalize(&full_path).with_context(|| format!("Invalid path: {}", path))?;
430        if !canonical.starts_with(&self.workspace_root) {
431            return Err(anyhow!("Path '{}' is outside workspace", path));
432        }
433        Ok(canonical)
434    }
435
436    /// Check if a file was modified by comparing timestamps
437    fn was_file_modified(&self, path: &Path, before_time: SystemTime) -> Result<bool> {
438        let metadata = std::fs::metadata(path)?;
439        let modified_time = metadata.modified()?;
440        Ok(modified_time > before_time)
441    }
442
443    /// Execute srgn command
444    async fn execute_srgn(&self, args: &[String]) -> Result<String> {
445        // For file-modifying operations, capture file paths and timestamps for verification
446        let file_paths: Vec<PathBuf> = args
447            .iter()
448            .filter(|arg| arg.contains('.') && !arg.starts_with('-'))
449            .map(|arg| self.validate_path(arg))
450            .collect::<Result<Vec<_>>>()?;
451        let before_times: Vec<SystemTime> = file_paths
452            .iter()
453            .map(|path| {
454                std::fs::metadata(path)
455                    .and_then(|m| m.modified())
456                    .unwrap_or(SystemTime::UNIX_EPOCH)
457            })
458            .collect();
459
460        let output = Command::new("srgn")
461            .args(args)
462            .current_dir(&self.workspace_root)
463            .stdout(Stdio::piped())
464            .stderr(Stdio::piped())
465            .output()
466            .await
467            .with_context(|| format!("Failed to execute srgn command with args: {:?}", args))?;
468
469        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
470        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
471
472        if !output.status.success() {
473            return Err(anyhow!(
474                "srgn command failed with exit code {}: {}",
475                output.status.code().unwrap_or(-1),
476                stderr.trim()
477            ));
478        }
479
480        // Verify file modifications for non-dry-run operations
481        if !args.contains(&"--dry-run".to_string()) && !file_paths.is_empty() {
482            for (i, path) in file_paths.iter().enumerate() {
483                if !self.was_file_modified(path, before_times[i])? {
484                    return Err(anyhow!(
485                        "File '{}' was not modified as expected",
486                        path.display()
487                    ));
488                }
489            }
490        }
491
492        // Return combined output
493        if stdout.is_empty() {
494            Ok(stderr)
495        } else if stderr.is_empty() {
496            Ok(stdout)
497        } else {
498            Ok(format!("{}\n{}", stdout.trim(), stderr.trim()))
499        }
500    }
501
502    /// Validate input parameters
503    fn validate_input(&self, input: &SrgnInput) -> Result<()> {
504        // Check if path exists or is a valid glob
505        let path = self.workspace_root.join(&input.path);
506        if !path.exists() && !input.path.contains('*') && !input.path.contains('?') {
507            return Err(anyhow!("Path '{}' does not exist", input.path));
508        }
509
510        // Validate action-specific requirements
511        match &input.action {
512            SrgnAction::Replace => {
513                if input.replacement.is_none() {
514                    return Err(anyhow!("Replacement action requires a replacement string"));
515                }
516            }
517            SrgnAction::Delete => {
518                if input.scope.is_none() && input.language_scope.is_none() {
519                    return Err(anyhow!(
520                        "Delete action requires either a scope pattern or language scope"
521                    ));
522                }
523            }
524            _ => {}
525        }
526
527        Ok(())
528    }
529}
530
531#[async_trait]
532impl Tool for SrgnTool {
533    async fn execute(&self, args: Value) -> Result<Value> {
534        let input: SrgnInput = serde_json::from_value(args)
535            .with_context(|| "Failed to parse SrgnInput from arguments")?;
536
537        // Validate input
538        self.validate_input(&input)?;
539
540        // Build command arguments
541        let cmd_args = self.build_command_args(&input)?;
542
543        // Extract potential file paths for git diff confirmation
544        let modified_files: Vec<String> = cmd_args
545            .iter()
546            .filter(|arg| arg.contains('.') && !arg.starts_with('-') && !arg.starts_with('*'))
547            .cloned()
548            .collect();
549
550        // Execute srgn command
551        let output = self.execute_srgn(&cmd_args).await?;
552
553        // Return result with modified files info
554        Ok(json!({
555            "success": true,
556            "output": output,
557            "command": format!("srgn {}", cmd_args.join(" ")),
558            "dry_run": input.dry_run,
559            "modified_files": if input.dry_run { Vec::<String>::new() } else { modified_files }
560        }))
561    }
562
563    fn name(&self) -> &'static str {
564        "srgn"
565    }
566
567    fn description(&self) -> &'static str {
568        "Code surgeon tool for precise source code manipulation using srgn. Supports syntax-aware search and replace operations across multiple programming languages."
569    }
570}
571
572#[async_trait]
573impl FileTool for SrgnTool {
574    fn workspace_root(&self) -> &PathBuf {
575        &self.workspace_root
576    }
577
578    async fn should_exclude(&self, path: &Path) -> bool {
579        should_exclude_file(path).await
580    }
581}