Skip to main content

vtcode_core/tools/
autonomous_executor.rs

1//! Autonomous tool execution with safety checks
2//!
3//! Implements safe autonomous execution following AGENTS.md principles:
4//! - Act, don't ask (for safe operations)
5//! - Verify before destructive operations
6//! - Loop detection and prevention
7//! - Context-aware decision making
8
9use crate::command_safety::shell_string_might_be_dangerous;
10use crate::config::constants::tools;
11use crate::core::loop_detector::LoopDetector;
12use crate::tools::apply_patch::decode_apply_patch_input;
13use crate::tools::command_args::{command_text, interactive_input_text};
14use crate::tools::tool_intent::{
15    self, classify_tool_intent, unified_exec_action, unified_exec_action_in,
16    unified_file_action_in, unified_file_action_is, unified_search_action_is,
17};
18use anyhow::{Context, Result};
19use hashbrown::{HashMap, HashSet};
20use serde_json::Value;
21use std::collections::VecDeque;
22use std::path::{Component, Path, PathBuf};
23use std::sync::{Arc, RwLock};
24use std::time::{Duration, Instant};
25use tracing::warn;
26
27/// Tools that require verification before execution
28const VERIFICATION_REQUIRED_TOOLS: &[&str] = &[
29    tools::WRITE_FILE,
30    tools::EDIT_FILE,
31    tools::UNIFIED_EXEC,
32    tools::CREATE_PTY_SESSION,
33];
34
/// How much user involvement a tool call needs before the executor may run it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AutonomousPolicy {
    /// Run right away; nothing is shown to the user first.
    AutoExecute,
    /// Present a dry-run/preview, then run.
    VerifyThenExecute,
    /// Never run without explicit confirmation from the user.
    RequireConfirmation,
}
45
/// Per-tool execution counters.
#[derive(Debug, Clone, Default)]
struct ToolStats {
    /// Number of recorded executions, successful or not.
    total_attempts: usize,
    /// Number of recorded executions that succeeded.
    successful_executions: usize,
    /// Number of recorded executions that failed.
    failed_executions: usize,
}

impl ToolStats {
    /// Fraction of attempts that succeeded, in `[0.0, 1.0]`.
    ///
    /// Returns `0.0` when nothing has been recorded yet, so the division is
    /// never performed with a zero denominator.
    fn success_rate(&self) -> f64 {
        match self.total_attempts {
            0 => 0.0,
            attempts => self.successful_executions as f64 / attempts as f64,
        }
    }
}
63
64use crate::tools::circuit_breaker::CircuitBreaker;
65use crate::tools::validation::paths::{validate_non_root_listing_path, validate_path_safety};
66use crate::utils::path::{normalize_path, resolve_workspace_path};
67
68/// Autonomous tool executor with safety checks.
69///
70/// In the unified interactive VT Code runloop, higher-level turn code owns
71/// user-visible loop recovery. The loop detector here remains a generic
72/// safeguard for legacy and non-unified autonomous execution paths.
73///
74/// # Rate Limiting
75///
76/// This executor maintains its own sliding-window rate limiter (`rate_history`)
77/// for **policy decisions** (auto-execute vs require confirmation). It does NOT
78/// consume tokens -- it only checks whether the tool has been called too
79/// frequently in the recent window.
80///
81/// The separate `PER_TOOL_RATE_LIMITER` (token-bucket) in `rate_limiter.rs`
82/// handles **execution blocking** at the executor level. The `SafetyGateway`
83/// rate limiter is disabled by the runloop in favor of these two systems.
84pub struct AutonomousExecutor {
85    verification_tools: HashSet<String>,
86    loop_detector: Arc<RwLock<LoopDetector>>,
87    execution_stats: Arc<RwLock<HashMap<String, ToolStats>>>,
88    workspace_dir: Option<PathBuf>,
89    rate_limit_window: Duration,
90    rate_limit_max_calls: usize,
91    rate_history: Arc<RwLock<HashMap<String, VecDeque<Instant>>>>,
92    circuit_breaker: CircuitBreaker,
93}
94
95impl AutonomousExecutor {
96    #[inline]
97    fn canonical_tool_key(tool_name: &str) -> &str {
98        tool_intent::canonical_unified_exec_tool_name(tool_name).unwrap_or(tool_name)
99    }
100
101    #[inline]
102    fn is_command_session_tool(tool_name: &str) -> bool {
103        tool_intent::canonical_unified_exec_tool_name(tool_name).is_some()
104    }
105
106    #[inline]
107    fn is_command_session_run(tool_name: &str, args: &Value) -> bool {
108        tool_intent::is_command_run_tool_call(tool_name, args)
109            || (tool_name == tools::UNIFIED_EXEC && unified_exec_action(args).is_none())
110    }
111
112    pub fn new() -> Self {
113        Self::with_loop_detector(Arc::new(RwLock::new(LoopDetector::new())))
114    }
115
116    pub fn with_loop_detector(loop_detector: Arc<RwLock<LoopDetector>>) -> Self {
117        Self {
118            verification_tools: VERIFICATION_REQUIRED_TOOLS
119                .iter()
120                .map(|s| s.to_string())
121                .collect(),
122            loop_detector,
123            execution_stats: Arc::new(RwLock::new(HashMap::new())),
124            workspace_dir: std::env::var("WORKSPACE_DIR")
125                .ok()
126                .map(PathBuf::from)
127                .or_else(|| std::env::current_dir().ok()),
128            rate_limit_window: Duration::from_secs(10),
129            rate_limit_max_calls: 5,
130            rate_history: Arc::new(RwLock::new(HashMap::new())),
131            circuit_breaker: CircuitBreaker::default(),
132        }
133    }
134
135    /// Set workspace directory for boundary validation
136    pub fn set_workspace_dir(&mut self, dir: PathBuf) {
137        self.workspace_dir = Some(dir);
138    }
139
140    /// Configure loop detection thresholds
141    pub fn configure_loop_limits(&self, limits: &HashMap<String, usize>) {
142        if let Ok(mut detector) = self.loop_detector.write() {
143            for (tool, limit) in limits {
144                detector.set_tool_limit(Self::canonical_tool_key(tool), *limit);
145            }
146        } else {
147            tracing::warn!("Failed to acquire loop detector lock for configuration");
148        }
149    }
150
151    pub fn set_loop_limit(&self, tool_name: &str, limit: usize) {
152        let tool_key = Self::canonical_tool_key(tool_name);
153        if let Ok(mut detector) = self.loop_detector.write() {
154            detector.set_tool_limit(tool_key, limit);
155        } else {
156            tracing::warn!("Failed to acquire loop detector lock for configuration");
157        }
158    }
159
160    pub fn is_hard_limit_exceeded(&self, tool_name: &str) -> bool {
161        let tool_key = Self::canonical_tool_key(tool_name);
162        self.loop_detector
163            .read()
164            .map(|detector| detector.is_hard_limit_exceeded(tool_key))
165            .unwrap_or(false)
166    }
167
168    /// Reset loop-detection streaks at the start of a new turn.
169    pub fn reset_turn_loop_detection(&self) {
170        if let Ok(mut detector) = self.loop_detector.write() {
171            detector.reset();
172        } else {
173            tracing::warn!("Failed to acquire loop detector lock for turn reset");
174        }
175    }
176
177    /// Determine execution policy for a tool
178    pub fn get_policy(&self, tool_name: &str, args: &Value) -> AutonomousPolicy {
179        if self.is_destructive_operation(tool_name, args) {
180            return AutonomousPolicy::RequireConfirmation;
181        }
182
183        if !classify_tool_intent(tool_name, args).mutating {
184            return AutonomousPolicy::AutoExecute;
185        }
186
187        if self.requires_preview(tool_name, args) {
188            return AutonomousPolicy::VerifyThenExecute;
189        }
190
191        AutonomousPolicy::RequireConfirmation
192    }
193
194    /// Check if tool should be blocked due to loop detection or circuit breaker
195    /// Returns Some(message) if blocked, None if allowed
196    pub fn should_block(&self, tool_name: &str, _args: &Value) -> Option<String> {
197        let tool_key = Self::canonical_tool_key(tool_name);
198
199        // Check circuit breaker first (fail fast)
200        if !self.circuit_breaker.allow_request_for_tool(tool_key) {
201            return Some(format!(
202                "Tool '{}' blocked by circuit breaker due to repeated failures. \
203                 Cooling down before retrying.",
204                tool_key
205            ));
206        }
207
208        if self.is_rate_limited(tool_key) {
209            return Some(format!(
210                "Tool '{}' temporarily blocked: rate limit exceeded ({} calls in {:?}).",
211                tool_key, self.rate_limit_max_calls, self.rate_limit_window
212            ));
213        }
214
215        // Use try_read to avoid blocking on contested locks
216        match self.loop_detector.try_read() {
217            Ok(detector) => {
218                // Check if hard limit already exceeded
219                if detector.is_hard_limit_exceeded(tool_key) {
220                    return Some(format!(
221                        "Tool '{}' blocked: hard limit exceeded. Agent is stuck in a loop.",
222                        tool_key
223                    ));
224                }
225
226                // Check call count and provide early warning
227                let count = detector.get_call_count(tool_key);
228                if count >= 3
229                    && let Some(suggestion) = detector.suggest_alternative(tool_key)
230                {
231                    return Some(format!(
232                        "Tool '{}' called {} times. Consider alternative approach:\n{}",
233                        tool_key, count, suggestion
234                    ));
235                }
236            }
237            Err(_) => {
238                // If we can't get the lock, don't block execution
239                tracing::debug!("Could not acquire loop detector read lock for {}", tool_key);
240            }
241        }
242        None
243    }
244
245    /// Record tool call in loop detector
246    /// Returns warning message if loop detected
247    pub fn record_tool_call(&self, tool_name: &str, args: &Value) -> Option<String> {
248        let tool_key = Self::canonical_tool_key(tool_name);
249        self.record_rate_history(tool_key);
250        if let Ok(mut detector) = self.loop_detector.write() {
251            detector.record_call(tool_key, args)
252        } else {
253            None
254        }
255    }
256
257    /// Check if operation is destructive based on tool and arguments
258    fn is_destructive_operation(&self, tool_name: &str, args: &Value) -> bool {
259        match tool_name {
260            tools::APPLY_PATCH | tools::DELETE_FILE => true,
261            tools::UNIFIED_FILE => unified_file_action_in(args, &["patch", "delete"]),
262            _ if Self::is_command_session_run(tool_name, args) => command_text(args)
263                .ok()
264                .flatten()
265                .is_some_and(|cmd| self.is_destructive_command(&cmd)),
266            _ if Self::is_command_session_tool(tool_name)
267                && unified_exec_action_in(args, &["write", "continue"]) =>
268            {
269                interactive_input_text(args).is_some_and(|input| self.is_destructive_command(input))
270            }
271            _ => false,
272        }
273    }
274
275    /// Check if shell command is destructive
276    fn is_destructive_command(&self, cmd: &str) -> bool {
277        if shell_string_might_be_dangerous(cmd) {
278            return true;
279        }
280
281        let cmd_lower = cmd.to_lowercase();
282
283        // Additional destructive patterns that are not captured by the centralized
284        // command safety evaluator.
285        let supplemental_patterns = [
286            "truncate",
287            "> /dev/",
288            "dd if=",
289            "mkfs",
290            "fdisk",
291            "format",
292            // Overwrite operations
293            ">/",
294            "2>/",
295            // Package managers (potentially destructive)
296            "npm uninstall -g",
297            "cargo uninstall",
298            "pip uninstall",
299            // Permissions
300            "chmod -r",
301            "chown -r",
302        ];
303
304        supplemental_patterns
305            .iter()
306            .any(|pattern| cmd_lower.contains(pattern))
307    }
308
309    /// Validate tool arguments for safety
310    pub fn validate_args(&self, tool_name: &str, args: &Value) -> Result<()> {
311        match tool_name {
312            tools::WRITE_FILE | tools::EDIT_FILE => self.validate_file_path(args.get("path"))?,
313            _ if Self::is_command_session_run(tool_name, args) => {
314                self.validate_command_text(
315                    &command_text(args)
316                        .map_err(anyhow::Error::msg)?
317                        .context("Missing or invalid 'command' argument")?,
318                )?;
319            }
320            _ if Self::is_command_session_tool(tool_name)
321                && unified_exec_action_in(args, &["write", "continue"]) =>
322            {
323                if let Some(input) = interactive_input_text(args) {
324                    self.validate_command_text(input)?;
325                }
326            }
327            tools::UNIFIED_FILE if unified_file_action_in(args, &["write", "edit", "delete"]) => {
328                self.validate_file_path(args.get("path"))?;
329            }
330            tools::UNIFIED_FILE if unified_file_action_in(args, &["move", "copy"]) => {
331                self.validate_file_path(args.get("path"))?;
332                self.validate_file_path(args.get("destination"))?;
333            }
334            tools::UNIFIED_SEARCH if unified_search_action_is(args, "list") => {
335                self.validate_list_files_args(args)?;
336            }
337            _ => {}
338        }
339        Ok(())
340    }
341
342    /// Validate file path is within workspace boundaries.
343    ///
344    /// First checks for sensitive system paths via `validate_path_safety`,
345    /// then enforces workspace boundary constraints.
346    fn validate_file_path(&self, path: Option<&Value>) -> Result<()> {
347        let path_str = path
348            .and_then(|v| v.as_str())
349            .context("Missing or invalid 'path' argument")?;
350
351        // Check for sensitive system paths (e.g., /var/db/shadow, /etc/shadow)
352        validate_path_safety(path_str)?;
353
354        let path_obj = Path::new(path_str);
355
356        // Check for absolute paths
357        if path_obj.is_absolute() {
358            // Allow /tmp/vtcode paths
359            if path_str.starts_with("/tmp/vtcode") {
360                return Ok(());
361            }
362
363            // Check if within workspace
364            if let Some(workspace) = &self.workspace_dir
365                && (resolve_workspace_path(workspace, path_obj).is_ok()
366                    || is_within_workspace_lexically(workspace, path_obj))
367            {
368                return Ok(());
369            }
370
371            anyhow::bail!(
372                "Absolute path outside workspace boundary: {}. \
373                 Only paths within WORKSPACE_DIR or /tmp/vtcode are allowed.",
374                path_str
375            );
376        }
377
378        // Prevent parent directory traversal that could escape workspace
379        if path_str.contains("..") {
380            warn!("Path contains parent directory traversal: {}", path_str);
381
382            // Resolve the path and check if it stays within workspace
383            if let Some(workspace) = &self.workspace_dir {
384                let path_obj = Path::new(path_str);
385                let canonical_ok =
386                    resolve_workspace_path(workspace, &workspace.join(path_obj)).is_ok();
387                let lexical_ok = is_within_workspace_lexically(workspace, path_obj);
388                if !canonical_ok && !lexical_ok {
389                    anyhow::bail!("Path traversal escapes workspace boundary: {}", path_str);
390                }
391            } else {
392                anyhow::bail!(
393                    "Path traversal blocked: workspace boundary is unknown for '{}'",
394                    path_str
395                );
396            }
397        }
398
399        // If workspace directory is unknown, conservatively block writes to avoid escaping boundaries.
400        if self.workspace_dir.is_none() {
401            anyhow::bail!(
402                "Workspace directory is not set; refusing to write to relative path '{}'. \
403                 Set WORKSPACE_DIR or call set_workspace_dir().",
404                path_str
405            );
406        }
407
408        Ok(())
409    }
410
411    /// Validate shell command for safety
412    fn validate_command_text(&self, cmd: &str) -> Result<()> {
413        if self.is_destructive_command(cmd) {
414            anyhow::bail!(
415                "Destructive command requires explicit confirmation: {}",
416                cmd
417            );
418        }
419
420        Ok(())
421    }
422
423    /// Validate list_files arguments to prevent root listing loops
424    fn validate_list_files_args(&self, args: &Value) -> Result<()> {
425        let raw_path = args
426            .get("path")
427            .and_then(|v| v.as_str())
428            .unwrap_or_default()
429            .trim();
430
431        let targets_root_directory = !Path::new(raw_path)
432            .components()
433            .any(|component| !matches!(component, Component::CurDir | Component::RootDir));
434
435        if targets_root_directory {
436            anyhow::bail!(
437                "Error: autonomous directory listing must not target the root directory. Please specify a subdirectory like 'src/', 'vtcode-core/src/', or 'tests/'."
438            );
439        }
440
441        validate_non_root_listing_path(Some(raw_path))
442    }
443
444    /// Generate dry-run preview for verification
445    pub fn generate_preview(&self, tool_name: &str, args: &Value) -> String {
446        if tool_name == tools::WRITE_FILE
447            || (tool_name == tools::UNIFIED_FILE && unified_file_action_is(args, "write"))
448        {
449            let path = args
450                .get("path")
451                .and_then(|v| v.as_str())
452                .unwrap_or("unknown");
453            let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
454            let lines = content.lines().count();
455            let size_kb = content.len() / 1024;
456
457            let preview = if lines > 10 {
458                let first_lines: Vec<_> = content.lines().take(5).collect();
459                format!(
460                    "\n  {}\n  ... ({} more lines)",
461                    first_lines.join("\n  "),
462                    lines - 5
463                )
464            } else {
465                format!("\n  {}", content.lines().collect::<Vec<_>>().join("\n  "))
466            };
467
468            format!(
469                "Will write {} lines ({} KB) to: {}\nPreview:{}",
470                lines, size_kb, path, preview
471            )
472        } else if tool_name == tools::EDIT_FILE
473            || (tool_name == tools::UNIFIED_FILE && unified_file_action_is(args, "edit"))
474        {
475            let path = args
476                .get("path")
477                .and_then(|v| v.as_str())
478                .unwrap_or("unknown");
479            let old_str = args.get("old_str").and_then(|v| v.as_str()).unwrap_or("");
480            let new_str = args.get("new_str").and_then(|v| v.as_str()).unwrap_or("");
481
482            format!(
483                "Will edit file: {}\nReplacing:\n  {}\nWith:\n  {}",
484                path,
485                old_str.lines().take(3).collect::<Vec<_>>().join("\n  "),
486                new_str.lines().take(3).collect::<Vec<_>>().join("\n  ")
487            )
488        } else if Self::is_command_session_run(tool_name, args) {
489            let cmd = command_text(args)
490                .ok()
491                .flatten()
492                .unwrap_or_else(|| "unknown".to_string());
493            let is_destructive = self.is_destructive_command(&cmd);
494
495            let warning = if is_destructive {
496                "\n[WARN] WARNING: This command is potentially destructive!"
497            } else {
498                ""
499            };
500
501            format!("Will execute: {}{}", cmd, warning)
502        } else if tool_name == tools::APPLY_PATCH
503            || (tool_name == tools::UNIFIED_FILE && unified_file_action_is(args, "patch"))
504        {
505            let patch = decode_apply_patch_input(args)
506                .ok()
507                .flatten()
508                .map(|patch| patch.text)
509                .unwrap_or_default();
510            let lines = patch.lines().count();
511            format!("Will apply patch with {} lines of changes", lines)
512        } else {
513            format!("Will execute: {} with args: {:?}", tool_name, args)
514        }
515    }
516
517    fn requires_preview(&self, tool_name: &str, args: &Value) -> bool {
518        let canonical_tool_name = Self::canonical_tool_key(tool_name);
519        if self.verification_tools.contains(canonical_tool_name) {
520            return true;
521        }
522
523        match canonical_tool_name {
524            tools::UNIFIED_FILE => unified_file_action_in(args, &["write", "edit", "move", "copy"]),
525            tools::UNIFIED_EXEC => unified_exec_action_in(args, &["run", "code", "close"]),
526            _ => false,
527        }
528    }
529
530    /// Record execution result for statistics tracking and circuit breaker
531    pub fn record_execution(&self, tool_name: &str, success: bool) {
532        let tool_key = Self::canonical_tool_key(tool_name);
533
534        // Update circuit breaker
535        if success {
536            self.circuit_breaker.record_success_for_tool(tool_key);
537        } else {
538            // Note: We blindly treat all failures as circuit-breaking for now.
539            // Ideally, the caller should specify if it's an arg error or system error.
540            self.circuit_breaker
541                .record_failure_for_tool(tool_key, false);
542        }
543
544        if let Ok(mut stats) = self.execution_stats.write() {
545            let entry = stats.entry(tool_key.to_string()).or_default();
546            entry.total_attempts += 1;
547            if success {
548                entry.successful_executions += 1;
549            } else {
550                entry.failed_executions += 1;
551            }
552        }
553    }
554
555    /// Get success rate for a tool
556    pub fn get_success_rate(&self, tool_name: &str) -> f64 {
557        if let Ok(stats) = self.execution_stats.read() {
558            stats
559                .get(tool_name)
560                .map(|s| s.success_rate())
561                .unwrap_or(0.0)
562        } else {
563            0.0
564        }
565    }
566
567    /// Get execution statistics for a tool
568    pub fn get_tool_stats(&self, tool_name: &str) -> Option<(usize, usize, usize)> {
569        if let Ok(stats) = self.execution_stats.read() {
570            stats.get(tool_name).map(|s| {
571                (
572                    s.total_attempts,
573                    s.successful_executions,
574                    s.failed_executions,
575                )
576            })
577        } else {
578            None
579        }
580    }
581}
582
583impl Default for AutonomousExecutor {
584    fn default() -> Self {
585        Self::new()
586    }
587}
588
589impl AutonomousExecutor {
590    fn record_rate_history(&self, tool_name: &str) {
591        let now = Instant::now();
592        if let Ok(mut history) = self.rate_history.write() {
593            let entries = history
594                .entry(Self::canonical_tool_key(tool_name).to_string())
595                .or_default();
596            entries.push_back(now);
597            prune_expired_timestamps(entries, now, self.rate_limit_window);
598        }
599    }
600
601    fn is_rate_limited(&self, tool_name: &str) -> bool {
602        let tool_key = Self::canonical_tool_key(tool_name);
603        let now = Instant::now();
604
605        // First, try with a read lock to check without modifying
606        // This is the common fast path when there are no expired entries
607        if let Ok(history) = self.rate_history.read() {
608            if let Some(entries) = history.get(tool_key) {
609                // Quick check: if all entries are within window and at limit, we're rate limited
610                let oldest_within_window = entries
611                    .front()
612                    .is_some_and(|front| now.duration_since(*front) <= self.rate_limit_window);
613                if oldest_within_window {
614                    return entries.len() >= self.rate_limit_max_calls;
615                }
616            } else {
617                // No entries for this tool, definitely not rate limited
618                return false;
619            }
620        }
621
622        // Fall back to write lock only when we need to clean up expired entries
623        if let Ok(mut history) = self.rate_history.write() {
624            let entries = history.entry(tool_key.to_string()).or_default();
625            prune_expired_timestamps(entries, now, self.rate_limit_window);
626            return entries.len() >= self.rate_limit_max_calls;
627        }
628        false
629    }
630}
631
/// Pop timestamps from the front of `entries` while they are older than
/// `window` relative to `now`.
///
/// Stops at the first entry that is still inside the window, so entries are
/// expected to be ordered oldest-first.
fn prune_expired_timestamps(entries: &mut VecDeque<Instant>, now: Instant, window: Duration) {
    while entries
        .front()
        .is_some_and(|oldest| now.duration_since(*oldest) > window)
    {
        entries.pop_front();
    }
}
641
642fn is_within_workspace_lexically(workspace: &Path, candidate: &Path) -> bool {
643    let normalized_workspace = normalize_path(workspace);
644    let normalized_candidate = if candidate.is_absolute() {
645        normalize_path(candidate)
646    } else {
647        normalize_path(&normalized_workspace.join(candidate))
648    };
649    normalized_candidate.starts_with(&normalized_workspace)
650}
651
652#[cfg(test)]
653mod tests {
654    use super::*;
655    use serde_json::json;
656
657    #[test]
658    fn test_readonly_tools_auto_execute() {
659        let executor = AutonomousExecutor::new();
660
661        assert_eq!(
662            executor.get_policy(
663                tools::UNIFIED_SEARCH,
664                &json!({"action": "list", "path": "src"})
665            ),
666            AutonomousPolicy::AutoExecute
667        );
668        assert_eq!(
669            executor.get_policy(
670                tools::UNIFIED_FILE,
671                &json!({"action": "read", "path": "README.md"})
672            ),
673            AutonomousPolicy::AutoExecute
674        );
675        assert_eq!(
676            executor.get_policy(
677                tools::UNIFIED_EXEC,
678                &json!({"action": "poll", "session_id": "run-1"})
679            ),
680            AutonomousPolicy::AutoExecute
681        );
682        assert_eq!(
683            executor.get_policy(
684                tools::UNIFIED_EXEC,
685                &json!({"action": "continue", "session_id": "run-1"})
686            ),
687            AutonomousPolicy::AutoExecute
688        );
689    }
690
691    #[test]
692    fn test_destructive_commands_require_confirmation() {
693        let executor = AutonomousExecutor::new();
694
695        let destructive_cmds = vec![
696            "rm -rf /tmp/test",
697            "git reset --hard HEAD~1",
698            "git push --force origin main",
699            "git clean -fdx",
700            "chmod -R 777 /",
701        ];
702
703        for cmd in destructive_cmds {
704            let args = json!({"command": cmd});
705            let policy = executor.get_policy("shell", &args);
706            assert_eq!(
707                policy,
708                AutonomousPolicy::RequireConfirmation,
709                "unexpected policy for command: {cmd}"
710            );
711        }
712    }
713
714    #[test]
715    fn test_list_files_root_blocked() {
716        let executor = AutonomousExecutor::new();
717
718        let root_variations = vec![
719            json!({"action": "list", "path": "."}),
720            json!({"action": "list", "path": ""}),
721            json!({"action": "list", "path": "./"}),
722            json!({"action": "list"}),
723        ];
724
725        for args in root_variations {
726            let result = executor.validate_args(tools::UNIFIED_SEARCH, &args);
727            assert!(result.is_err());
728            assert!(result.unwrap_err().to_string().contains("root directory"));
729        }
730    }
731
732    #[test]
733    fn test_list_files_specific_path_allowed() {
734        let executor = AutonomousExecutor::new();
735
736        let args = json!({"action": "list", "path": "src/core/"});
737        let result = executor.validate_args(tools::UNIFIED_SEARCH, &args);
738        result.unwrap();
739    }
740
741    #[test]
742    fn test_verification_tools_need_preview() {
743        let executor = AutonomousExecutor::new();
744
745        for tool in VERIFICATION_REQUIRED_TOOLS {
746            let policy = executor.get_policy(tool, &json!({}));
747            assert_eq!(policy, AutonomousPolicy::VerifyThenExecute);
748        }
749    }
750
751    #[test]
752    fn test_unified_tools_use_action_specific_policies() {
753        let executor = AutonomousExecutor::new();
754
755        assert_eq!(
756            executor.get_policy(
757                tools::UNIFIED_FILE,
758                &json!({"action": "write", "path": "foo.txt", "content": "hello"})
759            ),
760            AutonomousPolicy::VerifyThenExecute
761        );
762        assert_eq!(
763            executor.get_policy(
764                tools::UNIFIED_FILE,
765                &json!({"action": "patch", "input": "*** Begin Patch\n*** End Patch"})
766            ),
767            AutonomousPolicy::RequireConfirmation
768        );
769        assert_eq!(
770            executor.get_policy(tools::UNIFIED_EXEC, &json!({"cmd": "echo hi"})),
771            AutonomousPolicy::VerifyThenExecute
772        );
773        assert_eq!(
774            executor.get_policy(
775                tools::UNIFIED_EXEC,
776                &json!({"action": "write", "session_id": "run-1", "input": "rm -rf /tmp/test"})
777            ),
778            AutonomousPolicy::RequireConfirmation
779        );
780    }
781
782    #[test]
783    fn test_exec_aliases_use_unified_exec_preview_policy() {
784        let executor = AutonomousExecutor::new();
785
786        assert_eq!(
787            executor.get_policy(tools::EXEC_COMMAND, &json!({"cmd": "echo hi"})),
788            AutonomousPolicy::VerifyThenExecute
789        );
790        assert_eq!(
791            executor.get_policy(tools::RUN_PTY_CMD, &json!({"command": "echo hi"})),
792            AutonomousPolicy::VerifyThenExecute
793        );
794    }
795
796    #[test]
797    fn test_loop_detection_integration() {
798        let executor = AutonomousExecutor::new();
799        let args = json!({"path": "src/"});
800
801        // First two calls should not block
802        assert!(
803            executor
804                .should_block(tools::UNIFIED_SEARCH, &args)
805                .is_none()
806        );
807        executor.record_tool_call(tools::UNIFIED_SEARCH, &args);
808
809        assert!(
810            executor
811                .should_block(tools::UNIFIED_SEARCH, &args)
812                .is_none()
813        );
814        executor.record_tool_call(tools::UNIFIED_SEARCH, &args);
815
816        // Third call should trigger warning
817        executor.record_tool_call(tools::UNIFIED_SEARCH, &args);
818        let block_msg = executor.should_block(tools::UNIFIED_SEARCH, &args);
819        assert!(block_msg.is_some());
820        let message = block_msg.unwrap();
821        assert!(
822            message.contains("alternative") || message.contains("blocked"),
823            "unexpected loop warning message: {message}"
824        );
825    }
826
827    #[test]
828    fn test_turn_reset_clears_loop_detection_state() {
829        let executor = AutonomousExecutor::new();
830        let args = json!({"path": "src/"});
831
832        executor.record_tool_call(tools::UNIFIED_SEARCH, &args);
833        executor.record_tool_call(tools::UNIFIED_SEARCH, &args);
834        executor.record_tool_call(tools::UNIFIED_SEARCH, &args);
835        assert!(
836            executor
837                .should_block(tools::UNIFIED_SEARCH, &args)
838                .is_some()
839        );
840
841        executor.reset_turn_loop_detection();
842        assert!(
843            executor
844                .should_block(tools::UNIFIED_SEARCH, &args)
845                .is_none()
846        );
847    }
848
849    #[test]
850    fn test_execution_stats_tracking() {
851        let executor = AutonomousExecutor::new();
852
853        // Record some executions
854        executor.record_execution(tools::UNIFIED_SEARCH, true);
855        executor.record_execution(tools::UNIFIED_SEARCH, true);
856        executor.record_execution(tools::UNIFIED_SEARCH, false);
857
858        // Check stats
859        let (total, success, failed) = executor.get_tool_stats(tools::UNIFIED_SEARCH).unwrap();
860        assert_eq!(total, 3);
861        assert_eq!(success, 2);
862        assert_eq!(failed, 1);
863
864        // Check success rate
865        let rate = executor.get_success_rate(tools::UNIFIED_SEARCH);
866        assert!((rate - 0.666).abs() < 0.01);
867    }
868
869    #[test]
870    fn test_workspace_boundary_validation() {
871        let mut executor = AutonomousExecutor::new();
872        let temp_dir = std::env::temp_dir();
873        executor.set_workspace_dir(temp_dir.clone());
874
875        // Absolute path outside workspace should fail
876        let args = json!({"path": "/etc/passwd"});
877        let result = executor.validate_args(tools::WRITE_FILE, &args);
878        assert!(result.is_err());
879        assert!(
880            result
881                .unwrap_err()
882                .to_string()
883                .contains("workspace boundary")
884        );
885
886        // /tmp/vtcode should be allowed
887        let args = json!({"path": "/tmp/vtcode/test.txt"});
888        let result = executor.validate_args(tools::WRITE_FILE, &args);
889        result.unwrap();
890    }
891
892    #[test]
893    fn test_unified_exec_validation_uses_command_aliases() {
894        let executor = AutonomousExecutor::new();
895
896        let err = executor
897            .validate_args(tools::UNIFIED_EXEC, &json!({"cmd": "rm -rf /tmp/test"}))
898            .expect_err("destructive command should fail");
899
900        assert!(err.to_string().contains("requires explicit confirmation"));
901    }
902
903    #[test]
904    fn test_unified_file_validation_checks_destinations() {
905        let mut executor = AutonomousExecutor::new();
906        executor.set_workspace_dir(PathBuf::from("/workspace"));
907
908        let err = executor
909            .validate_args(
910                tools::UNIFIED_FILE,
911                &json!({
912                    "action": "move",
913                    "path": "src/main.rs",
914                    "destination": "/etc/passwd"
915                }),
916            )
917            .expect_err("destination outside workspace should fail");
918
919        assert!(err.to_string().contains("workspace boundary"));
920    }
921
922    #[test]
923    fn test_enhanced_destructive_patterns() {
924        let executor = AutonomousExecutor::new();
925
926        let destructive_cmds = vec![
927            "rm -r somedir",
928            "git branch -D feature",
929            "npm uninstall -g package",
930            "cargo uninstall tool",
931        ];
932
933        for cmd in destructive_cmds {
934            assert!(executor.is_destructive_command(cmd));
935        }
936    }
937
938    #[test]
939    fn test_enhanced_preview_generation() {
940        let executor = AutonomousExecutor::new();
941
942        // Test write_file preview
943        let args = json!({
944            "path": "test.rs",
945            "content": "line1\nline2\nline3"
946        });
947        let preview = executor.generate_preview(tools::WRITE_FILE, &args);
948        assert!(preview.contains("3 lines"));
949        assert!(preview.contains("test.rs"));
950
951        // Test edit_file preview
952        let args = json!({
953            "path": "main.rs",
954            "old_str": "old code",
955            "new_str": "new code"
956        });
957        let preview = executor.generate_preview(tools::EDIT_FILE, &args);
958        assert!(preview.contains("main.rs"));
959        assert!(preview.contains("old code"));
960        assert!(preview.contains("new code"));
961
962        // Test destructive command preview
963        let args = json!({"command": "rm -rf /tmp/test"});
964        let preview = executor.generate_preview("shell", &args);
965        assert!(preview.contains("WARNING"));
966        assert!(preview.contains("destructive"));
967
968        let preview = executor.generate_preview(
969            tools::UNIFIED_EXEC,
970            &json!({"command.0": "git", "command.1": "status"}),
971        );
972        assert!(preview.contains("git status"));
973
974        let preview = executor.generate_preview(
975            tools::UNIFIED_FILE,
976            &json!({
977                "input": "*** Begin Patch\n*** Add File: note.txt\n+hello\n*** End Patch"
978            }),
979        );
980        assert!(preview.contains("apply patch"));
981    }
982
983    #[test]
984    fn test_parent_traversal_detection() {
985        let mut executor = AutonomousExecutor::new();
986        let workspace = PathBuf::from("/workspace");
987        executor.set_workspace_dir(workspace);
988
989        // Path with .. that stays in workspace should be allowed (with warning)
990        let args = json!({"path": "src/../lib/file.rs"});
991        let result = executor.validate_args(tools::WRITE_FILE, &args);
992        // This should succeed but log a warning
993        result.unwrap();
994    }
995}