// runtimo_core/capabilities/git_exec.rs

//! GitExec capability — git operations with state tracking and undo support.
//!
//! Provides git operations (clone, pull, commit, revert, clean, status) with:
//! - State tracking (commit sha, branch, remote URL)
//! - Backup-before-mutate for undo support
//! - WAL logging for audit trail
//! - Path traversal protection
//! - Timeout enforcement on all git subprocesses
//! - URL validation (HTTPS/SSH only, SSRF blocking)
//! - Credential sanitization from output and stderr
//! - Secret file detection for git add
//! - Telemetry and process tracking before/after execution
//!
//! # Network capability
//!
//! **Git operations ARE inherently network-capable.** `git clone`, `git pull`,
//! and `git fetch` make outbound connections to remote repositories.
//! This is by design — denying network access would make GitExec useless.
//!
//! The network isolation is at the transport/protocol level:
//! - Only HTTPS (`https://`) and SSH (`git@`) URLs are accepted
//! - SSRF targets (metadata services, localhost, private ranges) are blocked
//! - Credentials are sanitized from all output, stderr, and telemetry
//!
//! **Note on ShellExec interaction:** GitExec spawns `git` subprocesses which
//! internally invoke `git-remote-https` (a git helper, NOT the system `curl`).
//! The ShellExec network blocklist (`curl`, `wget`, etc.) does NOT affect
//! GitExec — git uses its own transport layer. Likewise, `RUNTIMO_ENABLE_NETWORK`
//! does NOT gate GitExec; GitExec's network access is controlled by its own
//! URL validation and SSRF blocking.
//!
//! # Example
//!
//! ```rust,ignore
//! use runtimo_core::capabilities::GitExec;
//! use runtimo_core::capability::{Capability, Context};
//! use serde_json::json;
//! use std::path::PathBuf;
//!
//! // `GitExec::new` is fallible, so unwrap (or `?`) before using the capability.
//! let cap = GitExec::new(PathBuf::from("/tmp/backups")).unwrap();
//! let result = cap.execute(
//!     &json!({"operation": "clone", "url": "https://github.com/user/repo.git", "path": "/tmp/repo"}),
//!     &Context { dry_run: false, job_id: "job1".into(), working_dir: PathBuf::from("/tmp") }
//! ).unwrap();
//!
//! assert!(result.status == "ok");
//! ```
48
49use crate::backup::BackupManager;
50use crate::capability::{CapabilityError, Context, Output, TypedCapability};
51use crate::processes::ProcessSnapshot;
52use crate::telemetry::Telemetry;
53use crate::validation::path::{validate_path, PathContext};
54use crate::{Error, Result};
55use serde::{Deserialize, Serialize};
56use serde_json::Value;
57use std::path::{Path, PathBuf};
58use std::process::Command;
59use std::time::{Duration, Instant};
60
61/// Arguments for the [`GitExec`] capability.
62#[derive(Debug, Clone, Serialize, Deserialize)]
63#[allow(clippy::exhaustive_structs)] // args struct — fields are the contract
64pub struct GitExecArgs {
65    /// Git operation to perform (clone, pull, commit, revert, clean, status).
66    pub operation: String,
67    /// Repository URL (for clone/pull).
68    pub url: Option<String>,
69    /// Local path to repository (for clone/commit/revert/clean/status).
70    pub path: Option<String>,
71    /// Branch name (for checkout/clone).
72    pub branch: Option<String>,
73    /// Commit message (for commit).
74    pub message: Option<String>,
75    /// Files to commit (for commit).
76    pub files: Option<Vec<String>>,
77    /// Commit SHA to revert to (for revert).
78    pub commit_sha: Option<String>,
79    /// Timeout in seconds (default: 300).
80    pub timeout_secs: Option<u64>,
81}
82
83/// Git state before/after operation.
84#[derive(Debug, Clone, Serialize, Deserialize)]
85pub struct GitState {
86    /// Current commit SHA (HEAD).
87    pub commit_sha: Option<String>,
88    /// Current branch name.
89    pub branch: Option<String>,
90    /// Remote URL (origin).
91    pub remote_url: Option<String>,
92    /// Repository path.
93    pub repo_path: String,
94    /// Working directory status (clean/dirty).
95    pub is_clean: bool,
96}
97
/// File names and path fragments treated as secrets and kept out of
/// automatic staging (the `git add -A` style path).
///
/// Entries are lower-case; callers lower-case the candidate path before
/// comparing against them.
const SECRET_PATTERNS: &[&str] = &[
    // dotenv files
    ".env",
    ".env.local",
    ".env.production",
    ".env.staging",
    // generic credential / secret stores
    "credentials.json",
    "credentials.yml",
    "credentials.yaml",
    "secrets.json",
    "secrets.yml",
    "secrets.yaml",
    // SSH private keys, inside `.ssh/` or standing alone
    ".ssh/id_rsa",
    ".ssh/id_ed25519",
    ".ssh/id_dsa",
    "id_rsa",
    "id_ed25519",
    "id_dsa",
    // package-manager and registry auth files
    ".npmrc",
    ".pypirc",
    ".docker/config.json",
    // bare token / key files
    "token",
    "api_key",
    "api_secret",
    // cloud provider credentials
    ".aws/credentials",
    ".azure/credentials",
    // Java keystores
    "keystore.jks",
    "keystore.p12",
];
127
/// Upper bound on how many untracked files a `git clean -fd` run may remove.
const MAX_CLEAN_FILES: usize = 1_000;
130
131/// Capability that executes git operations with full state tracking.
132///
133/// Supports clone, pull, commit, revert, clean, and status operations.
134/// Creates backups before mutable operations for undo support.
135pub struct GitExec {
136    backup_mgr: BackupManager,
137}
138
139impl GitExec {
140    /// Creates a new GitExec capability with the given backup directory.
141    ///
142    /// # Errors
143    ///
144    /// Returns [`crate::Error::BackupError`] if the backup
145    /// directory cannot be created.
146    pub fn new(backup_dir: PathBuf) -> Result<Self> {
147        Ok(Self {
148            backup_mgr: BackupManager::new(backup_dir)?,
149        })
150    }
151
152    /// Runs a git command with timeout enforcement and returns the output.
153    fn run_git_with_timeout(repo_path: &Path, args: &[&str], timeout_secs: u64) -> Result<String> {
154        let mut child = Command::new("git")
155            .current_dir(repo_path)
156            .args(args)
157            .stdin(std::process::Stdio::null())
158            .spawn()
159            .map_err(|e| Error::ExecutionFailed(format!("git command failed: {}", e)))?;
160
161        let timeout = Duration::from_secs(timeout_secs);
162        let start = Instant::now();
163
164        loop {
165            match child.try_wait() {
166                Ok(Some(status)) => {
167                    let output = child
168                        .wait_with_output()
169                        .map_err(|e| Error::ExecutionFailed(format!("git wait failed: {}", e)))?;
170                    if !status.success() {
171                        let stderr = String::from_utf8_lossy(&output.stderr);
172                        // Sanitize stderr for safe JSON embedding: escape control chars
173                        let sanitized_stderr = stderr
174                            .chars()
175                            .filter(|c| !c.is_control() || *c == '\n' || *c == '\r' || *c == '\t')
176                            .collect::<String>();
177                        return Err(Error::ExecutionFailed(format!(
178                            "git {}: {}",
179                            args.join(" "),
180                            sanitized_stderr.trim()
181                        )));
182                    }
183                    return Ok(String::from_utf8_lossy(&output.stdout).to_string());
184                }
185                Ok(None) => {
186                    if start.elapsed() > timeout {
187                        let _ = child.kill();
188                        let _ = child.wait();
189                        return Err(Error::ExecutionFailed(format!(
190                            "git {} timed out after {}s",
191                            args.join(" "),
192                            timeout_secs
193                        )));
194                    }
195                    std::thread::sleep(Duration::from_millis(50));
196                }
197                Err(e) => {
198                    let _ = child.kill();
199                    let _ = child.wait();
200                    return Err(Error::ExecutionFailed(format!("git wait error: {}", e)));
201                }
202            }
203        }
204    }
205
/// Reports whether the working tree at `repo_path` has no uncommitted changes.
///
/// Runs `git status --porcelain` with stdin closed. The tree counts as clean
/// only when git exits successfully and prints nothing on stdout or stderr.
/// A git process that cannot be started, or that exits unsuccessfully (for
/// example after being killed before printing anything), is reported as
/// not clean.
fn is_working_tree_clean(repo_path: &Path) -> bool {
    Command::new("git")
        .current_dir(repo_path)
        .args(["status", "--porcelain"])
        .stdin(std::process::Stdio::null())
        .output()
        .map(|out| out.status.success() && out.stdout.is_empty() && out.stderr.is_empty())
        .unwrap_or(false)
}
218
219    /// Validates a git URL format. Blocks http:// (MITM risk) and SSRF patterns.
220    fn validate_url(url: &str) -> Result<()> {
221        let is_https = url.starts_with("https://");
222        let is_ssh = url.starts_with("git@");
223        if !is_https && !is_ssh {
224            return Err(Error::SchemaValidationFailed(format!(
225                "Insecure or unsupported URL scheme: {} (must use https:// or git@ SSH)",
226                url
227            )));
228        }
229
230        if is_https {
231            if let Some(host_part) = url
232                .strip_prefix("https://")
233                .and_then(|s| s.split('/').next())
234            {
235                let host = host_part.split(':').next().unwrap_or(host_part);
236                if Self::is_ssrf_host(host) {
237                    return Err(Error::SchemaValidationFailed(format!(
238                        "SSRF blocked: URL targets internal/metadata address: {}",
239                        url
240                    )));
241                }
242            }
243        } else if is_ssh {
244            if let Some(host) = url.strip_prefix("git@").and_then(|s| s.split(':').next()) {
245                if Self::is_ssrf_host(host) {
246                    return Err(Error::SchemaValidationFailed(format!(
247                        "SSRF blocked: URL targets internal/metadata address: {}",
248                        url
249                    )));
250                }
251            }
252        }
253
254        Ok(())
255    }
256
/// Checks whether `host` is a known SSRF target (cloud metadata service,
/// localhost, private or link-local address).
///
/// Two checks are combined; either one is enough to flag the host:
///
/// 1. A case-insensitive substring match against a list of well-known
///    indicators (metadata hostnames, `localhost`, common private prefixes).
/// 2. If the host parses as an IP literal — bare, bracketed (`[::1]`), with
///    an IPv6 zone id, or with a trailing dot — its address class is checked:
///    IPv4 loopback, private (10/8, 172.16/12, 192.168/16), link-local,
///    unspecified, broadcast and 100.64/10; IPv6 loopback, unspecified,
///    unique-local (fc00::/7), link-local (fe80::/10), and IPv4-mapped
///    addresses whose embedded IPv4 address falls in one of the classes above.
///
/// Only the literal host string is inspected; names are not resolved, so a
/// public DNS name that points at an internal address is not detected here.
fn is_ssrf_host(host: &str) -> bool {
    /// Substrings that mark a host as internal regardless of parsing.
    const SSRF_INDICATORS: &[&str] = &[
        "169.254.169.254",
        "169.254.",
        "127.0.0.1",
        "localhost",
        "0.0.0.0",
        "::1",
        "10.0.0.",
        "10.0.1.",
        "10.0.2.",
        "10.0.3.",
        "172.16.",
        "172.17.",
        "172.18.",
        "172.19.",
        "172.20.",
        "172.21.",
        "172.22.",
        "172.23.",
        "172.24.",
        "172.25.",
        "172.26.",
        "172.27.",
        "172.28.",
        "172.29.",
        "172.30.",
        "172.31.",
        "192.168.",
        "metadata.google",
        "metadata.azure",
        "instance-data",
        "100.100.100.200",
        "[::1]",
        "[fe80:",
    ];

    /// True for IPv4 addresses that must never be reached from here.
    fn is_internal_v4(addr: std::net::Ipv4Addr) -> bool {
        let octets = addr.octets();
        // 100.64.0.0/10: shared address space, which also contains the
        // 100.100.100.200 metadata address listed above.
        let shared_space = octets[0] == 100 && (octets[1] & 0xc0) == 64;
        addr.is_loopback()
            || addr.is_private()
            || addr.is_link_local()
            || addr.is_unspecified()
            || addr.is_broadcast()
            || shared_space
    }

    let lower = host.to_lowercase();
    if SSRF_INDICATORS
        .iter()
        .any(|indicator| lower.contains(indicator))
    {
        return true;
    }

    // Reduce the host to a bare IP literal candidate: strip brackets, an IPv6
    // zone id (`%eth0`) and a trailing root dot.
    let unbracketed = lower.trim_start_matches('[').trim_end_matches(']');
    let literal = unbracketed
        .split('%')
        .next()
        .unwrap_or(unbracketed)
        .trim_end_matches('.');

    match literal.parse::<std::net::IpAddr>() {
        Ok(std::net::IpAddr::V4(v4)) => is_internal_v4(v4),
        Ok(std::net::IpAddr::V6(v6)) => {
            if let Some(mapped) = v6.to_ipv4_mapped() {
                return is_internal_v4(mapped);
            }
            let first = v6.segments()[0];
            v6.is_loopback()
                || v6.is_unspecified()
                || (first & 0xfe00) == 0xfc00 // unique-local fc00::/7
                || (first & 0xffc0) == 0xfe80 // link-local fe80::/10
        }
        Err(_) => false,
    }
}
299
300    /// Validates a branch name against git's ref naming rules and option injection.
301    ///
302    /// Rejects: empty branches, `..` (range spec), `@{` (reflog), `--` prefix
303    /// (option injection), `refs/` patterns (ref injection), control characters,
304    /// whitespace, and shell/git metacharacters (`:`, `~`, `^`, `*`, `[`, `\\`,
305    /// `.lock`, `?`).
306    fn validate_branch_name(branch: &str) -> Result<()> {
307        if branch.is_empty() {
308            return Err(Error::SchemaValidationFailed("Branch name is empty".into()));
309        }
310        if branch.contains("..") || branch.contains("@{") {
311            return Err(Error::SchemaValidationFailed(format!(
312                "Invalid branch name: {}",
313                branch
314            )));
315        }
316        if branch.starts_with("--") {
317            return Err(Error::SchemaValidationFailed(format!(
318                "Branch name cannot start with '--': {}",
319                branch
320            )));
321        }
322        if branch.starts_with("refs/") || branch.contains("/refs/") {
323            return Err(Error::SchemaValidationFailed(format!(
324                "Ref injection detected in branch name: {}",
325                branch
326            )));
327        }
328        if branch.contains(|c: char| c.is_control() || c.is_whitespace()) {
329            return Err(Error::SchemaValidationFailed(format!(
330                "Branch name contains control or whitespace: {}",
331                branch
332            )));
333        }
334        if branch.contains([':', '~', '^', '*', '[', '\\', '?'])
335            || std::path::Path::new(branch)
336                .extension()
337                .is_some_and(|ext| ext.eq_ignore_ascii_case("lock"))
338        {
339            return Err(Error::SchemaValidationFailed(format!(
340                "Branch name contains invalid character: {}",
341                branch
342            )));
343        }
344        Ok(())
345    }
346
347    /// Validates a commit SHA.
348    fn validate_commit_sha(sha: &str) -> Result<()> {
349        if sha.len() < 7 || sha.len() > 40 {
350            return Err(Error::SchemaValidationFailed(format!(
351                "Invalid commit SHA length: {}",
352                sha
353            )));
354        }
355        if !sha.chars().all(|c| c.is_ascii_hexdigit()) {
356            return Err(Error::SchemaValidationFailed(format!(
357                "Invalid commit SHA: {}",
358                sha
359            )));
360        }
361        Ok(())
362    }
363
/// Redacts credentials from a URL, replacing the `user:pass@` part with `***@`.
///
/// - scp-style SSH URLs starting with `git@` are returned unchanged.
/// - Otherwise everything between the scheme (if any) and the `@` that ends
///   the userinfo is replaced. The userinfo is taken to end at the LAST `@`
///   inside the authority (the part before the first `/`, `?` or `#`), so a
///   password that itself contains `@` is removed in full instead of leaking
///   its tail.
/// - If the authority holds no `@` but one appears later in the string, the
///   text up to that first `@` is still redacted: over-redaction is preferred
///   to exposing a malformed credential.
/// - A URL without any `@` is returned unchanged.
// Offsets come from `find`/`rfind` on one-byte ASCII delimiters inside the
// same string, so the `+ 1` below cannot overflow or split a character.
#[allow(clippy::arithmetic_side_effects)]
fn sanitize_url(url: &str) -> String {
    if url.starts_with("git@") {
        return url.to_string();
    }

    let (scheme, rest) = match url.split_once("://") {
        Some((scheme, rest)) => (format!("{}://", scheme), rest),
        None => (String::new(), url),
    };

    let authority_end = rest.find(['/', '?', '#']).unwrap_or(rest.len());
    let userinfo_end = rest[..authority_end]
        .rfind('@')
        .or_else(|| rest.find('@'));

    match userinfo_end {
        Some(at) => format!("{}***@{}", scheme, &rest[at + 1..]),
        None => url.to_string(),
    }
}
381
/// Redacts URL credentials from git output.
///
/// Each line is scanned for `://`. The URL is taken to end at the next
/// whitespace character. Within it, the userinfo ends at the LAST `@` of the
/// authority (the part before the first `/`, `?` or `#`); if the authority
/// has no `@`, the first `@` anywhere in the URL is used instead. A non-empty
/// userinfo is replaced with `***@`; an empty one (`://@host`) is left as is.
///
/// Bounding the search to the URL keeps unrelated text later on the line
/// (for example an e-mail address) from being swallowed, and using the last
/// `@` of the authority keeps a password containing `@` from leaking its tail.
///
/// Lines are re-joined with `\n`, so a trailing newline is dropped and
/// `\r\n` line endings become `\n`.
// Offsets come from `find`/`rfind` on ASCII delimiters inside the same slice,
// so the additions below cannot overflow or split a character.
#[allow(clippy::arithmetic_side_effects)]
fn sanitize_output(output: &str) -> String {
    /// Redacts every credential-bearing URL found in a single line.
    fn redact_line(line: &str) -> String {
        let mut redacted = String::with_capacity(line.len());
        let mut rest = line;

        while let Some(scheme_end) = rest.find("://") {
            let (head, after) = rest.split_at(scheme_end + 3);
            redacted.push_str(head);

            let url_end = after.find(char::is_whitespace).unwrap_or(after.len());
            let url = &after[..url_end];
            let authority_end = url.find(['/', '?', '#']).unwrap_or(url.len());
            let userinfo_end = url[..authority_end].rfind('@').or_else(|| url.find('@'));

            rest = match userinfo_end {
                Some(at) => {
                    redacted.push_str(if at > 0 { "***@" } else { "@" });
                    &after[at + 1..]
                }
                None => after,
            };
        }

        redacted.push_str(rest);
        redacted
    }

    output
        .lines()
        .map(redact_line)
        .collect::<Vec<_>>()
        .join("\n")
}
422
423    /// Checks if a file path looks like a secret file that should not be committed.
424    fn is_secret_file(path: &str) -> bool {
425        let lower = path.to_lowercase();
426        SECRET_PATTERNS.iter().any(|pattern| {
427            lower == *pattern
428                || lower.ends_with(&format!("/{}", pattern))
429                || lower.contains(&format!("/{}/", pattern))
430        })
431    }
432
433    /// Validates a file path for git add (no traversal, no secrets).
434    fn validate_add_file(file: &str, repo_path: &Path) -> Result<()> {
435        if file.contains("..") {
436            return Err(Error::SchemaValidationFailed(format!(
437                "Path traversal in file path: {}",
438                file
439            )));
440        }
441        if Self::is_secret_file(file) {
442            return Err(Error::SchemaValidationFailed(format!(
443                "Secret file detected, refusing to add: {}",
444                file
445            )));
446        }
447        let full_path = repo_path.join(file);
448        if full_path.exists() {
449            let canonical = full_path.canonicalize().map_err(|e| {
450                Error::SchemaValidationFailed(format!("Cannot resolve file {}: {}", file, e))
451            })?;
452            let canonical_repo = repo_path.canonicalize().map_err(|e| {
453                Error::SchemaValidationFailed(format!("Cannot resolve repo: {}", e))
454            })?;
455            if !canonical.starts_with(&canonical_repo) {
456                return Err(Error::SchemaValidationFailed(format!(
457                    "File {} escapes repository boundary",
458                    file
459                )));
460            }
461        }
462        Ok(())
463    }
464
/// Asks `df` how many bytes are available on the filesystem holding `path`.
///
/// Runs `df --output=avail -B1 <path>` and parses the second output line
/// (the first is the column header). Returns `None` when `df` cannot be run,
/// exits unsuccessfully, or prints something that is not a number there.
fn disk_free_bytes(path: &Path) -> Option<u64> {
    let report = Command::new("df")
        .args(["--output=avail", "-B1"])
        .arg(path)
        .output()
        .ok()?;

    if !report.status.success() {
        return None;
    }

    let text = String::from_utf8_lossy(&report.stdout);
    let value_line = text.lines().nth(1)?;
    value_line.trim().parse::<u64>().ok()
}
480
481    /// Counts untracked files that would be removed by git clean -fd.
482    fn count_untracked_files(repo_path: &Path, timeout_secs: u64) -> Result<usize> {
483        let output = Self::run_git_with_timeout(
484            repo_path,
485            &["ls-files", "--others", "--exclude-standard"],
486            timeout_secs,
487        )?;
488        Ok(output.lines().filter(|l| !l.is_empty()).count())
489    }
490
491    /// Sanitizes a commit message (strips control chars, ensures non-empty).
492    fn sanitize_commit_message(msg: &str) -> Result<String> {
493        let sanitized: String = msg
494            .chars()
495            .filter(|c| !c.is_control() || *c == '\n' || *c == '\t')
496            .collect();
497        let trimmed = sanitized.trim();
498        if trimmed.is_empty() {
499            return Err(Error::SchemaValidationFailed(
500                "Commit message is empty after sanitization".into(),
501            ));
502        }
503        Ok(trimmed.to_string())
504    }
505
506    /// Creates a backup unconditionally before any mutating operation.
507    fn backup_before_mutation(&self, repo_path: &Path, job_id: &str) -> Result<PathBuf> {
508        self.backup_mgr.create_backup(repo_path, job_id)
509    }
510
511    /// Captures the current git state for a repository.
512    fn capture_state(repo_path: &Path, timeout_secs: u64) -> Result<GitState> {
513        let commit_sha =
514            Self::run_git_with_timeout(repo_path, &["rev-parse", "HEAD"], timeout_secs)
515                .map(|s| s.trim().to_string())
516                .ok();
517
518        let branch = Self::run_git_with_timeout(
519            repo_path,
520            &["rev-parse", "--abbrev-ref", "HEAD"],
521            timeout_secs,
522        )
523        .map(|s| s.trim().to_string())
524        .ok();
525
526        let remote_url =
527            Self::run_git_with_timeout(repo_path, &["remote", "get-url", "origin"], timeout_secs)
528                .ok()
529                .and_then(|s| {
530                    let trimmed = s.trim().to_string();
531                    let sanitized = Self::sanitize_url(&trimmed);
532                    if sanitized.is_empty() {
533                        None
534                    } else {
535                        Some(sanitized)
536                    }
537                });
538
539        let is_clean = Self::is_working_tree_clean(repo_path);
540
541        Ok(GitState {
542            commit_sha,
543            branch,
544            remote_url,
545            repo_path: repo_path.to_string_lossy().to_string(),
546            is_clean,
547        })
548    }
549
550    /// Executes git clone operation.
551    fn op_clone(&self, args: &GitExecArgs, ctx: &Context) -> Result<Output> {
552        let _ = self;
553        let timeout_secs = args.timeout_secs.unwrap_or(300);
554        let url = args
555            .url
556            .as_ref()
557            .ok_or_else(|| Error::ExecutionFailed("URL required for clone".into()))?;
558        let path = args
559            .path
560            .as_ref()
561            .ok_or_else(|| Error::ExecutionFailed("Path required for clone".into()))?;
562
563        Self::validate_url(url)?;
564
565        let path = Path::new(path);
566        if path.exists() {
567            return Err(Error::ExecutionFailed(format!(
568                "Path already exists: {}",
569                path.display()
570            )));
571        }
572
573        if let Some(free) = Self::disk_free_bytes(path.parent().unwrap_or_else(|| Path::new("/"))) {
574            if free < 100 * 1024 * 1024 {
575                return Err(Error::ExecutionFailed(
576                    "Insufficient disk space for clone (need at least 100MB)".into(),
577                ));
578            }
579        }
580
581        if ctx.dry_run {
582            let mut out = Output::ok(format!(
583                "DRY RUN: would clone {} to {}",
584                Self::sanitize_url(url),
585                path.display()
586            ));
587            out.data = Some(serde_json::json!({
588                "operation": "clone",
589                "url": Self::sanitize_url(url),
590                "path": path.display().to_string(),
591                "dry_run": true
592            }));
593            return Ok(out);
594        }
595
596        if let Some(parent) = path.parent() {
597            std::fs::create_dir_all(parent).map_err(|e| {
598                Error::ExecutionFailed(format!("mkdir {}: {}", parent.display(), e))
599            })?;
600        }
601
602        let mut cmd = Command::new("git");
603        cmd.arg("clone").arg(url).arg(path);
604
605        if let Some(branch) = &args.branch {
606            cmd.arg("-b").arg(branch);
607        }
608
609        let mut child = cmd
610            .stdin(std::process::Stdio::null())
611            .spawn()
612            .map_err(|e| Error::ExecutionFailed(format!("git clone spawn failed: {}", e)))?;
613
614        let timeout = Duration::from_secs(timeout_secs);
615        let start = Instant::now();
616        let status = loop {
617            match child.try_wait() {
618                Ok(Some(s)) => break s,
619                Ok(None) => {
620                    if start.elapsed() > timeout {
621                        let _ = child.kill();
622                        let _ = child.wait();
623                        return Err(Error::ExecutionFailed(format!(
624                            "git clone timed out after {}s",
625                            timeout_secs
626                        )));
627                    }
628                    std::thread::sleep(Duration::from_millis(100));
629                }
630                Err(e) => {
631                    let _ = child.kill();
632                    let _ = child.wait();
633                    return Err(Error::ExecutionFailed(format!(
634                        "git clone wait error: {}",
635                        e
636                    )));
637                }
638            }
639        };
640
641        if !status.success() {
642            return Err(Error::ExecutionFailed(
643                "git clone failed (see stderr)".into(),
644            ));
645        }
646
647        let state = Self::capture_state(path, timeout_secs)?;
648
649        let mut out = Output::ok(format!(
650            "Cloned {} to {}",
651            Self::sanitize_url(url),
652            path.display()
653        ));
654        out.data = Some(serde_json::json!({
655            "operation": "clone",
656            "url": Self::sanitize_url(url),
657            "path": path.display().to_string(),
658            "commit_sha": state.commit_sha,
659            "branch": state.branch,
660            "remote_url": state.remote_url
661        }));
662        Ok(out)
663    }
664
665    /// Executes git pull operation.
666    fn op_pull(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
667        let timeout_secs = args.timeout_secs.unwrap_or(300);
668
669        if !repo_path.exists() {
670            return Err(Error::ExecutionFailed(format!(
671                "Repository not found: {}",
672                repo_path.display()
673            )));
674        }
675
676        let state_before = Self::capture_state(repo_path, timeout_secs)?;
677
678        if ctx.dry_run {
679            let mut out = Output::ok("DRY RUN: would pull".into());
680            out.data = Some(serde_json::json!({
681                "operation": "pull",
682                "path": repo_path.display().to_string(),
683                "dry_run": true
684            }));
685            return Ok(out);
686        }
687
688        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
689
690        let output = Self::run_git_with_timeout(repo_path, &["pull", "--rebase"], timeout_secs)
691            .map_err(|e| Error::ExecutionFailed(format!("git pull failed: {}", e)))?;
692
693        let state_after = Self::capture_state(repo_path, timeout_secs)?;
694
695        let mut out = Output::ok("Pulled successfully".into());
696        out.data = Some(serde_json::json!({
697            "operation": "pull",
698            "path": repo_path.display().to_string(),
699            "commit_sha_before": state_before.commit_sha,
700            "commit_sha_after": state_after.commit_sha,
701            "branch": state_after.branch,
702            "backup_path": backup_path.map(|p| p.to_string_lossy().to_string()),
703            "git_output": Self::sanitize_output(&output)
704        }));
705        Ok(out)
706    }
707
708    /// Executes git commit operation.
709    fn op_commit(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
710        let timeout_secs = args.timeout_secs.unwrap_or(300);
711
712        if !repo_path.exists() {
713            return Err(Error::ExecutionFailed(format!(
714                "Repository not found: {}",
715                repo_path.display()
716            )));
717        }
718
719        let message = args
720            .message
721            .as_ref()
722            .ok_or_else(|| Error::ExecutionFailed("Commit message required".into()))?;
723        let message = Self::sanitize_commit_message(message)?;
724
725        let state_before = Self::capture_state(repo_path, timeout_secs)?;
726
727        if ctx.dry_run {
728            let mut out = Output::ok("DRY RUN: would commit".into());
729            out.data = Some(serde_json::json!({
730                "operation": "commit",
731                "path": repo_path.display().to_string(),
732                "message": &message,
733                "dry_run": true
734            }));
735            return Ok(out);
736        }
737
738        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
739
740        if let Some(files) = &args.files {
741            for file in files {
742                Self::validate_add_file(file, repo_path)?;
743                let output = Self::run_git_with_timeout(repo_path, &["add", file], timeout_secs)
744                    .map_err(|e| Error::ExecutionFailed(format!("git add failed: {}", e)))?;
745                let _ = output;
746            }
747        } else {
748            let untracked = Self::run_git_with_timeout(
749                repo_path,
750                &["ls-files", "--others", "--exclude-standard"],
751                timeout_secs,
752            )?;
753            for line in untracked.lines() {
754                let file = line.trim();
755                if file.is_empty() {
756                    continue;
757                }
758                if Self::is_secret_file(file) {
759                    eprintln!("[runtimo] Skipping secret file from git add: {}", file);
760                    continue;
761                }
762                Self::run_git_with_timeout(repo_path, &["add", file], timeout_secs).map_err(
763                    |e| Error::ExecutionFailed(format!("git add {} failed: {}", file, e)),
764                )?;
765            }
766        }
767
768        let output =
769            Self::run_git_with_timeout(repo_path, &["commit", "-m", &message], timeout_secs)
770                .map_err(|e| Error::ExecutionFailed(format!("git commit failed: {}", e)))?;
771        let _ = output;
772
773        let state_after = Self::capture_state(repo_path, timeout_secs)?;
774
775        let mut out = Output::ok(format!("Committed: {}", message));
776        out.data = Some(serde_json::json!({
777            "operation": "commit",
778            "path": repo_path.display().to_string(),
779            "message": message,
780            "commit_sha_before": state_before.commit_sha,
781            "commit_sha_after": state_after.commit_sha,
782            "branch": state_after.branch,
783            "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
784        }));
785        Ok(out)
786    }
787
788    /// Executes git revert operation.
789    fn op_revert(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
790        let timeout_secs = args.timeout_secs.unwrap_or(300);
791
792        if !repo_path.exists() {
793            return Err(Error::ExecutionFailed(format!(
794                "Repository not found: {}",
795                repo_path.display()
796            )));
797        }
798
799        let commit_sha = args
800            .commit_sha
801            .as_ref()
802            .ok_or_else(|| Error::ExecutionFailed("Commit SHA required for revert".into()))?;
803
804        Self::validate_commit_sha(commit_sha)?;
805
806        let state_before = Self::capture_state(repo_path, timeout_secs)?;
807
808        if ctx.dry_run {
809            let mut out = Output::ok(format!("DRY RUN: would revert {}", commit_sha));
810            out.data = Some(serde_json::json!({
811                "operation": "revert",
812                "path": repo_path.display().to_string(),
813                "commit_sha": commit_sha,
814                "dry_run": true
815            }));
816            return Ok(out);
817        }
818
819        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
820
821        let output = Self::run_git_with_timeout(
822            repo_path,
823            &["revert", "--no-edit", commit_sha],
824            timeout_secs,
825        )
826        .map_err(|e| Error::ExecutionFailed(format!("git revert failed: {}", e)))?;
827        let _ = output;
828
829        let state_after = Self::capture_state(repo_path, timeout_secs)?;
830
831        let mut out = Output::ok(format!("Reverted {}", commit_sha));
832        out.data = Some(serde_json::json!({
833            "operation": "revert",
834            "path": repo_path.display().to_string(),
835            "commit_sha": commit_sha,
836            "commit_sha_before": state_before.commit_sha,
837            "commit_sha_after": state_after.commit_sha,
838            "branch": state_after.branch,
839            "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
840        }));
841        Ok(out)
842    }
843
844    /// Executes git clean operation.
845    fn op_clean(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
846        let timeout_secs = args.timeout_secs.unwrap_or(300);
847
848        if !repo_path.exists() {
849            return Err(Error::ExecutionFailed(format!(
850                "Repository not found: {}",
851                repo_path.display()
852            )));
853        }
854
855        let state_before = Self::capture_state(repo_path, timeout_secs)?;
856
857        if ctx.dry_run {
858            let untracked_count = Self::count_untracked_files(repo_path, timeout_secs).unwrap_or(0);
859            let preview =
860                Self::run_git_with_timeout(repo_path, &["clean", "-fd", "--dry-run"], timeout_secs)
861                    .map(|s| Self::sanitize_output(&s))
862                    .unwrap_or_default();
863            let mut out = Output::ok(format!(
864                "DRY RUN: would clean {} untracked files",
865                untracked_count
866            ));
867            out.data = Some(serde_json::json!({
868                "operation": "clean",
869                "path": repo_path.display().to_string(),
870                "dry_run": true,
871                "untracked_count": untracked_count,
872                "preview": preview
873            }));
874            return Ok(out);
875        }
876
877        let untracked_count = Self::count_untracked_files(repo_path, timeout_secs)?;
878        if untracked_count > MAX_CLEAN_FILES {
879            return Err(Error::ExecutionFailed(format!(
880                "Too many untracked files to clean safely: {} (limit: {})",
881                untracked_count, MAX_CLEAN_FILES
882            )));
883        }
884
885        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
886
887        let output = Self::run_git_with_timeout(repo_path, &["clean", "-fd"], timeout_secs)
888            .map_err(|e| Error::ExecutionFailed(format!("git clean failed: {}", e)))?;
889        let _ = output;
890
891        let state_after = Self::capture_state(repo_path, timeout_secs)?;
892
893        let mut out = Output::ok(format!("Cleaned {} untracked files", untracked_count));
894        out.data = Some(serde_json::json!({
895            "operation": "clean",
896            "path": repo_path.display().to_string(),
897            "was_clean": state_before.is_clean,
898            "is_clean": state_after.is_clean,
899            "untracked_files_removed": untracked_count,
900            "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
901        }));
902        Ok(out)
903    }
904
905    /// Executes git status operation.
906    #[allow(clippy::unused_self, clippy::used_underscore_binding)]
907    fn op_status(&self, _args: &GitExecArgs, _ctx: &Context, repo_path: &Path) -> Result<Output> {
908        let timeout_secs = _args.timeout_secs.unwrap_or(300);
909
910        if !repo_path.exists() {
911            return Err(Error::ExecutionFailed(format!(
912                "Repository not found: {}",
913                repo_path.display()
914            )));
915        }
916
917        let state = Self::capture_state(repo_path, timeout_secs)?;
918
919        let status_output =
920            Self::run_git_with_timeout(repo_path, &["status", "--porcelain"], timeout_secs)
921                .unwrap_or_default();
922
923        let branch = state.branch.clone().unwrap_or_default();
924        let remote_url = state.remote_url.clone().unwrap_or_default();
925
926        let mut out = Output::ok(format!(
927            "On branch {}: {}",
928            branch,
929            if state.is_clean { "clean" } else { "dirty" }
930        ));
931        out.data = Some(serde_json::json!({
932            "operation": "status",
933            "path": repo_path.display().to_string(),
934            "branch": branch,
935            "remote_url": remote_url,
936            "commit_sha": state.commit_sha,
937            "is_clean": state.is_clean,
938            "status": status_output
939        }));
940        Ok(out)
941    }
942}
943
944impl TypedCapability for GitExec {
945    type Args = GitExecArgs;
946
947    fn name(&self) -> &'static str {
948        "GitExec"
949    }
950
951    fn description(&self) -> &'static str {
952        "git operations: clone, pull, commit, revert, clean, status. state tracking (sha, branch, remote), SSRF-blocked URLs, secret detection, timeout, undo via backup."
953    }
954
955    fn schema(&self) -> Value {
956        serde_json::json!({
957            "type": "object",
958            "properties": {
959                "operation": { "type": "string", "enum": ["clone", "pull", "commit", "revert", "clean", "status"] },
960                "url": { "type": "string" },
961                "path": { "type": "string" },
962                "branch": { "type": "string" },
963                "message": { "type": "string" },
964                "files": { "type": "array", "items": { "type": "string" } },
965                "commit_sha": { "type": "string" },
966                "timeout_secs": { "type": "integer", "minimum": 1, "maximum": 600 }
967            },
968            "required": ["operation"]
969        })
970    }
971
972    fn execute(
973        &self,
974        args: GitExecArgs,
975        ctx: &Context,
976    ) -> std::result::Result<Output, CapabilityError> {
977        let valid_ops = ["clone", "pull", "commit", "revert", "clean", "status"];
978        if !valid_ops.contains(&args.operation.as_str()) {
979            return Err(CapabilityError::InvalidArgs(format!(
980                "Invalid operation: {}. Must be one of: {}",
981                args.operation,
982                valid_ops.join(", ")
983            )));
984        }
985
986        if args.operation == "clone" {
987            if let Some(url) = &args.url {
988                Self::validate_url(url)
989                    .map_err(|e| CapabilityError::PermissionDenied(e.to_string()))?;
990            } else {
991                return Err(CapabilityError::InvalidArgs(
992                    "URL required for clone".into(),
993                ));
994            }
995            if let Some(path) = &args.path {
996                let ctx = PathContext {
997                    require_exists: false,
998                    require_file: false,
999                    ..Default::default()
1000                };
1001                validate_path(path, &ctx).map_err(CapabilityError::PermissionDenied)?;
1002            }
1003        }
1004
1005        if args.operation != "clone" {
1006            if let Some(path) = &args.path {
1007                let ctx = PathContext {
1008                    require_exists: true,
1009                    require_file: false,
1010                    ..Default::default()
1011                };
1012                validate_path(path, &ctx).map_err(CapabilityError::PermissionDenied)?;
1013            }
1014        }
1015
1016        if let Some(branch) = &args.branch {
1017            Self::validate_branch_name(branch)
1018                .map_err(|e| CapabilityError::InvalidArgs(e.to_string()))?;
1019        }
1020
1021        if let Some(sha) = &args.commit_sha {
1022            Self::validate_commit_sha(sha)
1023                .map_err(|e| CapabilityError::InvalidArgs(e.to_string()))?;
1024        }
1025
1026        let telemetry_before = Telemetry::capture();
1027        let process_before = ProcessSnapshot::capture();
1028
1029        let result = match args.operation.as_str() {
1030            "clone" => self.op_clone(&args, ctx),
1031            "pull" => {
1032                let path = args
1033                    .path
1034                    .as_ref()
1035                    .ok_or_else(|| CapabilityError::InvalidArgs("Path required for pull".into()))?;
1036                self.op_pull(&args, ctx, Path::new(path))
1037            }
1038            "commit" => {
1039                let path = args.path.as_ref().ok_or_else(|| {
1040                    CapabilityError::InvalidArgs("Path required for commit".into())
1041                })?;
1042                self.op_commit(&args, ctx, Path::new(path))
1043            }
1044            "revert" => {
1045                let path = args.path.as_ref().ok_or_else(|| {
1046                    CapabilityError::InvalidArgs("Path required for revert".into())
1047                })?;
1048                self.op_revert(&args, ctx, Path::new(path))
1049            }
1050            "clean" => {
1051                let path = args.path.as_ref().ok_or_else(|| {
1052                    CapabilityError::InvalidArgs("Path required for clean".into())
1053                })?;
1054                self.op_clean(&args, ctx, Path::new(path))
1055            }
1056            "status" => {
1057                let path = args.path.as_ref().ok_or_else(|| {
1058                    CapabilityError::InvalidArgs("Path required for status".into())
1059                })?;
1060                self.op_status(&args, ctx, Path::new(path))
1061            }
1062            _ => Err(Error::ExecutionFailed(format!(
1063                "Unknown operation: {}",
1064                args.operation
1065            ))),
1066        };
1067
1068        let telemetry_after = Telemetry::capture();
1069        let process_after = ProcessSnapshot::capture();
1070
1071        let mut output = result.map_err(|e| CapabilityError::Internal(e.to_string()))?;
1072        if let Some(obj) = output.data.as_mut().and_then(|d| d.as_object_mut()) {
1073            obj.insert(
1074                "telemetry_before".to_string(),
1075                serde_json::to_value(&telemetry_before).unwrap_or(Value::Null),
1076            );
1077            obj.insert(
1078                "telemetry_after".to_string(),
1079                serde_json::to_value(&telemetry_after).unwrap_or(Value::Null),
1080            );
1081            obj.insert(
1082                "process_before".to_string(),
1083                serde_json::to_value(&process_before.summary).unwrap_or(Value::Null),
1084            );
1085            obj.insert(
1086                "process_after".to_string(),
1087                serde_json::to_value(&process_after.summary).unwrap_or(Value::Null),
1088            );
1089        }
1090
1091        Ok(output)
1092    }
1093}
1094
#[cfg(test)]
mod tests {
    use super::*;
    use crate::capability::Capability;

    /// Returns a backup directory unique to the calling test.
    ///
    /// Tests run in parallel, so each one that constructs a `GitExec` gets its
    /// own directory; a shared one could be deleted by a sibling test mid-run.
    fn test_backup_dir(tag: &str) -> PathBuf {
        std::env::temp_dir().join(format!("runtimo_git_test_{}", tag))
    }

    #[test]
    fn validates_git_url_https_only() {
        assert!(GitExec::validate_url("https://github.com/user/repo.git").is_ok());
        assert!(GitExec::validate_url("git@github.com:user/repo.git").is_ok());

        assert!(GitExec::validate_url("http://example.com/repo.git").is_err());
        assert!(GitExec::validate_url("not-a-url").is_err());
        assert!(GitExec::validate_url("").is_err());
    }

    #[test]
    fn blocks_ssrf_urls() {
        assert!(GitExec::validate_url("https://169.254.169.254/latest/meta-data/").is_err());
        assert!(GitExec::validate_url("https://127.0.0.1/repo.git").is_err());
        assert!(GitExec::validate_url("https://localhost/repo.git").is_err());
        assert!(GitExec::validate_url("https://192.168.1.1/repo.git").is_err());
        assert!(GitExec::validate_url("https://metadata.google.internal/computeMetadata").is_err());
    }

    #[test]
    fn sanitizes_credentials_from_url() {
        assert_eq!(
            GitExec::sanitize_url("https://user:pass@github.com/repo.git"),
            "https://***@github.com/repo.git"
        );
        assert_eq!(
            GitExec::sanitize_url("https://github.com/repo.git"),
            "https://github.com/repo.git"
        );
        assert_eq!(
            GitExec::sanitize_url("git@github.com:user/repo.git"),
            "git@github.com:user/repo.git"
        );
    }

    #[test]
    fn detects_secret_files() {
        assert!(GitExec::is_secret_file(".env"));
        assert!(GitExec::is_secret_file("config/.env"));
        assert!(GitExec::is_secret_file("credentials.json"));
        assert!(GitExec::is_secret_file(".ssh/id_rsa"));
        assert!(GitExec::is_secret_file("src/.env.local"));

        assert!(!GitExec::is_secret_file("main.rs"));
        assert!(!GitExec::is_secret_file("Cargo.toml"));
        assert!(!GitExec::is_secret_file("README.md"));
    }

    #[test]
    fn validates_branch_name() {
        assert!(GitExec::validate_branch_name("main").is_ok());
        assert!(GitExec::validate_branch_name("feature/my-branch").is_ok());
        assert!(GitExec::validate_branch_name("v1.0").is_ok());

        assert!(GitExec::validate_branch_name("").is_err());
        assert!(GitExec::validate_branch_name("bad..name").is_err());
        assert!(GitExec::validate_branch_name("@{..}").is_err());
        // Option injection
        assert!(GitExec::validate_branch_name("--force").is_err());
        assert!(GitExec::validate_branch_name("--help").is_err());
        // Ref injection
        assert!(GitExec::validate_branch_name("refs/heads/main").is_err());
        // Control chars and whitespace
        assert!(GitExec::validate_branch_name("bad\nname").is_err());
        assert!(GitExec::validate_branch_name("bad\tname").is_err());
        // Metacharacters
        assert!(GitExec::validate_branch_name("bad:name").is_err());
        assert!(GitExec::validate_branch_name("bad~name").is_err());
        assert!(GitExec::validate_branch_name("bad^name").is_err());
        assert!(GitExec::validate_branch_name("bad*name").is_err());
        assert!(GitExec::validate_branch_name("bad[name").is_err());
        assert!(GitExec::validate_branch_name("bad\\name").is_err());
        assert!(GitExec::validate_branch_name("bad?name").is_err());
        assert!(GitExec::validate_branch_name("name.lock").is_err());
    }

    #[test]
    fn validates_commit_sha() {
        assert!(GitExec::validate_commit_sha("abc1234").is_ok());
        assert!(GitExec::validate_commit_sha("a1b2c3d4").is_ok());
        assert!(GitExec::validate_commit_sha("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0").is_ok());

        assert!(GitExec::validate_commit_sha("abc123").is_err());
        assert!(GitExec::validate_commit_sha("").is_err());
        assert!(GitExec::validate_commit_sha("xyz123").is_err());
    }

    #[allow(clippy::expect_used)]
    #[test]
    fn rejects_path_traversal() {
        let backup_dir = test_backup_dir("path_traversal");
        let cap = GitExec::new(backup_dir.clone()).expect("Failed to create GitExec");

        let result = Capability::execute(
            &cap,
            &serde_json::json!({
                "operation": "clone",
                "url": "https://github.com/user/repo.git",
                "path": "../../../etc/passwd"
            }),
            &Context {
                dry_run: false,
                job_id: "test".into(),
                working_dir: std::env::temp_dir(),
            },
        );

        // The blanket impl's validate always returns Ok, so path traversal
        // is caught at execute time, not validate time.
        assert!(
            result.is_err(),
            "clone into a traversal path must be rejected"
        );
        std::fs::remove_dir_all(&backup_dir).ok();
    }

    #[allow(clippy::expect_used)]
    #[test]
    fn rejects_invalid_operation() {
        let backup_dir = test_backup_dir("invalid_operation");
        let cap = GitExec::new(backup_dir.clone()).expect("Failed to create GitExec");

        let result = Capability::execute(
            &cap,
            &serde_json::json!({
                "operation": "invalid_op"
            }),
            &Context {
                dry_run: false,
                job_id: "test".into(),
                working_dir: std::env::temp_dir(),
            },
        );

        assert!(result.is_err());
        std::fs::remove_dir_all(&backup_dir).ok();
    }

    #[test]
    #[allow(clippy::expect_used)]
    fn status_on_nonexistent_repo() {
        let backup_dir = test_backup_dir("status_nonexistent");
        let cap = GitExec::new(backup_dir.clone()).expect("Failed to create GitExec");

        let result = Capability::execute(
            &cap,
            &serde_json::json!({
                "operation": "status",
                "path": "/tmp/nonexistent_repo"
            }),
            &Context {
                dry_run: false,
                job_id: "test".into(),
                working_dir: std::env::temp_dir(),
            },
        );

        assert!(result.is_err());
        std::fs::remove_dir_all(&backup_dir).ok();
    }

    #[test]
    fn sanitizes_commit_message() {
        assert!(GitExec::sanitize_commit_message("valid commit").is_ok());
        assert!(GitExec::sanitize_commit_message("  trimmed  ").is_ok());
        assert!(GitExec::sanitize_commit_message("").is_err());
        assert!(GitExec::sanitize_commit_message("   ").is_err());
        let result = GitExec::sanitize_commit_message("hello\x00world").unwrap();
        assert!(!result.contains('\x00'));
    }

    #[test]
    fn timeout_enforced_on_git_command() {
        // Start a TCP listener on localhost that accepts but never responds.
        // This creates a guaranteed-timeout scenario without depending on
        // external network behavior. The listener is dropped after the test.
        let listener =
            std::net::TcpListener::bind("127.0.0.1:0").expect("failed to bind TCP listener");
        let port = listener.local_addr().unwrap().port();

        let tmp = std::env::temp_dir().join("runtimo_git_timeout_test");
        std::fs::create_dir_all(&tmp).ok();
        Command::new("git")
            .arg("init")
            .current_dir(&tmp)
            .output()
            .ok();

        // Spawn a thread that accepts one connection and hangs.
        // The git clone will connect and wait for a response that never comes.
        let _hang_handle = std::thread::spawn(move || {
            if let Ok((_stream, _addr)) = listener.accept() {
                // Hold the connection open indefinitely — never send a response
                std::thread::sleep(std::time::Duration::from_mins(5));
            }
        });

        // git clone to localhost times out after 2 seconds.
        let result = GitExec::run_git_with_timeout(
            &tmp,
            &["clone", &format!("http://127.0.0.1:{}/repo.git", port)],
            2,
        );

        // The operation should fail with a timeout (or a connection error
        // if git detects the protocol mismatch before the timeout fires).
        assert!(
            result.is_err(),
            "Expected timeout or connection error, got: {:?}",
            result
        );

        std::fs::remove_dir_all(&tmp).ok();
    }
}