Skip to main content

runtimo_core/capabilities/
git_exec.rs

1//! GitExec capability — git operations with state tracking and undo support.
2//!
3//! Provides git operations (clone, pull, commit, revert, clean, status) with:
4//! - State tracking (commit sha, branch, remote URL)
5//! - Backup-before-mutate for undo support
6//! - WAL logging for audit trail
7//! - Path traversal protection
8//! - Timeout enforcement on all git subprocesses
9//! - URL validation (HTTPS/SSH only, SSRF blocking)
10//! - Credential sanitization from output and stderr
11//! - Secret file detection for git add
12//! - Telemetry and process tracking before/after execution
13//!
14//! # Network capability
15//!
16//! **Git operations ARE inherently network-capable.** `git clone`, `git pull`,
17//! and `git fetch` make outbound connections to remote repositories.
18//! This is by design — denying network access would make GitExec useless.
19//!
20//! The network isolation is at the transport/protocol level:
21//! - Only HTTPS (`https://`) and SSH (`git@`) URLs are accepted
22//! - SSRF targets (metadata services, localhost, private ranges) are blocked
23//! - Credentials are sanitized from all output, stderr, and telemetry
24//!
25//! **Note on ShellExec interaction:** GitExec spawns `git` subprocesses which
26//! internally invoke `git-remote-https` (a git helper, NOT the system `curl`).
27//! The ShellExec network blocklist (`curl`, `wget`, etc.) does NOT affect
28//! GitExec — git uses its own transport layer. However, `RUNTIMO_ENABLE_NETWORK`
29//! does NOT gate GitExec; GitExec's network access is controlled by its own
30//! URL validation and SSRF blocking.
31//!
32//! # Example
33//!
34//! ```rust,ignore
35//! use runtimo_core::capabilities::GitExec;
36//! use runtimo_core::capability::{Capability, Context};
37//! use serde_json::json;
38//! use std::path::PathBuf;
39//!
40//! let cap = GitExec::new(PathBuf::from("/tmp/backups")).unwrap();
41//! let result = cap.execute(
42//!     &json!({"operation": "clone", "url": "https://github.com/user/repo.git", "path": "/tmp/repo"}),
43//!     &Context { dry_run: false, job_id: "job1".into(), working_dir: PathBuf::from("/tmp") }
44//! ).unwrap();
45//!
46//! assert!(result.status == "ok");
47//! ```
48
49use crate::backup::BackupManager;
50use crate::capability::{CapabilityError, Context, Output, TypedCapability};
51use crate::processes::ProcessSnapshot;
52use crate::telemetry::Telemetry;
53use crate::validation::path::{validate_path, PathContext};
54use crate::{Error, Result};
55use serde::{Deserialize, Serialize};
56use serde_json::Value;
57use std::path::{Path, PathBuf};
58use std::process::Command;
59use std::time::{Duration, Instant};
60
61/// Arguments for the [`GitExec`] capability.
62#[derive(Debug, Clone, Serialize, Deserialize)]
63#[allow(clippy::exhaustive_structs)] // args struct — fields are the contract
64pub struct GitExecArgs {
65    /// Git operation to perform (clone, pull, commit, revert, clean, status).
66    pub operation: String,
67    /// Repository URL (for clone/pull).
68    pub url: Option<String>,
69    /// Local path to repository (for clone/commit/revert/clean/status).
70    pub path: Option<String>,
71    /// Branch name (for checkout/clone).
72    pub branch: Option<String>,
73    /// Commit message (for commit).
74    pub message: Option<String>,
75    /// Files to commit (for commit).
76    pub files: Option<Vec<String>>,
77    /// Commit SHA to revert to (for revert).
78    pub commit_sha: Option<String>,
79    /// Timeout in seconds (default: 300).
80    pub timeout_secs: Option<u64>,
81}
82
83/// Git state before/after operation.
84#[derive(Debug, Clone, Serialize, Deserialize)]
85pub struct GitState {
86    /// Current commit SHA (HEAD).
87    pub commit_sha: Option<String>,
88    /// Current branch name.
89    pub branch: Option<String>,
90    /// Remote URL (origin).
91    pub remote_url: Option<String>,
92    /// Repository path.
93    pub repo_path: String,
94    /// Working directory status (clean/dirty).
95    pub is_clean: bool,
96}
97
/// Lower-case file names / relative paths that are treated as secrets and
/// must never be staged by this capability.
const SECRET_PATTERNS: &[&str] = &[
    // dotenv files
    ".env",
    ".env.local",
    ".env.production",
    ".env.staging",
    // generic credential / secret stores
    "credentials.json",
    "credentials.yml",
    "credentials.yaml",
    "secrets.json",
    "secrets.yml",
    "secrets.yaml",
    // SSH private keys (inside `.ssh/` and bare)
    ".ssh/id_rsa",
    ".ssh/id_ed25519",
    ".ssh/id_dsa",
    "id_rsa",
    "id_ed25519",
    "id_dsa",
    // package-manager and registry credentials
    ".npmrc",
    ".pypirc",
    ".docker/config.json",
    // loose tokens / API keys
    "token",
    "api_key",
    "api_secret",
    // cloud provider credentials
    ".aws/credentials",
    ".azure/credentials",
    // Java keystores
    "keystore.jks",
    "keystore.p12",
];

/// Upper bound on untracked files that a `git clean -fd` run may remove;
/// larger cleans are refused.
const MAX_CLEAN_FILES: usize = 1000;
130
131/// Capability that executes git operations with full state tracking.
132///
133/// Supports clone, pull, commit, revert, clean, and status operations.
134/// Creates backups before mutable operations for undo support.
135pub struct GitExec {
136    backup_mgr: BackupManager,
137}
138
139impl GitExec {
140    /// Creates a new GitExec capability with the given backup directory.
141    ///
142    /// # Errors
143    ///
144    /// Returns [`crate::Error::BackupError`] if the backup
145    /// directory cannot be created.
146    pub fn new(backup_dir: PathBuf) -> Result<Self> {
147        Ok(Self {
148            backup_mgr: BackupManager::new(backup_dir)?,
149        })
150    }
151
152    /// Runs a git command with timeout enforcement and returns the output.
153    fn run_git_with_timeout(repo_path: &Path, args: &[&str], timeout_secs: u64) -> Result<String> {
154        let mut child = Command::new("git")
155            .current_dir(repo_path)
156            .args(args)
157            .stdin(std::process::Stdio::null())
158            .spawn()
159            .map_err(|e| Error::ExecutionFailed(format!("git command failed: {}", e)))?;
160
161        let timeout = Duration::from_secs(timeout_secs);
162        let start = Instant::now();
163
164        loop {
165            match child.try_wait() {
166                Ok(Some(status)) => {
167                    let output = child
168                        .wait_with_output()
169                        .map_err(|e| Error::ExecutionFailed(format!("git wait failed: {}", e)))?;
170                    if !status.success() {
171                        let stderr = String::from_utf8_lossy(&output.stderr);
172                        // Sanitize stderr for safe JSON embedding: escape control chars
173                        let sanitized_stderr = stderr
174                            .chars()
175                            .filter(|c| !c.is_control() || *c == '\n' || *c == '\r' || *c == '\t')
176                            .collect::<String>();
177                        return Err(Error::ExecutionFailed(format!(
178                            "git {}: {}",
179                            args.join(" "),
180                            sanitized_stderr.trim()
181                        )));
182                    }
183                    return Ok(String::from_utf8_lossy(&output.stdout).to_string());
184                }
185                Ok(None) => {
186                    if start.elapsed() > timeout {
187                        let _ = child.kill();
188                        let _ = child.wait();
189                        return Err(Error::ExecutionFailed(format!(
190                            "git {} timed out after {}s",
191                            args.join(" "),
192                            timeout_secs
193                        )));
194                    }
195                    std::thread::sleep(Duration::from_millis(50));
196                }
197                Err(e) => {
198                    let _ = child.kill();
199                    let _ = child.wait();
200                    return Err(Error::ExecutionFailed(format!("git wait error: {}", e)));
201                }
202            }
203        }
204    }
205
206    /// Checks if the working tree is clean (no uncommitted changes).
207    fn is_working_tree_clean(repo_path: &Path) -> bool {
208        let output = Command::new("git")
209            .current_dir(repo_path)
210            .args(["status", "--porcelain"])
211            .output();
212
213        match output {
214            Ok(out) => out.stdout.is_empty() && out.stderr.is_empty(),
215            Err(_) => false,
216        }
217    }
218
219    /// Validates a git URL format. Blocks http:// (MITM risk) and SSRF patterns.
220    fn validate_url(url: &str) -> Result<()> {
221        let is_https = url.starts_with("https://");
222        let is_ssh = url.starts_with("git@");
223        if !is_https && !is_ssh {
224            return Err(Error::SchemaValidationFailed(format!(
225                "Insecure or unsupported URL scheme: {} (must use https:// or git@ SSH)",
226                url
227            )));
228        }
229
230        if is_https {
231            if let Some(host_part) = url
232                .strip_prefix("https://")
233                .and_then(|s| s.split('/').next())
234            {
235                let host = host_part.split(':').next().unwrap_or(host_part);
236                if Self::is_ssrf_host(host) {
237                    return Err(Error::SchemaValidationFailed(format!(
238                        "SSRF blocked: URL targets internal/metadata address: {}",
239                        url
240                    )));
241                }
242            }
243        }
244
245        Ok(())
246    }
247
248    /// Checks if a host is a known SSRF target (cloud metadata, localhost, link-local).
249    fn is_ssrf_host(host: &str) -> bool {
250        let lower = host.to_lowercase();
251        let ssrf_indicators = [
252            "169.254.169.254",
253            "169.254.",
254            "127.0.0.1",
255            "localhost",
256            "0.0.0.0",
257            "::1",
258            "10.0.0.",
259            "10.0.1.",
260            "10.0.2.",
261            "10.0.3.",
262            "172.16.",
263            "172.17.",
264            "172.18.",
265            "172.19.",
266            "172.20.",
267            "172.21.",
268            "172.22.",
269            "172.23.",
270            "172.24.",
271            "172.25.",
272            "172.26.",
273            "172.27.",
274            "172.28.",
275            "172.29.",
276            "172.30.",
277            "172.31.",
278            "192.168.",
279            "metadata.google",
280            "metadata.azure",
281            "instance-data",
282            "100.100.100.200",
283            "[::1]",
284            "[fe80:",
285        ];
286        ssrf_indicators
287            .iter()
288            .any(|indicator| lower.contains(indicator))
289    }
290
291    /// Validates a branch name against git's ref naming rules and option injection.
292    ///
293    /// Rejects: empty branches, `..` (range spec), `@{` (reflog), `--` prefix
294    /// (option injection), `refs/` patterns (ref injection), control characters,
295    /// whitespace, and shell/git metacharacters (`:`, `~`, `^`, `*`, `[`, `\\`,
296    /// `.lock`, `?`).
297    fn validate_branch_name(branch: &str) -> Result<()> {
298        if branch.is_empty() {
299            return Err(Error::SchemaValidationFailed("Branch name is empty".into()));
300        }
301        if branch.contains("..") || branch.contains("@{") {
302            return Err(Error::SchemaValidationFailed(format!(
303                "Invalid branch name: {}",
304                branch
305            )));
306        }
307        if branch.starts_with("--") {
308            return Err(Error::SchemaValidationFailed(format!(
309                "Branch name cannot start with '--': {}",
310                branch
311            )));
312        }
313        if branch.starts_with("refs/") || branch.contains("/refs/") {
314            return Err(Error::SchemaValidationFailed(format!(
315                "Ref injection detected in branch name: {}",
316                branch
317            )));
318        }
319        if branch.contains(|c: char| c.is_control() || c.is_whitespace()) {
320            return Err(Error::SchemaValidationFailed(format!(
321                "Branch name contains control or whitespace: {}",
322                branch
323            )));
324        }
325        if branch.contains([':', '~', '^', '*', '[', '\\', '?'])
326            || std::path::Path::new(branch)
327                .extension()
328                .is_some_and(|ext| ext.eq_ignore_ascii_case("lock"))
329        {
330            return Err(Error::SchemaValidationFailed(format!(
331                "Branch name contains invalid character: {}",
332                branch
333            )));
334        }
335        Ok(())
336    }
337
338    /// Validates a commit SHA.
339    fn validate_commit_sha(sha: &str) -> Result<()> {
340        if sha.len() < 7 || sha.len() > 40 {
341            return Err(Error::SchemaValidationFailed(format!(
342                "Invalid commit SHA length: {}",
343                sha
344            )));
345        }
346        if !sha.chars().all(|c| c.is_ascii_hexdigit()) {
347            return Err(Error::SchemaValidationFailed(format!(
348                "Invalid commit SHA: {}",
349                sha
350            )));
351        }
352        Ok(())
353    }
354
355    /// Sanitizes credentials from a URL string (redacts user:pass@).
356    /// Preserves SSH-style URLs (git@host:path) unchanged.
357    #[allow(clippy::arithmetic_side_effects)]
358    fn sanitize_url(url: &str) -> String {
359        if url.starts_with("git@") {
360            return url.to_string();
361        }
362        if let Some(at_pos) = url.find('@') {
363            if let Some(scheme_end) = url.find("://") {
364                let scheme = &url[..scheme_end + 3];
365                let after_at = &url[at_pos + 1..];
366                return format!("{}***@{}", scheme, after_at);
367            }
368            return format!("***@{}", &url[at_pos + 1..]);
369        }
370        url.to_string()
371    }
372
373    /// Sanitizes git output to remove credential leakage.
374    fn sanitize_output(output: &str) -> String {
375        let re_pattern = |line: &str| -> String {
376            let mut result = String::new();
377            let mut chars = line.chars().peekable();
378            while let Some(c) = chars.next() {
379                if c == ':' && chars.peek() == Some(&'/') && chars.clone().nth(1) == Some('/') {
380                    result.push_str("://");
381                    chars.next();
382                    chars.next();
383                    let mut user_pass = String::new();
384                    let mut found_at = false;
385                    for nc in chars.by_ref() {
386                        if nc == '@' {
387                            found_at = true;
388                            break;
389                        }
390                        user_pass.push(nc);
391                    }
392                    if found_at && !user_pass.is_empty() {
393                        result.push_str("***@");
394                    } else {
395                        result.push_str(&user_pass);
396                        if found_at {
397                            result.push('@');
398                        }
399                    }
400                } else {
401                    result.push(c);
402                }
403            }
404            result
405        };
406
407        output
408            .lines()
409            .map(re_pattern)
410            .collect::<Vec<_>>()
411            .join("\n")
412    }
413
414    /// Checks if a file path looks like a secret file that should not be committed.
415    fn is_secret_file(path: &str) -> bool {
416        let lower = path.to_lowercase();
417        SECRET_PATTERNS.iter().any(|pattern| {
418            lower == *pattern
419                || lower.ends_with(&format!("/{}", pattern))
420                || lower.contains(&format!("/{}/", pattern))
421        })
422    }
423
424    /// Validates a file path for git add (no traversal, no secrets).
425    fn validate_add_file(file: &str, repo_path: &Path) -> Result<()> {
426        if file.contains("..") {
427            return Err(Error::SchemaValidationFailed(format!(
428                "Path traversal in file path: {}",
429                file
430            )));
431        }
432        if Self::is_secret_file(file) {
433            return Err(Error::SchemaValidationFailed(format!(
434                "Secret file detected, refusing to add: {}",
435                file
436            )));
437        }
438        let full_path = repo_path.join(file);
439        if full_path.exists() {
440            let canonical = full_path.canonicalize().map_err(|e| {
441                Error::SchemaValidationFailed(format!("Cannot resolve file {}: {}", file, e))
442            })?;
443            let canonical_repo = repo_path.canonicalize().map_err(|e| {
444                Error::SchemaValidationFailed(format!("Cannot resolve repo: {}", e))
445            })?;
446            if !canonical.starts_with(&canonical_repo) {
447                return Err(Error::SchemaValidationFailed(format!(
448                    "File {} escapes repository boundary",
449                    file
450                )));
451            }
452        }
453        Ok(())
454    }
455
456    /// Checks available disk space (returns free bytes, or None if unknown).
457    fn disk_free_bytes(path: &Path) -> Option<u64> {
458        let output = Command::new("df")
459            .arg("--output=avail")
460            .arg("-B1")
461            .arg(path)
462            .output()
463            .ok()?;
464        if output.status.success() {
465            let stdout = String::from_utf8_lossy(&output.stdout);
466            stdout.lines().nth(1)?.trim().parse().ok()
467        } else {
468            None
469        }
470    }
471
472    /// Counts untracked files that would be removed by git clean -fd.
473    fn count_untracked_files(repo_path: &Path, timeout_secs: u64) -> Result<usize> {
474        let output = Self::run_git_with_timeout(
475            repo_path,
476            &["ls-files", "--others", "--exclude-standard"],
477            timeout_secs,
478        )?;
479        Ok(output.lines().filter(|l| !l.is_empty()).count())
480    }
481
482    /// Sanitizes a commit message (strips control chars, ensures non-empty).
483    fn sanitize_commit_message(msg: &str) -> Result<String> {
484        let sanitized: String = msg
485            .chars()
486            .filter(|c| !c.is_control() || *c == '\n' || *c == '\t')
487            .collect();
488        let trimmed = sanitized.trim();
489        if trimmed.is_empty() {
490            return Err(Error::SchemaValidationFailed(
491                "Commit message is empty after sanitization".into(),
492            ));
493        }
494        Ok(trimmed.to_string())
495    }
496
497    /// Creates a backup unconditionally before any mutating operation.
498    fn backup_before_mutation(&self, repo_path: &Path, job_id: &str) -> Result<PathBuf> {
499        self.backup_mgr.create_backup(repo_path, job_id)
500    }
501
502    /// Captures the current git state for a repository.
503    fn capture_state(repo_path: &Path, timeout_secs: u64) -> Result<GitState> {
504        let commit_sha =
505            Self::run_git_with_timeout(repo_path, &["rev-parse", "HEAD"], timeout_secs)
506                .map(|s| s.trim().to_string())
507                .ok();
508
509        let branch = Self::run_git_with_timeout(
510            repo_path,
511            &["rev-parse", "--abbrev-ref", "HEAD"],
512            timeout_secs,
513        )
514        .map(|s| s.trim().to_string())
515        .ok();
516
517        let remote_url =
518            Self::run_git_with_timeout(repo_path, &["remote", "get-url", "origin"], timeout_secs)
519                .ok()
520                .and_then(|s| {
521                    let trimmed = s.trim().to_string();
522                    let sanitized = Self::sanitize_url(&trimmed);
523                    if sanitized.is_empty() {
524                        None
525                    } else {
526                        Some(sanitized)
527                    }
528                });
529
530        let is_clean = Self::is_working_tree_clean(repo_path);
531
532        Ok(GitState {
533            commit_sha,
534            branch,
535            remote_url,
536            repo_path: repo_path.to_string_lossy().to_string(),
537            is_clean,
538        })
539    }
540
541    /// Executes git clone operation.
542    fn op_clone(&self, args: &GitExecArgs, ctx: &Context) -> Result<Output> {
543        let _ = self;
544        let timeout_secs = args.timeout_secs.unwrap_or(300);
545        let url = args
546            .url
547            .as_ref()
548            .ok_or_else(|| Error::ExecutionFailed("URL required for clone".into()))?;
549        let path = args
550            .path
551            .as_ref()
552            .ok_or_else(|| Error::ExecutionFailed("Path required for clone".into()))?;
553
554        Self::validate_url(url)?;
555
556        let path = Path::new(path);
557        if path.exists() {
558            return Err(Error::ExecutionFailed(format!(
559                "Path already exists: {}",
560                path.display()
561            )));
562        }
563
564        if let Some(free) = Self::disk_free_bytes(path.parent().unwrap_or_else(|| Path::new("/"))) {
565            if free < 100 * 1024 * 1024 {
566                return Err(Error::ExecutionFailed(
567                    "Insufficient disk space for clone (need at least 100MB)".into(),
568                ));
569            }
570        }
571
572        if ctx.dry_run {
573            let mut out = Output::ok(format!(
574                "DRY RUN: would clone {} to {}",
575                Self::sanitize_url(url),
576                path.display()
577            ));
578            out.data = Some(serde_json::json!({
579                "operation": "clone",
580                "url": Self::sanitize_url(url),
581                "path": path.display().to_string(),
582                "dry_run": true
583            }));
584            return Ok(out);
585        }
586
587        if let Some(parent) = path.parent() {
588            std::fs::create_dir_all(parent).map_err(|e| {
589                Error::ExecutionFailed(format!("mkdir {}: {}", parent.display(), e))
590            })?;
591        }
592
593        let mut cmd = Command::new("git");
594        cmd.arg("clone").arg(url).arg(path);
595
596        if let Some(branch) = &args.branch {
597            cmd.arg("-b").arg(branch);
598        }
599
600        let mut child = cmd
601            .stdin(std::process::Stdio::null())
602            .spawn()
603            .map_err(|e| Error::ExecutionFailed(format!("git clone spawn failed: {}", e)))?;
604
605        let timeout = Duration::from_secs(timeout_secs);
606        let start = Instant::now();
607        let status = loop {
608            match child.try_wait() {
609                Ok(Some(s)) => break s,
610                Ok(None) => {
611                    if start.elapsed() > timeout {
612                        let _ = child.kill();
613                        let _ = child.wait();
614                        return Err(Error::ExecutionFailed(format!(
615                            "git clone timed out after {}s",
616                            timeout_secs
617                        )));
618                    }
619                    std::thread::sleep(Duration::from_millis(100));
620                }
621                Err(e) => {
622                    let _ = child.kill();
623                    let _ = child.wait();
624                    return Err(Error::ExecutionFailed(format!(
625                        "git clone wait error: {}",
626                        e
627                    )));
628                }
629            }
630        };
631
632        if !status.success() {
633            return Err(Error::ExecutionFailed(
634                "git clone failed (see stderr)".into(),
635            ));
636        }
637
638        let state = Self::capture_state(path, timeout_secs)?;
639
640        let mut out = Output::ok(format!(
641            "Cloned {} to {}",
642            Self::sanitize_url(url),
643            path.display()
644        ));
645        out.data = Some(serde_json::json!({
646            "operation": "clone",
647            "url": Self::sanitize_url(url),
648            "path": path.display().to_string(),
649            "commit_sha": state.commit_sha,
650            "branch": state.branch,
651            "remote_url": state.remote_url
652        }));
653        Ok(out)
654    }
655
656    /// Executes git pull operation.
657    fn op_pull(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
658        let timeout_secs = args.timeout_secs.unwrap_or(300);
659
660        if !repo_path.exists() {
661            return Err(Error::ExecutionFailed(format!(
662                "Repository not found: {}",
663                repo_path.display()
664            )));
665        }
666
667        let state_before = Self::capture_state(repo_path, timeout_secs)?;
668
669        if ctx.dry_run {
670            let mut out = Output::ok("DRY RUN: would pull".into());
671            out.data = Some(serde_json::json!({
672                "operation": "pull",
673                "path": repo_path.display().to_string(),
674                "dry_run": true
675            }));
676            return Ok(out);
677        }
678
679        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
680
681        let output = Self::run_git_with_timeout(repo_path, &["pull", "--rebase"], timeout_secs)
682            .map_err(|e| Error::ExecutionFailed(format!("git pull failed: {}", e)))?;
683
684        let state_after = Self::capture_state(repo_path, timeout_secs)?;
685
686        let mut out = Output::ok("Pulled successfully".into());
687        out.data = Some(serde_json::json!({
688            "operation": "pull",
689            "path": repo_path.display().to_string(),
690            "commit_sha_before": state_before.commit_sha,
691            "commit_sha_after": state_after.commit_sha,
692            "branch": state_after.branch,
693            "backup_path": backup_path.map(|p| p.to_string_lossy().to_string()),
694            "git_output": Self::sanitize_output(&output)
695        }));
696        Ok(out)
697    }
698
699    /// Executes git commit operation.
700    fn op_commit(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
701        let timeout_secs = args.timeout_secs.unwrap_or(300);
702
703        if !repo_path.exists() {
704            return Err(Error::ExecutionFailed(format!(
705                "Repository not found: {}",
706                repo_path.display()
707            )));
708        }
709
710        let message = args
711            .message
712            .as_ref()
713            .ok_or_else(|| Error::ExecutionFailed("Commit message required".into()))?;
714        let message = Self::sanitize_commit_message(message)?;
715
716        let state_before = Self::capture_state(repo_path, timeout_secs)?;
717
718        if ctx.dry_run {
719            let mut out = Output::ok("DRY RUN: would commit".into());
720            out.data = Some(serde_json::json!({
721                "operation": "commit",
722                "path": repo_path.display().to_string(),
723                "message": &message,
724                "dry_run": true
725            }));
726            return Ok(out);
727        }
728
729        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
730
731        if let Some(files) = &args.files {
732            for file in files {
733                Self::validate_add_file(file, repo_path)?;
734                let output = Self::run_git_with_timeout(repo_path, &["add", file], timeout_secs)
735                    .map_err(|e| Error::ExecutionFailed(format!("git add failed: {}", e)))?;
736                let _ = output;
737            }
738        } else {
739            let untracked = Self::run_git_with_timeout(
740                repo_path,
741                &["ls-files", "--others", "--exclude-standard"],
742                timeout_secs,
743            )?;
744            for line in untracked.lines() {
745                let file = line.trim();
746                if file.is_empty() {
747                    continue;
748                }
749                if Self::is_secret_file(file) {
750                    eprintln!("[runtimo] Skipping secret file from git add: {}", file);
751                    continue;
752                }
753                Self::run_git_with_timeout(repo_path, &["add", file], timeout_secs).map_err(
754                    |e| Error::ExecutionFailed(format!("git add {} failed: {}", file, e)),
755                )?;
756            }
757        }
758
759        let output =
760            Self::run_git_with_timeout(repo_path, &["commit", "-m", &message], timeout_secs)
761                .map_err(|e| Error::ExecutionFailed(format!("git commit failed: {}", e)))?;
762        let _ = output;
763
764        let state_after = Self::capture_state(repo_path, timeout_secs)?;
765
766        let mut out = Output::ok(format!("Committed: {}", message));
767        out.data = Some(serde_json::json!({
768            "operation": "commit",
769            "path": repo_path.display().to_string(),
770            "message": message,
771            "commit_sha_before": state_before.commit_sha,
772            "commit_sha_after": state_after.commit_sha,
773            "branch": state_after.branch,
774            "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
775        }));
776        Ok(out)
777    }
778
779    /// Executes git revert operation.
780    fn op_revert(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
781        let timeout_secs = args.timeout_secs.unwrap_or(300);
782
783        if !repo_path.exists() {
784            return Err(Error::ExecutionFailed(format!(
785                "Repository not found: {}",
786                repo_path.display()
787            )));
788        }
789
790        let commit_sha = args
791            .commit_sha
792            .as_ref()
793            .ok_or_else(|| Error::ExecutionFailed("Commit SHA required for revert".into()))?;
794
795        Self::validate_commit_sha(commit_sha)?;
796
797        let state_before = Self::capture_state(repo_path, timeout_secs)?;
798
799        if ctx.dry_run {
800            let mut out = Output::ok(format!("DRY RUN: would revert {}", commit_sha));
801            out.data = Some(serde_json::json!({
802                "operation": "revert",
803                "path": repo_path.display().to_string(),
804                "commit_sha": commit_sha,
805                "dry_run": true
806            }));
807            return Ok(out);
808        }
809
810        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
811
812        let output = Self::run_git_with_timeout(
813            repo_path,
814            &["revert", "--no-edit", commit_sha],
815            timeout_secs,
816        )
817        .map_err(|e| Error::ExecutionFailed(format!("git revert failed: {}", e)))?;
818        let _ = output;
819
820        let state_after = Self::capture_state(repo_path, timeout_secs)?;
821
822        let mut out = Output::ok(format!("Reverted {}", commit_sha));
823        out.data = Some(serde_json::json!({
824            "operation": "revert",
825            "path": repo_path.display().to_string(),
826            "commit_sha": commit_sha,
827            "commit_sha_before": state_before.commit_sha,
828            "commit_sha_after": state_after.commit_sha,
829            "branch": state_after.branch,
830            "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
831        }));
832        Ok(out)
833    }
834
835    /// Executes git clean operation.
836    fn op_clean(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
837        let timeout_secs = args.timeout_secs.unwrap_or(300);
838
839        if !repo_path.exists() {
840            return Err(Error::ExecutionFailed(format!(
841                "Repository not found: {}",
842                repo_path.display()
843            )));
844        }
845
846        let state_before = Self::capture_state(repo_path, timeout_secs)?;
847
848        if ctx.dry_run {
849            let untracked_count = Self::count_untracked_files(repo_path, timeout_secs).unwrap_or(0);
850            let preview =
851                Self::run_git_with_timeout(repo_path, &["clean", "-fd", "--dry-run"], timeout_secs)
852                    .map(|s| Self::sanitize_output(&s))
853                    .unwrap_or_default();
854            let mut out = Output::ok(format!(
855                "DRY RUN: would clean {} untracked files",
856                untracked_count
857            ));
858            out.data = Some(serde_json::json!({
859                "operation": "clean",
860                "path": repo_path.display().to_string(),
861                "dry_run": true,
862                "untracked_count": untracked_count,
863                "preview": preview
864            }));
865            return Ok(out);
866        }
867
868        let untracked_count = Self::count_untracked_files(repo_path, timeout_secs)?;
869        if untracked_count > MAX_CLEAN_FILES {
870            return Err(Error::ExecutionFailed(format!(
871                "Too many untracked files to clean safely: {} (limit: {})",
872                untracked_count, MAX_CLEAN_FILES
873            )));
874        }
875
876        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
877
878        let output = Self::run_git_with_timeout(repo_path, &["clean", "-fd"], timeout_secs)
879            .map_err(|e| Error::ExecutionFailed(format!("git clean failed: {}", e)))?;
880        let _ = output;
881
882        let state_after = Self::capture_state(repo_path, timeout_secs)?;
883
884        let mut out = Output::ok(format!("Cleaned {} untracked files", untracked_count));
885        out.data = Some(serde_json::json!({
886            "operation": "clean",
887            "path": repo_path.display().to_string(),
888            "was_clean": state_before.is_clean,
889            "is_clean": state_after.is_clean,
890            "untracked_files_removed": untracked_count,
891            "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
892        }));
893        Ok(out)
894    }
895
896    /// Executes git status operation.
897    #[allow(clippy::unused_self, clippy::used_underscore_binding)]
898    fn op_status(&self, _args: &GitExecArgs, _ctx: &Context, repo_path: &Path) -> Result<Output> {
899        let timeout_secs = _args.timeout_secs.unwrap_or(300);
900
901        if !repo_path.exists() {
902            return Err(Error::ExecutionFailed(format!(
903                "Repository not found: {}",
904                repo_path.display()
905            )));
906        }
907
908        let state = Self::capture_state(repo_path, timeout_secs)?;
909
910        let status_output =
911            Self::run_git_with_timeout(repo_path, &["status", "--porcelain"], timeout_secs)
912                .unwrap_or_default();
913
914        let branch = state.branch.clone().unwrap_or_default();
915        let remote_url = state.remote_url.clone().unwrap_or_default();
916
917        let mut out = Output::ok(format!(
918            "On branch {}: {}",
919            branch,
920            if state.is_clean { "clean" } else { "dirty" }
921        ));
922        out.data = Some(serde_json::json!({
923            "operation": "status",
924            "path": repo_path.display().to_string(),
925            "branch": branch,
926            "remote_url": remote_url,
927            "commit_sha": state.commit_sha,
928            "is_clean": state.is_clean,
929            "status": status_output
930        }));
931        Ok(out)
932    }
933}
934
935impl TypedCapability for GitExec {
936    type Args = GitExecArgs;
937
938    fn name(&self) -> &'static str {
939        "GitExec"
940    }
941
942    fn description(&self) -> &'static str {
943        "git operations: clone, pull, commit, revert, clean, status. state tracking (sha, branch, remote), SSRF-blocked URLs, secret detection, timeout, undo via backup."
944    }
945
946    fn schema(&self) -> Value {
947        serde_json::json!({
948            "type": "object",
949            "properties": {
950                "operation": { "type": "string", "enum": ["clone", "pull", "commit", "revert", "clean", "status"] },
951                "url": { "type": "string" },
952                "path": { "type": "string" },
953                "branch": { "type": "string" },
954                "message": { "type": "string" },
955                "files": { "type": "array", "items": { "type": "string" } },
956                "commit_sha": { "type": "string" },
957                "timeout_secs": { "type": "integer", "minimum": 1, "maximum": 600 }
958            },
959            "required": ["operation"]
960        })
961    }
962
963    fn execute(
964        &self,
965        args: GitExecArgs,
966        ctx: &Context,
967    ) -> std::result::Result<Output, CapabilityError> {
968        let valid_ops = ["clone", "pull", "commit", "revert", "clean", "status"];
969        if !valid_ops.contains(&args.operation.as_str()) {
970            return Err(CapabilityError::InvalidArgs(format!(
971                "Invalid operation: {}. Must be one of: {}",
972                args.operation,
973                valid_ops.join(", ")
974            )));
975        }
976
977        if args.operation == "clone" {
978            if let Some(url) = &args.url {
979                Self::validate_url(url)
980                    .map_err(|e| CapabilityError::PermissionDenied(e.to_string()))?;
981            } else {
982                return Err(CapabilityError::InvalidArgs(
983                    "URL required for clone".into(),
984                ));
985            }
986            if let Some(path) = &args.path {
987                let ctx = PathContext {
988                    require_exists: false,
989                    require_file: false,
990                    ..Default::default()
991                };
992                validate_path(path, &ctx).map_err(CapabilityError::PermissionDenied)?;
993            }
994        }
995
996        if args.operation != "clone" {
997            if let Some(path) = &args.path {
998                let ctx = PathContext {
999                    require_exists: true,
1000                    require_file: false,
1001                    ..Default::default()
1002                };
1003                validate_path(path, &ctx).map_err(CapabilityError::PermissionDenied)?;
1004            }
1005        }
1006
1007        if let Some(branch) = &args.branch {
1008            Self::validate_branch_name(branch)
1009                .map_err(|e| CapabilityError::InvalidArgs(e.to_string()))?;
1010        }
1011
1012        if let Some(sha) = &args.commit_sha {
1013            Self::validate_commit_sha(sha)
1014                .map_err(|e| CapabilityError::InvalidArgs(e.to_string()))?;
1015        }
1016
1017        let telemetry_before = Telemetry::capture();
1018        let process_before = ProcessSnapshot::capture();
1019
1020        let result = match args.operation.as_str() {
1021            "clone" => self.op_clone(&args, ctx),
1022            "pull" => {
1023                let path = args
1024                    .path
1025                    .as_ref()
1026                    .ok_or_else(|| CapabilityError::InvalidArgs("Path required for pull".into()))?;
1027                self.op_pull(&args, ctx, Path::new(path))
1028            }
1029            "commit" => {
1030                let path = args.path.as_ref().ok_or_else(|| {
1031                    CapabilityError::InvalidArgs("Path required for commit".into())
1032                })?;
1033                self.op_commit(&args, ctx, Path::new(path))
1034            }
1035            "revert" => {
1036                let path = args.path.as_ref().ok_or_else(|| {
1037                    CapabilityError::InvalidArgs("Path required for revert".into())
1038                })?;
1039                self.op_revert(&args, ctx, Path::new(path))
1040            }
1041            "clean" => {
1042                let path = args.path.as_ref().ok_or_else(|| {
1043                    CapabilityError::InvalidArgs("Path required for clean".into())
1044                })?;
1045                self.op_clean(&args, ctx, Path::new(path))
1046            }
1047            "status" => {
1048                let path = args.path.as_ref().ok_or_else(|| {
1049                    CapabilityError::InvalidArgs("Path required for status".into())
1050                })?;
1051                self.op_status(&args, ctx, Path::new(path))
1052            }
1053            _ => Err(Error::ExecutionFailed(format!(
1054                "Unknown operation: {}",
1055                args.operation
1056            ))),
1057        };
1058
1059        let telemetry_after = Telemetry::capture();
1060        let process_after = ProcessSnapshot::capture();
1061
1062        let mut output = result.map_err(|e| CapabilityError::Internal(e.to_string()))?;
1063        if let Some(obj) = output.data.as_mut().and_then(|d| d.as_object_mut()) {
1064            obj.insert(
1065                "telemetry_before".to_string(),
1066                serde_json::to_value(&telemetry_before).unwrap_or(Value::Null),
1067            );
1068            obj.insert(
1069                "telemetry_after".to_string(),
1070                serde_json::to_value(&telemetry_after).unwrap_or(Value::Null),
1071            );
1072            obj.insert(
1073                "process_before".to_string(),
1074                serde_json::to_value(&process_before.summary).unwrap_or(Value::Null),
1075            );
1076            obj.insert(
1077                "process_after".to_string(),
1078                serde_json::to_value(&process_after.summary).unwrap_or(Value::Null),
1079            );
1080        }
1081
1082        Ok(output)
1083    }
1084}
1085
#[cfg(test)]
mod tests {
    use super::*;
    use crate::capability::Capability;

    /// Backup directory shared by the tests that construct a `GitExec`.
    fn test_backup_dir() -> PathBuf {
        std::env::temp_dir().join("runtimo_git_test")
    }

    /// Non-dry-run context rooted at the system temp directory.
    fn test_context() -> Context {
        Context {
            dry_run: false,
            job_id: "test".into(),
            working_dir: std::env::temp_dir(),
        }
    }

    /// HTTPS and SSH URLs are accepted; plain HTTP and malformed input are not.
    #[test]
    fn validates_git_url_https_only() {
        for url in [
            "https://github.com/user/repo.git",
            "git@github.com:user/repo.git",
        ] {
            assert!(
                GitExec::validate_url(url).is_ok(),
                "expected URL to be accepted: {:?}",
                url
            );
        }

        for url in ["http://example.com/repo.git", "not-a-url", ""] {
            assert!(
                GitExec::validate_url(url).is_err(),
                "expected URL to be rejected: {:?}",
                url
            );
        }
    }

    /// Metadata services, loopback, and private-range hosts are rejected.
    #[test]
    fn blocks_ssrf_urls() {
        for url in [
            "https://169.254.169.254/latest/meta-data/",
            "https://127.0.0.1/repo.git",
            "https://localhost/repo.git",
            "https://192.168.1.1/repo.git",
            "https://metadata.google.internal/computeMetadata",
        ] {
            assert!(
                GitExec::validate_url(url).is_err(),
                "expected SSRF target to be rejected: {:?}",
                url
            );
        }
    }

    /// Embedded credentials are masked; credential-free URLs pass through unchanged.
    #[test]
    fn sanitizes_credentials_from_url() {
        for (input, expected) in [
            (
                "https://user:pass@github.com/repo.git",
                "https://***@github.com/repo.git",
            ),
            ("https://github.com/repo.git", "https://github.com/repo.git"),
            ("git@github.com:user/repo.git", "git@github.com:user/repo.git"),
        ] {
            assert_eq!(GitExec::sanitize_url(input), expected);
        }
    }

    /// Known secret file names are flagged; ordinary source files are not.
    #[test]
    fn detects_secret_files() {
        for path in [
            ".env",
            "config/.env",
            "credentials.json",
            ".ssh/id_rsa",
            "src/.env.local",
        ] {
            assert!(
                GitExec::is_secret_file(path),
                "expected secret file to be detected: {:?}",
                path
            );
        }

        for path in ["main.rs", "Cargo.toml", "README.md"] {
            assert!(
                !GitExec::is_secret_file(path),
                "expected ordinary file to pass: {:?}",
                path
            );
        }
    }

    /// Ordinary branch names pass; injection attempts and invalid ref syntax fail.
    #[test]
    fn validates_branch_name() {
        for name in ["main", "feature/my-branch", "v1.0"] {
            assert!(
                GitExec::validate_branch_name(name).is_ok(),
                "expected branch name to be accepted: {:?}",
                name
            );
        }

        for name in [
            "",
            "bad..name",
            "@{..}",
            // Option injection
            "--force",
            "--help",
            // Ref injection
            "refs/heads/main",
            // Control chars and whitespace
            "bad\nname",
            "bad\tname",
            // Metacharacters
            "bad:name",
            "bad~name",
            "bad^name",
            "bad*name",
            "bad[name",
            "bad\\name",
            "bad?name",
            "name.lock",
        ] {
            assert!(
                GitExec::validate_branch_name(name).is_err(),
                "expected branch name to be rejected: {:?}",
                name
            );
        }
    }

    /// Abbreviated and full hex SHAs pass; short, empty, and non-hex input fail.
    #[test]
    fn validates_commit_sha() {
        for sha in [
            "abc1234",
            "a1b2c3d4",
            "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0",
        ] {
            assert!(
                GitExec::validate_commit_sha(sha).is_ok(),
                "expected SHA to be accepted: {:?}",
                sha
            );
        }

        for sha in ["abc123", "", "xyz123"] {
            assert!(
                GitExec::validate_commit_sha(sha).is_err(),
                "expected SHA to be rejected: {:?}",
                sha
            );
        }
    }

    /// A clone whose destination climbs out via `..` must not succeed.
    #[allow(clippy::expect_used)]
    #[test]
    fn rejects_path_traversal() {
        let cap = GitExec::new(test_backup_dir()).expect("Failed to create GitExec");

        let result = Capability::execute(
            &cap,
            &serde_json::json!({
                "operation": "clone",
                "url": "https://github.com/user/repo.git",
                "path": "../../../etc/passwd"
            }),
            &test_context(),
        );

        // The blanket impl's validate always returns Ok, so path traversal
        // is caught at execute time, not validate time. Only an error is
        // acceptable here: a successful output would mean the clone ran.
        assert!(
            result.is_err(),
            "path traversal must be rejected by execute"
        );
        std::fs::remove_dir_all(test_backup_dir()).ok();
    }

    /// Operation names outside the supported set are rejected.
    #[allow(clippy::expect_used)]
    #[test]
    fn rejects_invalid_operation() {
        let cap = GitExec::new(test_backup_dir()).expect("Failed to create GitExec");

        let result = Capability::execute(
            &cap,
            &serde_json::json!({
                "operation": "invalid_op"
            }),
            &test_context(),
        );

        assert!(result.is_err());
        std::fs::remove_dir_all(test_backup_dir()).ok();
    }

    /// Status on a path that does not exist is an error.
    #[test]
    #[allow(clippy::expect_used)]
    fn status_on_nonexistent_repo() {
        let cap = GitExec::new(test_backup_dir()).expect("Failed to create GitExec");

        let result = Capability::execute(
            &cap,
            &serde_json::json!({
                "operation": "status",
                "path": "/tmp/nonexistent_repo"
            }),
            &test_context(),
        );

        assert!(result.is_err());
        std::fs::remove_dir_all(test_backup_dir()).ok();
    }

    /// Blank messages are rejected and NUL bytes are stripped from the rest.
    #[test]
    #[allow(clippy::expect_used)]
    fn sanitizes_commit_message() {
        assert!(GitExec::sanitize_commit_message("valid commit").is_ok());
        assert!(GitExec::sanitize_commit_message("  trimmed  ").is_ok());
        assert!(GitExec::sanitize_commit_message("").is_err());
        assert!(GitExec::sanitize_commit_message("   ").is_err());

        let sanitized = GitExec::sanitize_commit_message("hello\x00world")
            .expect("a message containing NUL should be sanitized, not rejected");
        assert!(!sanitized.contains('\x00'));
    }

    /// A git command that cannot finish must come back as an error.
    #[test]
    #[allow(clippy::expect_used)]
    fn timeout_enforced_on_git_command() {
        // Start a TCP listener on localhost that accepts but never responds.
        // This creates a guaranteed-timeout scenario without depending on
        // external network behavior.
        let listener =
            std::net::TcpListener::bind("127.0.0.1:0").expect("failed to bind TCP listener");
        let port = listener
            .local_addr()
            .expect("bound listener should report its local address")
            .port();

        let tmp = std::env::temp_dir().join("runtimo_git_timeout_test");
        std::fs::create_dir_all(&tmp).ok();
        Command::new("git")
            .arg("init")
            .current_dir(&tmp)
            .output()
            .ok();

        // Spawn a thread that accepts one connection and hangs.
        // The git clone will connect and wait for a response that never comes.
        // The thread is never joined; it ends with the test process.
        let _hang_handle = std::thread::spawn(move || {
            if let Ok((_stream, _addr)) = listener.accept() {
                // Hold the connection open indefinitely — never send a response
                std::thread::sleep(std::time::Duration::from_mins(5));
            }
        });

        // Calls the low-level runner directly with a 2 second timeout; the
        // plain-HTTP URL never goes through URL validation on this path.
        let result = GitExec::run_git_with_timeout(
            &tmp,
            &["clone", &format!("http://127.0.0.1:{}/repo.git", port)],
            2,
        );

        // The operation should fail with a timeout (or a connection error
        // if git detects the protocol mismatch before the timeout fires).
        assert!(
            result.is_err(),
            "Expected timeout or connection error, got: {:?}",
            result
        );

        std::fs::remove_dir_all(&tmp).ok();
    }
}