Skip to main content

runtimo_core/capabilities/
git_exec.rs

1//! GitExec capability — git operations with state tracking and undo support.
2//!
3//! Provides git operations (clone, pull, commit, revert, clean, status) with:
4//! - State tracking (commit sha, branch, remote URL)
5//! - Backup-before-mutate for undo support
6//! - WAL logging for audit trail
7//! - Path traversal protection
8//! - Timeout enforcement on all git subprocesses
9//! - URL validation (HTTPS/SSH only, SSRF blocking)
10//! - Credential sanitization from output
11//! - Secret file detection for git add
12//! - Telemetry and process tracking before/after execution
13//!
14//! # Network capability
15//!
16//! **Git operations ARE inherently network-capable.** `git clone`, `git pull`,
17//! and `git fetch` make outbound connections to remote repositories.
18//! This is by design — denying network access would make GitExec useless.
19//!
20//! The network isolation is at the transport/protocol level:
21//! - Only HTTPS (`https://`) and SSH (`git@`) URLs are accepted
22//! - SSRF targets (metadata services, localhost, private ranges) are blocked
23//! - Credentials are sanitized from all output and telemetry
24//!
25//! **Note on ShellExec interaction:** GitExec spawns `git` subprocesses which
26//! internally invoke `git-remote-https` (a git helper, NOT the system `curl`).
27//! The ShellExec network blocklist (`curl`, `wget`, etc.) does NOT affect
28//! GitExec — git uses its own transport layer. However, `RUNTIMO_ENABLE_NETWORK`
29//! does NOT gate GitExec; GitExec's network access is controlled by its own
30//! URL validation and SSRF blocking.
31//!
32//! # Example
33//!
34//! ```rust,ignore
35//! use runtimo_core::capabilities::GitExec;
36//! use runtimo_core::capability::{Capability, Context};
37//! use serde_json::json;
38//! use std::path::PathBuf;
39//!
40//! let cap = GitExec::new(PathBuf::from("/tmp/backups")).unwrap();
41//! let result = cap.execute(
42//!     &json!({"operation": "clone", "url": "https://github.com/user/repo.git", "path": "/tmp/repo"}),
43//!     &Context { dry_run: false, job_id: "job1".into(), working_dir: PathBuf::from("/tmp") }
44//! ).unwrap();
45//!
46//! assert!(result.success);
47//! ```
48
49use crate::backup::BackupManager;
50use crate::capability::{Capability, Context, Output};
51use crate::processes::ProcessSnapshot;
52use crate::telemetry::Telemetry;
53use crate::validation::path::{validate_path, PathContext};
54use crate::{Error, Result};
55use serde::{Deserialize, Serialize};
56use serde_json::Value;
57use std::path::{Path, PathBuf};
58use std::process::Command;
59use std::time::{Duration, Instant};
60
61/// Arguments for the [`GitExec`] capability; which optional fields are required depends on `operation`.
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct GitExecArgs {
64    /// Git operation to perform (clone, pull, commit, revert, clean, status).
65    pub operation: String,
66    /// Repository URL (required for clone; must be `https://` or `git@` and pass SSRF checks).
67    pub url: Option<String>,
68    /// Local path to repository (for clone/commit/revert/clean/status); for clone it must not exist yet.
69    pub path: Option<String>,
70    /// Branch to check out when cloning (passed to `git clone -b`).
71    pub branch: Option<String>,
72    /// Commit message (required for commit; control characters other than newline/tab are stripped).
73    pub message: Option<String>,
74    /// Files to stage for commit; when omitted, untracked files that do not look like secrets are staged.
75    pub files: Option<Vec<String>>,
76    /// SHA of the commit to revert (required for revert; 7-40 hex characters).
77    pub commit_sha: Option<String>,
78    /// Timeout in seconds applied to each git subprocess (default: 300).
79    pub timeout_secs: Option<u64>,
80}
81
82/// Snapshot of a repository's git state, captured before and after an operation.
83#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct GitState {
85    /// Current commit SHA (`git rev-parse HEAD`), or `None` if it could not be read.
86    pub commit_sha: Option<String>,
87    /// Current branch name (`git rev-parse --abbrev-ref HEAD`), or `None` if it could not be read.
88    pub branch: Option<String>,
89    /// URL of the `origin` remote with any credentials redacted; `None` if unavailable or empty.
90    pub remote_url: Option<String>,
91    /// Repository path (lossy UTF-8 rendering of the path that was inspected).
92    pub repo_path: String,
93    /// `true` when `git status --porcelain` produced no output at all.
94    pub is_clean: bool,
95}
96
97/// Lowercase file names/paths treated as secrets: matching paths are refused or skipped when staging.
98const SECRET_PATTERNS: &[&str] = &[
99    ".env",
100    ".env.local",
101    ".env.production",
102    ".env.staging",
103    "credentials.json",
104    "credentials.yml",
105    "credentials.yaml",
106    "secrets.json",
107    "secrets.yml",
108    "secrets.yaml",
109    ".ssh/id_rsa",
110    ".ssh/id_ed25519",
111    ".ssh/id_dsa",
112    "id_rsa",
113    "id_ed25519",
114    "id_dsa",
115    ".npmrc",
116    ".pypirc",
117    ".docker/config.json",
118    "token",
119    "api_key",
120    "api_secret",
121    ".aws/credentials",
122    ".azure/credentials",
123    "keystore.jks",
124    "keystore.p12",
125];
126
127/// Upper bound on untracked files a clean may remove; a larger count makes the clean operation fail.
128const MAX_CLEAN_FILES: usize = 1000;
129
130/// Capability that executes git operations with before/after state tracking.
131///
132/// Supports clone, pull, commit, revert, clean, and status operations.
133/// Takes a backup through its [`BackupManager`] before pull, commit and revert so they can be undone.
134pub struct GitExec {
135    backup_mgr: BackupManager,
136}
137
138impl GitExec {
139    /// Creates a new GitExec capability with the given backup directory.
140    ///
141    /// # Errors
142    ///
143    /// Returns [`crate::Error::BackupError`] if the backup
144    /// directory cannot be created.
145    pub fn new(backup_dir: PathBuf) -> Result<Self> {
146        Ok(Self {
147            backup_mgr: BackupManager::new(backup_dir)?,
148        })
149    }
150
151    /// Runs a git command with timeout enforcement and returns the output.
152    fn run_git_with_timeout(repo_path: &Path, args: &[&str], timeout_secs: u64) -> Result<String> {
153        let mut child = Command::new("git")
154            .current_dir(repo_path)
155            .args(args)
156            .stdin(std::process::Stdio::null())
157            .spawn()
158            .map_err(|e| Error::ExecutionFailed(format!("git command failed: {}", e)))?;
159
160        let timeout = Duration::from_secs(timeout_secs);
161        let start = Instant::now();
162
163        loop {
164            match child.try_wait() {
165                Ok(Some(status)) => {
166                    let output = child
167                        .wait_with_output()
168                        .map_err(|e| Error::ExecutionFailed(format!("git wait failed: {}", e)))?;
169                    if !status.success() {
170                        let stderr = String::from_utf8_lossy(&output.stderr);
171                        return Err(Error::ExecutionFailed(format!(
172                            "git {}: {}",
173                            args.join(" "),
174                            stderr.trim()
175                        )));
176                    }
177                    return Ok(String::from_utf8_lossy(&output.stdout).to_string());
178                }
179                Ok(None) => {
180                    if start.elapsed() > timeout {
181                        let _ = child.kill();
182                        let _ = child.wait();
183                        return Err(Error::ExecutionFailed(format!(
184                            "git {} timed out after {}s",
185                            args.join(" "),
186                            timeout_secs
187                        )));
188                    }
189                    std::thread::sleep(Duration::from_millis(50));
190                }
191                Err(e) => {
192                    let _ = child.kill();
193                    let _ = child.wait();
194                    return Err(Error::ExecutionFailed(format!("git wait error: {}", e)));
195                }
196            }
197        }
198    }
199
200    /// Checks if the working tree is clean (no uncommitted changes).
201    fn is_working_tree_clean(repo_path: &Path) -> bool {
202        let output = Command::new("git")
203            .current_dir(repo_path)
204            .args(["status", "--porcelain"])
205            .output();
206
207        match output {
208            Ok(out) => out.stdout.is_empty() && out.stderr.is_empty(),
209            Err(_) => false,
210        }
211    }
212
213    /// Validates a git URL format. Blocks http:// (MITM risk) and SSRF patterns.
214    fn validate_url(url: &str) -> Result<()> {
215        let is_https = url.starts_with("https://");
216        let is_ssh = url.starts_with("git@");
217        if !is_https && !is_ssh {
218            return Err(Error::SchemaValidationFailed(format!(
219                "Insecure or unsupported URL scheme: {} (must use https:// or git@ SSH)",
220                url
221            )));
222        }
223
224        if is_https {
225            if let Some(host_part) = url
226                .strip_prefix("https://")
227                .and_then(|s| s.split('/').next())
228            {
229                let host = host_part.split(':').next().unwrap_or(host_part);
230                if Self::is_ssrf_host(host) {
231                    return Err(Error::SchemaValidationFailed(format!(
232                        "SSRF blocked: URL targets internal/metadata address: {}",
233                        url
234                    )));
235                }
236            }
237        }
238
239        Ok(())
240    }
241
242    /// Checks if a host is a known SSRF target (cloud metadata, localhost, link-local).
243    fn is_ssrf_host(host: &str) -> bool {
244        let lower = host.to_lowercase();
245        let ssrf_indicators = [
246            "169.254.169.254",
247            "169.254.",
248            "127.0.0.1",
249            "localhost",
250            "0.0.0.0",
251            "::1",
252            "10.0.0.",
253            "10.0.1.",
254            "10.0.2.",
255            "10.0.3.",
256            "172.16.",
257            "172.17.",
258            "172.18.",
259            "172.19.",
260            "172.20.",
261            "172.21.",
262            "172.22.",
263            "172.23.",
264            "172.24.",
265            "172.25.",
266            "172.26.",
267            "172.27.",
268            "172.28.",
269            "172.29.",
270            "172.30.",
271            "172.31.",
272            "192.168.",
273            "metadata.google",
274            "metadata.azure",
275            "instance-data",
276            "100.100.100.200",
277            "[::1]",
278            "[fe80:",
279        ];
280        ssrf_indicators
281            .iter()
282            .any(|indicator| lower.contains(indicator))
283    }
284
285    /// Validates a branch name against git's ref naming rules and option injection.
286    ///
287    /// Rejects: empty branches, `..` (range spec), `@{` (reflog), `--` prefix
288    /// (option injection), `refs/` patterns (ref injection), control characters,
289    /// whitespace, and shell/git metacharacters (`:`, `~`, `^`, `*`, `[`, `\\`,
290    /// `.lock`, `?`).
291    fn validate_branch_name(branch: &str) -> Result<()> {
292        if branch.is_empty() {
293            return Err(Error::SchemaValidationFailed("Branch name is empty".into()));
294        }
295        if branch.contains("..") || branch.contains("@{") {
296            return Err(Error::SchemaValidationFailed(format!(
297                "Invalid branch name: {}",
298                branch
299            )));
300        }
301        if branch.starts_with("--") {
302            return Err(Error::SchemaValidationFailed(format!(
303                "Branch name cannot start with '--': {}",
304                branch
305            )));
306        }
307        if branch.starts_with("refs/") || branch.contains("/refs/") {
308            return Err(Error::SchemaValidationFailed(format!(
309                "Ref injection detected in branch name: {}",
310                branch
311            )));
312        }
313        if branch.contains(|c: char| c.is_control() || c.is_whitespace()) {
314            return Err(Error::SchemaValidationFailed(format!(
315                "Branch name contains control or whitespace: {}",
316                branch
317            )));
318        }
319        if branch.contains([':', '~', '^', '*', '[', '\\', '?'])
320            || std::path::Path::new(branch)
321                .extension()
322                .is_some_and(|ext| ext.eq_ignore_ascii_case("lock"))
323        {
324            return Err(Error::SchemaValidationFailed(format!(
325                "Branch name contains invalid character: {}",
326                branch
327            )));
328        }
329        Ok(())
330    }
331
332    /// Validates a commit SHA.
333    fn validate_commit_sha(sha: &str) -> Result<()> {
334        if sha.len() < 7 || sha.len() > 40 {
335            return Err(Error::SchemaValidationFailed(format!(
336                "Invalid commit SHA length: {}",
337                sha
338            )));
339        }
340        if !sha.chars().all(|c| c.is_ascii_hexdigit()) {
341            return Err(Error::SchemaValidationFailed(format!(
342                "Invalid commit SHA: {}",
343                sha
344            )));
345        }
346        Ok(())
347    }
348
349    /// Sanitizes credentials from a URL string (redacts user:pass@).
350    /// Preserves SSH-style URLs (git@host:path) unchanged.
351    #[allow(clippy::arithmetic_side_effects)]
352    fn sanitize_url(url: &str) -> String {
353        if url.starts_with("git@") {
354            return url.to_string();
355        }
356        if let Some(at_pos) = url.find('@') {
357            if let Some(scheme_end) = url.find("://") {
358                let scheme = &url[..scheme_end + 3];
359                let after_at = &url[at_pos + 1..];
360                return format!("{}***@{}", scheme, after_at);
361            }
362            return format!("***@{}", &url[at_pos + 1..]);
363        }
364        url.to_string()
365    }
366
367    /// Sanitizes git output to remove credential leakage.
368    fn sanitize_output(output: &str) -> String {
369        let re_pattern = |line: &str| -> String {
370            let mut result = String::new();
371            let mut chars = line.chars().peekable();
372            while let Some(c) = chars.next() {
373                if c == ':' && chars.peek() == Some(&'/') && chars.clone().nth(1) == Some('/') {
374                    result.push_str("://");
375                    chars.next();
376                    chars.next();
377                    let mut user_pass = String::new();
378                    let mut found_at = false;
379                    for nc in chars.by_ref() {
380                        if nc == '@' {
381                            found_at = true;
382                            break;
383                        }
384                        user_pass.push(nc);
385                    }
386                    if found_at && !user_pass.is_empty() {
387                        result.push_str("***@");
388                    } else {
389                        result.push_str(&user_pass);
390                        if found_at {
391                            result.push('@');
392                        }
393                    }
394                } else {
395                    result.push(c);
396                }
397            }
398            result
399        };
400
401        output
402            .lines()
403            .map(re_pattern)
404            .collect::<Vec<_>>()
405            .join("\n")
406    }
407
408    /// Checks if a file path looks like a secret file that should not be committed.
409    fn is_secret_file(path: &str) -> bool {
410        let lower = path.to_lowercase();
411        SECRET_PATTERNS.iter().any(|pattern| {
412            lower == *pattern
413                || lower.ends_with(&format!("/{}", pattern))
414                || lower.contains(&format!("/{}/", pattern))
415        })
416    }
417
418    /// Validates a file path for git add (no traversal, no secrets).
419    fn validate_add_file(file: &str, repo_path: &Path) -> Result<()> {
420        if file.contains("..") {
421            return Err(Error::SchemaValidationFailed(format!(
422                "Path traversal in file path: {}",
423                file
424            )));
425        }
426        if Self::is_secret_file(file) {
427            return Err(Error::SchemaValidationFailed(format!(
428                "Secret file detected, refusing to add: {}",
429                file
430            )));
431        }
432        let full_path = repo_path.join(file);
433        if full_path.exists() {
434            let canonical = full_path.canonicalize().map_err(|e| {
435                Error::SchemaValidationFailed(format!("Cannot resolve file {}: {}", file, e))
436            })?;
437            let canonical_repo = repo_path.canonicalize().map_err(|e| {
438                Error::SchemaValidationFailed(format!("Cannot resolve repo: {}", e))
439            })?;
440            if !canonical.starts_with(&canonical_repo) {
441                return Err(Error::SchemaValidationFailed(format!(
442                    "File {} escapes repository boundary",
443                    file
444                )));
445            }
446        }
447        Ok(())
448    }
449
450    /// Checks available disk space (returns free bytes, or None if unknown).
451    fn disk_free_bytes(path: &Path) -> Option<u64> {
452        let output = Command::new("df")
453            .arg("--output=avail")
454            .arg("-B1")
455            .arg(path)
456            .output()
457            .ok()?;
458        if output.status.success() {
459            let stdout = String::from_utf8_lossy(&output.stdout);
460            stdout.lines().nth(1)?.trim().parse().ok()
461        } else {
462            None
463        }
464    }
465
466    /// Counts untracked files that would be removed by git clean -fd.
467    fn count_untracked_files(repo_path: &Path, timeout_secs: u64) -> Result<usize> {
468        let output = Self::run_git_with_timeout(
469            repo_path,
470            &["ls-files", "--others", "--exclude-standard"],
471            timeout_secs,
472        )?;
473        Ok(output.lines().filter(|l| !l.is_empty()).count())
474    }
475
476    /// Sanitizes a commit message (strips control chars, ensures non-empty).
477    fn sanitize_commit_message(msg: &str) -> Result<String> {
478        let sanitized: String = msg
479            .chars()
480            .filter(|c| !c.is_control() || *c == '\n' || *c == '\t')
481            .collect();
482        let trimmed = sanitized.trim();
483        if trimmed.is_empty() {
484            return Err(Error::SchemaValidationFailed(
485                "Commit message is empty after sanitization".into(),
486            ));
487        }
488        Ok(trimmed.to_string())
489    }
490
491    /// Creates a backup unconditionally before any mutating operation.
492    fn backup_before_mutation(&self, repo_path: &Path, job_id: &str) -> Result<PathBuf> {
493        self.backup_mgr.create_backup(repo_path, job_id)
494    }
495
496    /// Captures the current git state for a repository.
497    fn capture_state(repo_path: &Path, timeout_secs: u64) -> Result<GitState> {
498        let commit_sha =
499            Self::run_git_with_timeout(repo_path, &["rev-parse", "HEAD"], timeout_secs)
500                .map(|s| s.trim().to_string())
501                .ok();
502
503        let branch = Self::run_git_with_timeout(
504            repo_path,
505            &["rev-parse", "--abbrev-ref", "HEAD"],
506            timeout_secs,
507        )
508        .map(|s| s.trim().to_string())
509        .ok();
510
511        let remote_url =
512            Self::run_git_with_timeout(repo_path, &["remote", "get-url", "origin"], timeout_secs)
513                .ok()
514                .and_then(|s| {
515                    let trimmed = s.trim().to_string();
516                    let sanitized = Self::sanitize_url(&trimmed);
517                    if sanitized.is_empty() {
518                        None
519                    } else {
520                        Some(sanitized)
521                    }
522                });
523
524        let is_clean = Self::is_working_tree_clean(repo_path);
525
526        Ok(GitState {
527            commit_sha,
528            branch,
529            remote_url,
530            repo_path: repo_path.to_string_lossy().to_string(),
531            is_clean,
532        })
533    }
534
535    /// Executes git clone operation.
536    fn op_clone(&self, args: &GitExecArgs, ctx: &Context) -> Result<Output> {
537        let _ = self;
538        let timeout_secs = args.timeout_secs.unwrap_or(300);
539        let url = args
540            .url
541            .as_ref()
542            .ok_or_else(|| Error::ExecutionFailed("URL required for clone".into()))?;
543        let path = args
544            .path
545            .as_ref()
546            .ok_or_else(|| Error::ExecutionFailed("Path required for clone".into()))?;
547
548        Self::validate_url(url)?;
549
550        let path = Path::new(path);
551        if path.exists() {
552            return Err(Error::ExecutionFailed(format!(
553                "Path already exists: {}",
554                path.display()
555            )));
556        }
557
558        if let Some(free) = Self::disk_free_bytes(path.parent().unwrap_or_else(|| Path::new("/"))) {
559            if free < 100 * 1024 * 1024 {
560                return Err(Error::ExecutionFailed(
561                    "Insufficient disk space for clone (need at least 100MB)".into(),
562                ));
563            }
564        }
565
566        if ctx.dry_run {
567            return Ok(Output {
568                success: true,
569                data: serde_json::json!({
570                    "operation": "clone",
571                    "url": Self::sanitize_url(url),
572                    "path": path.display().to_string(),
573                    "dry_run": true
574                }),
575                message: Some(format!(
576                    "DRY RUN: would clone {} to {}",
577                    Self::sanitize_url(url),
578                    path.display()
579                )),
580            });
581        }
582
583        if let Some(parent) = path.parent() {
584            std::fs::create_dir_all(parent).map_err(|e| {
585                Error::ExecutionFailed(format!("mkdir {}: {}", parent.display(), e))
586            })?;
587        }
588
589        let mut cmd = Command::new("git");
590        cmd.arg("clone").arg(url).arg(path);
591
592        if let Some(branch) = &args.branch {
593            cmd.arg("-b").arg(branch);
594        }
595
596        let mut child = cmd
597            .stdin(std::process::Stdio::null())
598            .spawn()
599            .map_err(|e| Error::ExecutionFailed(format!("git clone spawn failed: {}", e)))?;
600
601        let timeout = Duration::from_secs(timeout_secs);
602        let start = Instant::now();
603        let status = loop {
604            match child.try_wait() {
605                Ok(Some(s)) => break s,
606                Ok(None) => {
607                    if start.elapsed() > timeout {
608                        let _ = child.kill();
609                        let _ = child.wait();
610                        return Err(Error::ExecutionFailed(format!(
611                            "git clone timed out after {}s",
612                            timeout_secs
613                        )));
614                    }
615                    std::thread::sleep(Duration::from_millis(100));
616                }
617                Err(e) => {
618                    let _ = child.kill();
619                    let _ = child.wait();
620                    return Err(Error::ExecutionFailed(format!(
621                        "git clone wait error: {}",
622                        e
623                    )));
624                }
625            }
626        };
627
628        if !status.success() {
629            return Err(Error::ExecutionFailed(
630                "git clone failed (see stderr)".into(),
631            ));
632        }
633
634        let state = Self::capture_state(path, timeout_secs)?;
635
636        Ok(Output {
637            success: true,
638            data: serde_json::json!({
639                "operation": "clone",
640                "url": Self::sanitize_url(url),
641                "path": path.display().to_string(),
642                "commit_sha": state.commit_sha,
643                "branch": state.branch,
644                "remote_url": state.remote_url
645            }),
646            message: Some(format!(
647                "Cloned {} to {}",
648                Self::sanitize_url(url),
649                path.display()
650            )),
651        })
652    }
653
654    /// Executes git pull operation.
655    fn op_pull(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
656        let timeout_secs = args.timeout_secs.unwrap_or(300);
657
658        if !repo_path.exists() {
659            return Err(Error::ExecutionFailed(format!(
660                "Repository not found: {}",
661                repo_path.display()
662            )));
663        }
664
665        let state_before = Self::capture_state(repo_path, timeout_secs)?;
666
667        if ctx.dry_run {
668            return Ok(Output {
669                success: true,
670                data: serde_json::json!({
671                    "operation": "pull",
672                    "path": repo_path.display().to_string(),
673                    "dry_run": true
674                }),
675                message: Some("DRY RUN: would pull".into()),
676            });
677        }
678
679        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
680
681        let output = Self::run_git_with_timeout(repo_path, &["pull", "--rebase"], timeout_secs)
682            .map_err(|e| Error::ExecutionFailed(format!("git pull failed: {}", e)))?;
683
684        let state_after = Self::capture_state(repo_path, timeout_secs)?;
685
686        Ok(Output {
687            success: true,
688            data: serde_json::json!({
689                "operation": "pull",
690                "path": repo_path.display().to_string(),
691                "commit_sha_before": state_before.commit_sha,
692                "commit_sha_after": state_after.commit_sha,
693                "branch": state_after.branch,
694                "backup_path": backup_path.map(|p| p.to_string_lossy().to_string()),
695                "git_output": Self::sanitize_output(&output)
696            }),
697            message: Some("Pulled successfully".into()),
698        })
699    }
700
701    /// Executes git commit operation.
702    fn op_commit(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
703        let timeout_secs = args.timeout_secs.unwrap_or(300);
704
705        if !repo_path.exists() {
706            return Err(Error::ExecutionFailed(format!(
707                "Repository not found: {}",
708                repo_path.display()
709            )));
710        }
711
712        let message = args
713            .message
714            .as_ref()
715            .ok_or_else(|| Error::ExecutionFailed("Commit message required".into()))?;
716        let message = Self::sanitize_commit_message(message)?;
717
718        let state_before = Self::capture_state(repo_path, timeout_secs)?;
719
720        if ctx.dry_run {
721            return Ok(Output {
722                success: true,
723                data: serde_json::json!({
724                    "operation": "commit",
725                    "path": repo_path.display().to_string(),
726                    "message": &message,
727                    "dry_run": true
728                }),
729                message: Some("DRY RUN: would commit".into()),
730            });
731        }
732
733        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
734
735        if let Some(files) = &args.files {
736            for file in files {
737                Self::validate_add_file(file, repo_path)?;
738                let output = Self::run_git_with_timeout(repo_path, &["add", file], timeout_secs)
739                    .map_err(|e| Error::ExecutionFailed(format!("git add failed: {}", e)))?;
740                let _ = output;
741            }
742        } else {
743            let untracked = Self::run_git_with_timeout(
744                repo_path,
745                &["ls-files", "--others", "--exclude-standard"],
746                timeout_secs,
747            )?;
748            for line in untracked.lines() {
749                let file = line.trim();
750                if file.is_empty() {
751                    continue;
752                }
753                if Self::is_secret_file(file) {
754                    eprintln!("[runtimo] Skipping secret file from git add: {}", file);
755                    continue;
756                }
757                Self::run_git_with_timeout(repo_path, &["add", file], timeout_secs).map_err(
758                    |e| Error::ExecutionFailed(format!("git add {} failed: {}", file, e)),
759                )?;
760            }
761        }
762
763        let output =
764            Self::run_git_with_timeout(repo_path, &["commit", "-m", &message], timeout_secs)
765                .map_err(|e| Error::ExecutionFailed(format!("git commit failed: {}", e)))?;
766        let _ = output;
767
768        let state_after = Self::capture_state(repo_path, timeout_secs)?;
769
770        Ok(Output {
771            success: true,
772            data: serde_json::json!({
773                "operation": "commit",
774                "path": repo_path.display().to_string(),
775                "message": message,
776                "commit_sha_before": state_before.commit_sha,
777                "commit_sha_after": state_after.commit_sha,
778                "branch": state_after.branch,
779                "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
780            }),
781            message: Some(format!("Committed: {}", message)),
782        })
783    }
784
785    /// Executes git revert operation.
786    fn op_revert(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
787        let timeout_secs = args.timeout_secs.unwrap_or(300);
788
789        if !repo_path.exists() {
790            return Err(Error::ExecutionFailed(format!(
791                "Repository not found: {}",
792                repo_path.display()
793            )));
794        }
795
796        let commit_sha = args
797            .commit_sha
798            .as_ref()
799            .ok_or_else(|| Error::ExecutionFailed("Commit SHA required for revert".into()))?;
800
801        Self::validate_commit_sha(commit_sha)?;
802
803        let state_before = Self::capture_state(repo_path, timeout_secs)?;
804
805        if ctx.dry_run {
806            return Ok(Output {
807                success: true,
808                data: serde_json::json!({
809                    "operation": "revert",
810                    "path": repo_path.display().to_string(),
811                    "commit_sha": commit_sha,
812                    "dry_run": true
813                }),
814                message: Some(format!("DRY RUN: would revert {}", commit_sha)),
815            });
816        }
817
818        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
819
820        let output = Self::run_git_with_timeout(
821            repo_path,
822            &["revert", "--no-edit", commit_sha],
823            timeout_secs,
824        )
825        .map_err(|e| Error::ExecutionFailed(format!("git revert failed: {}", e)))?;
826        let _ = output;
827
828        let state_after = Self::capture_state(repo_path, timeout_secs)?;
829
830        Ok(Output {
831            success: true,
832            data: serde_json::json!({
833                "operation": "revert",
834                "path": repo_path.display().to_string(),
835                "commit_sha": commit_sha,
836                "commit_sha_before": state_before.commit_sha,
837                "commit_sha_after": state_after.commit_sha,
838                "branch": state_after.branch,
839                "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
840            }),
841            message: Some(format!("Reverted {}", commit_sha)),
842        })
843    }
844
845    /// Executes git clean operation.
846    fn op_clean(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
847        let timeout_secs = args.timeout_secs.unwrap_or(300);
848
849        if !repo_path.exists() {
850            return Err(Error::ExecutionFailed(format!(
851                "Repository not found: {}",
852                repo_path.display()
853            )));
854        }
855
856        let state_before = Self::capture_state(repo_path, timeout_secs)?;
857
858        if ctx.dry_run {
859            let untracked_count = Self::count_untracked_files(repo_path, timeout_secs).unwrap_or(0);
860            let preview =
861                Self::run_git_with_timeout(repo_path, &["clean", "-fd", "--dry-run"], timeout_secs)
862                    .map(|s| Self::sanitize_output(&s))
863                    .unwrap_or_default();
864            return Ok(Output {
865                success: true,
866                data: serde_json::json!({
867                    "operation": "clean",
868                    "path": repo_path.display().to_string(),
869                    "dry_run": true,
870                    "untracked_count": untracked_count,
871                    "preview": preview
872                }),
873                message: Some(format!(
874                    "DRY RUN: would clean {} untracked files",
875                    untracked_count
876                )),
877            });
878        }
879
880        let untracked_count = Self::count_untracked_files(repo_path, timeout_secs)?;
881        if untracked_count > MAX_CLEAN_FILES {
882            return Err(Error::ExecutionFailed(format!(
883                "Too many untracked files to clean safely: {} (limit: {})",
884                untracked_count, MAX_CLEAN_FILES
885            )));
886        }
887
888        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
889
890        let output = Self::run_git_with_timeout(repo_path, &["clean", "-fd"], timeout_secs)
891            .map_err(|e| Error::ExecutionFailed(format!("git clean failed: {}", e)))?;
892        let _ = output;
893
894        let state_after = Self::capture_state(repo_path, timeout_secs)?;
895
896        Ok(Output {
897            success: true,
898            data: serde_json::json!({
899                "operation": "clean",
900                "path": repo_path.display().to_string(),
901                "was_clean": state_before.is_clean,
902                "is_clean": state_after.is_clean,
903                "untracked_files_removed": untracked_count,
904                "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
905            }),
906            message: Some(format!("Cleaned {} untracked files", untracked_count)),
907        })
908    }
909
910    /// Executes git status operation.
911    #[allow(clippy::unused_self, clippy::used_underscore_binding)]
912    fn op_status(&self, _args: &GitExecArgs, _ctx: &Context, repo_path: &Path) -> Result<Output> {
913        let timeout_secs = _args.timeout_secs.unwrap_or(300);
914
915        if !repo_path.exists() {
916            return Err(Error::ExecutionFailed(format!(
917                "Repository not found: {}",
918                repo_path.display()
919            )));
920        }
921
922        let state = Self::capture_state(repo_path, timeout_secs)?;
923
924        let status_output =
925            Self::run_git_with_timeout(repo_path, &["status", "--porcelain"], timeout_secs)
926                .unwrap_or_default();
927
928        let branch = state.branch.clone().unwrap_or_default();
929        let remote_url = state.remote_url.clone().unwrap_or_default();
930
931        Ok(Output {
932            success: true,
933            data: serde_json::json!({
934                "operation": "status",
935                "path": repo_path.display().to_string(),
936                "branch": branch,
937                "remote_url": remote_url,
938                "commit_sha": state.commit_sha,
939                "is_clean": state.is_clean,
940                "status": status_output
941            }),
942            message: Some(format!(
943                "On branch {}: {}",
944                branch,
945                if state.is_clean { "clean" } else { "dirty" }
946            )),
947        })
948    }
949}
950
951impl Capability for GitExec {
952    fn name(&self) -> &'static str {
953        "GitExec"
954    }
955
956    fn description(&self) -> &'static str {
957        "git ops: clone|pull|commit|revert|clean|status. state tracking, timeout, undo."
958    }
959
960    fn schema(&self) -> Value {
961        serde_json::json!({
962            "type": "object",
963            "properties": {
964                "operation": { "type": "string", "enum": ["clone", "pull", "commit", "revert", "clean", "status"] },
965                "url": { "type": "string" },
966                "path": { "type": "string" },
967                "branch": { "type": "string" },
968                "message": { "type": "string" },
969                "files": { "type": "array", "items": { "type": "string" } },
970                "commit_sha": { "type": "string" },
971                "timeout_secs": { "type": "integer", "minimum": 1, "maximum": 600 }
972            },
973            "required": ["operation"]
974        })
975    }
976
977    fn validate(&self, args: &Value) -> Result<()> {
978        let args: GitExecArgs = serde_json::from_value(args.clone())
979            .map_err(|e| Error::SchemaValidationFailed(e.to_string()))?;
980
981        let valid_ops = ["clone", "pull", "commit", "revert", "clean", "status"];
982        if !valid_ops.contains(&args.operation.as_str()) {
983            return Err(Error::SchemaValidationFailed(format!(
984                "Invalid operation: {}. Must be one of: {}",
985                args.operation,
986                valid_ops.join(", ")
987            )));
988        }
989
990        if args.operation == "clone" {
991            if let Some(url) = &args.url {
992                Self::validate_url(url)?;
993            } else {
994                return Err(Error::SchemaValidationFailed(
995                    "URL required for clone".into(),
996                ));
997            }
998            if let Some(path) = &args.path {
999                let ctx = PathContext {
1000                    require_exists: false,
1001                    require_file: false,
1002                    ..Default::default()
1003                };
1004                validate_path(path, &ctx).map_err(Error::SchemaValidationFailed)?;
1005            }
1006        }
1007
1008        if args.operation != "clone" {
1009            if let Some(path) = &args.path {
1010                let ctx = PathContext {
1011                    require_exists: true,
1012                    require_file: false,
1013                    ..Default::default()
1014                };
1015                validate_path(path, &ctx).map_err(Error::SchemaValidationFailed)?;
1016            }
1017        }
1018
1019        if let Some(branch) = &args.branch {
1020            Self::validate_branch_name(branch)?;
1021        }
1022
1023        if let Some(sha) = &args.commit_sha {
1024            Self::validate_commit_sha(sha)?;
1025        }
1026
1027        Ok(())
1028    }
1029
1030    fn execute(&self, args: &Value, ctx: &Context) -> Result<Output> {
1031        let args: GitExecArgs = serde_json::from_value(args.clone())
1032            .map_err(|e| Error::ExecutionFailed(e.to_string()))?;
1033
1034        let telemetry_before = Telemetry::capture();
1035        let process_before = ProcessSnapshot::capture();
1036
1037        let result = match args.operation.as_str() {
1038            "clone" => self.op_clone(&args, ctx),
1039            "pull" => {
1040                let path = args
1041                    .path
1042                    .as_ref()
1043                    .ok_or_else(|| Error::ExecutionFailed("Path required for pull".into()))?;
1044                self.op_pull(&args, ctx, Path::new(path))
1045            }
1046            "commit" => {
1047                let path = args
1048                    .path
1049                    .as_ref()
1050                    .ok_or_else(|| Error::ExecutionFailed("Path required for commit".into()))?;
1051                self.op_commit(&args, ctx, Path::new(path))
1052            }
1053            "revert" => {
1054                let path = args
1055                    .path
1056                    .as_ref()
1057                    .ok_or_else(|| Error::ExecutionFailed("Path required for revert".into()))?;
1058                self.op_revert(&args, ctx, Path::new(path))
1059            }
1060            "clean" => {
1061                let path = args
1062                    .path
1063                    .as_ref()
1064                    .ok_or_else(|| Error::ExecutionFailed("Path required for clean".into()))?;
1065                self.op_clean(&args, ctx, Path::new(path))
1066            }
1067            "status" => {
1068                let path = args
1069                    .path
1070                    .as_ref()
1071                    .ok_or_else(|| Error::ExecutionFailed("Path required for status".into()))?;
1072                self.op_status(&args, ctx, Path::new(path))
1073            }
1074            _ => Err(Error::ExecutionFailed(format!(
1075                "Unknown operation: {}",
1076                args.operation
1077            ))),
1078        };
1079
1080        let telemetry_after = Telemetry::capture();
1081        let process_after = ProcessSnapshot::capture();
1082
1083        let mut output = result?;
1084        if let Some(obj) = output.data.as_object_mut() {
1085            obj.insert(
1086                "telemetry_before".to_string(),
1087                serde_json::to_value(&telemetry_before).unwrap_or(Value::Null),
1088            );
1089            obj.insert(
1090                "telemetry_after".to_string(),
1091                serde_json::to_value(&telemetry_after).unwrap_or(Value::Null),
1092            );
1093            obj.insert(
1094                "process_before".to_string(),
1095                serde_json::to_value(&process_before.summary).unwrap_or(Value::Null),
1096            );
1097            obj.insert(
1098                "process_after".to_string(),
1099                serde_json::to_value(&process_after.summary).unwrap_or(Value::Null),
1100            );
1101        }
1102
1103        Ok(output)
1104    }
1105}
1106
#[cfg(test)]
mod tests {
    use super::*;
    use crate::capability::Capability;

    /// Returns a backup directory that belongs to exactly one test.
    ///
    /// The test harness runs tests in parallel by default, so a directory
    /// shared by all tests could be deleted by one test while another is still
    /// using it. `tag` keeps tests apart within a run; the process id keeps
    /// concurrent runs apart.
    fn test_backup_dir(tag: &str) -> PathBuf {
        std::env::temp_dir().join(format!(
            "runtimo_git_test_{}_{}",
            tag,
            std::process::id()
        ))
    }

    #[test]
    fn validates_git_url_https_only() {
        assert!(GitExec::validate_url("https://github.com/user/repo.git").is_ok());
        assert!(GitExec::validate_url("git@github.com:user/repo.git").is_ok());

        assert!(GitExec::validate_url("http://example.com/repo.git").is_err());
        assert!(GitExec::validate_url("not-a-url").is_err());
        assert!(GitExec::validate_url("").is_err());
    }

    #[test]
    fn blocks_ssrf_urls() {
        assert!(GitExec::validate_url("https://169.254.169.254/latest/meta-data/").is_err());
        assert!(GitExec::validate_url("https://127.0.0.1/repo.git").is_err());
        assert!(GitExec::validate_url("https://localhost/repo.git").is_err());
        assert!(GitExec::validate_url("https://192.168.1.1/repo.git").is_err());
        assert!(GitExec::validate_url("https://metadata.google.internal/computeMetadata").is_err());
    }

    #[test]
    fn sanitizes_credentials_from_url() {
        assert_eq!(
            GitExec::sanitize_url("https://user:pass@github.com/repo.git"),
            "https://***@github.com/repo.git"
        );
        assert_eq!(
            GitExec::sanitize_url("https://github.com/repo.git"),
            "https://github.com/repo.git"
        );
        assert_eq!(
            GitExec::sanitize_url("git@github.com:user/repo.git"),
            "git@github.com:user/repo.git"
        );
    }

    #[test]
    fn detects_secret_files() {
        assert!(GitExec::is_secret_file(".env"));
        assert!(GitExec::is_secret_file("config/.env"));
        assert!(GitExec::is_secret_file("credentials.json"));
        assert!(GitExec::is_secret_file(".ssh/id_rsa"));
        assert!(GitExec::is_secret_file("src/.env.local"));

        assert!(!GitExec::is_secret_file("main.rs"));
        assert!(!GitExec::is_secret_file("Cargo.toml"));
        assert!(!GitExec::is_secret_file("README.md"));
    }

    #[test]
    fn validates_branch_name() {
        assert!(GitExec::validate_branch_name("main").is_ok());
        assert!(GitExec::validate_branch_name("feature/my-branch").is_ok());
        assert!(GitExec::validate_branch_name("v1.0").is_ok());

        assert!(GitExec::validate_branch_name("").is_err());
        assert!(GitExec::validate_branch_name("bad..name").is_err());
        assert!(GitExec::validate_branch_name("@{..}").is_err());
        // Option injection
        assert!(GitExec::validate_branch_name("--force").is_err());
        assert!(GitExec::validate_branch_name("--help").is_err());
        // Ref injection
        assert!(GitExec::validate_branch_name("refs/heads/main").is_err());
        // Control chars and whitespace
        assert!(GitExec::validate_branch_name("bad\nname").is_err());
        assert!(GitExec::validate_branch_name("bad\tname").is_err());
        // Metacharacters
        assert!(GitExec::validate_branch_name("bad:name").is_err());
        assert!(GitExec::validate_branch_name("bad~name").is_err());
        assert!(GitExec::validate_branch_name("bad^name").is_err());
        assert!(GitExec::validate_branch_name("bad*name").is_err());
        assert!(GitExec::validate_branch_name("bad[name").is_err());
        assert!(GitExec::validate_branch_name("bad\\name").is_err());
        assert!(GitExec::validate_branch_name("bad?name").is_err());
        assert!(GitExec::validate_branch_name("name.lock").is_err());
    }

    #[test]
    fn validates_commit_sha() {
        assert!(GitExec::validate_commit_sha("abc1234").is_ok());
        assert!(GitExec::validate_commit_sha("a1b2c3d4").is_ok());
        assert!(GitExec::validate_commit_sha("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0").is_ok());

        // Too short
        assert!(GitExec::validate_commit_sha("abc123").is_err());
        assert!(GitExec::validate_commit_sha("").is_err());
        // Too short and not hexadecimal
        assert!(GitExec::validate_commit_sha("xyz123").is_err());
        // Long enough, so only the non-hexadecimal characters can cause the rejection
        assert!(GitExec::validate_commit_sha("xyz1234").is_err());
    }

    #[allow(clippy::expect_used)]
    #[test]
    fn rejects_path_traversal() {
        let backup_dir = test_backup_dir("rejects_path_traversal");
        let cap = GitExec::new(backup_dir.clone()).expect("Failed to create GitExec");

        let result = cap.validate(&serde_json::json!({
            "operation": "clone",
            "url": "https://github.com/user/repo.git",
            "path": "../../../etc/passwd"
        }));

        // Clean up before asserting so a failure does not leave the directory behind.
        std::fs::remove_dir_all(&backup_dir).ok();

        let err = result.unwrap_err();
        assert!(err.to_string().contains("traversal"));
    }

    #[allow(clippy::expect_used)]
    #[test]
    fn rejects_invalid_operation() {
        let backup_dir = test_backup_dir("rejects_invalid_operation");
        let cap = GitExec::new(backup_dir.clone()).expect("Failed to create GitExec");

        let result = cap.validate(&serde_json::json!({
            "operation": "invalid_op"
        }));

        // Clean up before asserting so a failure does not leave the directory behind.
        std::fs::remove_dir_all(&backup_dir).ok();

        let err = result.unwrap_err();
        assert!(err.to_string().contains("Invalid operation"));
    }

    #[test]
    #[allow(clippy::expect_used)]
    fn status_on_nonexistent_repo() {
        let backup_dir = test_backup_dir("status_on_nonexistent_repo");
        let cap = GitExec::new(backup_dir.clone()).expect("Failed to create GitExec");

        // Built from the platform temp dir instead of a hard-coded `/tmp` path;
        // nothing ever creates this directory.
        let missing_repo = std::env::temp_dir().join(format!(
            "runtimo_nonexistent_repo_{}",
            std::process::id()
        ));

        let result = cap.execute(
            &serde_json::json!({
                "operation": "status",
                "path": missing_repo.to_string_lossy()
            }),
            &Context {
                dry_run: false,
                job_id: "test".into(),
                working_dir: std::env::temp_dir(),
            },
        );

        // Clean up before asserting so a failure does not leave the directory behind.
        std::fs::remove_dir_all(&backup_dir).ok();

        assert!(result.is_err());
    }

    #[allow(clippy::expect_used)]
    #[test]
    fn sanitizes_commit_message() {
        assert!(GitExec::sanitize_commit_message("valid commit").is_ok());
        assert!(GitExec::sanitize_commit_message("  trimmed  ").is_ok());
        assert!(GitExec::sanitize_commit_message("").is_err());
        assert!(GitExec::sanitize_commit_message("   ").is_err());
        let result = GitExec::sanitize_commit_message("hello\x00world")
            .expect("a message containing a NUL byte should be sanitized, not rejected");
        assert!(!result.contains('\x00'));
    }

    #[allow(clippy::expect_used)]
    #[test]
    fn timeout_enforced_on_git_command() {
        // Start a TCP listener on localhost that accepts but never responds.
        // This creates a guaranteed-timeout scenario without depending on
        // external network behavior.
        let listener =
            std::net::TcpListener::bind("127.0.0.1:0").expect("failed to bind TCP listener");
        let port = listener
            .local_addr()
            .expect("bound listener has no local address")
            .port();

        let tmp = std::env::temp_dir().join(format!(
            "runtimo_git_timeout_test_{}",
            std::process::id()
        ));
        std::fs::create_dir_all(&tmp).ok();
        Command::new("git")
            .arg("init")
            .current_dir(&tmp)
            .output()
            .ok();

        // Spawn a thread that accepts one connection and hangs.
        // The git clone will connect and wait for a response that never comes.
        // The thread is detached; it ends when the test process exits.
        let _hang_handle = std::thread::spawn(move || {
            if let Ok((_stream, _addr)) = listener.accept() {
                // Hold the connection open — never send a response
                std::thread::sleep(std::time::Duration::from_secs(300));
            }
        });

        // git clone to localhost times out after 2 seconds.
        let result = GitExec::run_git_with_timeout(
            &tmp,
            &["clone", &format!("http://127.0.0.1:{}/repo.git", port)],
            2,
        );

        // Clean up before asserting so a failure does not leave the directory behind.
        std::fs::remove_dir_all(&tmp).ok();

        // The operation should fail with a timeout (or a connection error
        // if git detects the protocol mismatch before the timeout fires).
        assert!(
            result.is_err(),
            "Expected timeout or connection error, got: {:?}",
            result
        );
    }
}