Skip to main content

runtimo_core/capabilities/
git_exec.rs

//! GitExec capability — git operations with state tracking and undo support.
//!
//! Provides git operations (clone, pull, commit, revert, clean, status) with:
//! - State tracking (commit sha, branch, remote URL)
//! - Backup-before-mutate for undo support
//! - WAL logging for audit trail
//! - Path traversal protection
//! - Timeout enforcement on all git subprocesses
//! - URL validation (HTTPS/SSH only, SSRF blocking)
//! - Credential sanitization from output
//! - Secret file detection for git add
//! - Telemetry and process tracking before/after execution
//!
//! # Example
//!
//! ```rust,ignore
//! use runtimo_core::capabilities::GitExec;
//! use runtimo_core::capability::{Capability, Context};
//! use serde_json::json;
//! use std::path::PathBuf;
//!
//! // `GitExec::new` is fallible: it returns a `Result`, so unwrap it first.
//! let cap = GitExec::new(PathBuf::from("/tmp/backups")).unwrap();
//! let result = cap.execute(
//!     &json!({"operation": "clone", "url": "https://github.com/user/repo.git", "path": "/tmp/repo"}),
//!     &Context { dry_run: false, job_id: "job1".into(), working_dir: PathBuf::from("/tmp") }
//! ).unwrap();
//!
//! assert!(result.success);
//! ```
30
31use crate::backup::BackupManager;
32use crate::capability::{Capability, Context, Output};
33use crate::processes::ProcessSnapshot;
34use crate::telemetry::Telemetry;
35use crate::validation::path::{validate_path, PathContext};
36use crate::{Error, Result};
37use serde::{Deserialize, Serialize};
38use serde_json::Value;
39use std::path::{Path, PathBuf};
40use std::process::Command;
41use std::time::{Duration, Instant};
42
/// Arguments for the [`GitExec`] capability.
///
/// Only `operation` is mandatory; every other field is optional and is read
/// only by the operations that need it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitExecArgs {
    /// Git operation to perform (clone, pull, commit, revert, clean, status).
    pub operation: String,
    /// Repository URL. Required by clone, which accepts only `https://` or
    /// `git@` URLs.
    pub url: Option<String>,
    /// Local path to the repository. Required by clone, where it is the
    /// destination and must not exist yet.
    // NOTE(review): presumably also the source of the `repo_path` handed to the
    // other operations — confirm against the dispatcher, which is not in view.
    pub path: Option<String>,
    /// Branch name, passed to `git clone -b`.
    pub branch: Option<String>,
    /// Commit message. Required by commit; control characters other than
    /// newline and tab are stripped and the result is trimmed.
    pub message: Option<String>,
    /// Files to stage for commit. When absent, commit discovers the files to
    /// stage itself, skipping names that look like secrets.
    pub files: Option<Vec<String>>,
    /// SHA of the commit to undo with `git revert` (7 to 40 hex digits).
    pub commit_sha: Option<String>,
    /// Timeout in seconds passed to each timed git invocation (default: 300).
    pub timeout_secs: Option<u64>,
}
63
/// Snapshot of a repository's git state, taken before and after an operation.
///
/// The optional fields are `None` when the corresponding git query fails.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitState {
    /// Current commit SHA (`git rev-parse HEAD`).
    pub commit_sha: Option<String>,
    /// Current branch name (`git rev-parse --abbrev-ref HEAD`).
    pub branch: Option<String>,
    /// URL of the `origin` remote, with any embedded credentials redacted.
    pub remote_url: Option<String>,
    /// Repository path.
    pub repo_path: String,
    /// `true` when `git status --porcelain` reports no changes.
    pub is_clean: bool,
}
78
/// Known secret file patterns to exclude from `git add -A`.
///
/// Each entry is a file or directory name (optionally with a parent
/// component) that `is_secret_file` compares against path components.
// Entries must stay lowercase: `is_secret_file` lowercases the candidate path
// before comparing, so an uppercase entry here could never match.
const SECRET_PATTERNS: &[&str] = &[
    ".env", ".env.local", ".env.production", ".env.staging",
    "credentials.json", "credentials.yml", "credentials.yaml",
    "secrets.json", "secrets.yml", "secrets.yaml",
    ".ssh/id_rsa", ".ssh/id_ed25519", ".ssh/id_dsa",
    "id_rsa", "id_ed25519", "id_dsa",
    ".npmrc", ".pypirc", ".docker/config.json",
    "token", "api_key", "api_secret",
    ".aws/credentials", ".azure/credentials",
    "keystore.jks", "keystore.p12",
];
91
/// Maximum number of untracked files allowed for `git clean -fd`.
///
/// The clean operation refuses to run when the untracked-file count exceeds
/// this limit.
const MAX_CLEAN_FILES: usize = 1000;
94
/// Capability that executes git operations with full state tracking.
///
/// Supports clone, pull, commit, revert, clean, and status operations.
/// Creates backups before mutable operations for undo support.
pub struct GitExec {
    /// Backup manager used to snapshot a repository before pull, commit,
    /// revert, and clean mutate it.
    backup_mgr: BackupManager,
}
102
103impl GitExec {
104    /// Creates a new GitExec capability with the given backup directory.
105    ///
106    /// # Errors
107    ///
108    /// Returns [`crate::Error::BackupError`] if the backup
109    /// directory cannot be created.
110    pub fn new(backup_dir: PathBuf) -> Result<Self> {
111        Ok(Self {
112            backup_mgr: BackupManager::new(backup_dir)?,
113        })
114    }
115
116    /// Runs a git command with timeout enforcement and returns the output.
117    fn run_git_with_timeout(repo_path: &Path, args: &[&str], timeout_secs: u64) -> Result<String> {
118        let mut child = Command::new("git")
119            .current_dir(repo_path)
120            .args(args)
121            .stdin(std::process::Stdio::null())
122            .spawn()
123            .map_err(|e| Error::ExecutionFailed(format!("git command failed: {}", e)))?;
124
125        let timeout = Duration::from_secs(timeout_secs);
126        let start = Instant::now();
127
128        loop {
129            match child.try_wait() {
130                Ok(Some(status)) => {
131                    let output = child
132                        .wait_with_output()
133                        .map_err(|e| Error::ExecutionFailed(format!("git wait failed: {}", e)))?;
134                    if !status.success() {
135                        let stderr = String::from_utf8_lossy(&output.stderr);
136                        return Err(Error::ExecutionFailed(format!(
137                            "git {}: {}",
138                            args.join(" "),
139                            stderr.trim()
140                        )));
141                    }
142                    return Ok(String::from_utf8_lossy(&output.stdout).to_string());
143                }
144                Ok(None) => {
145                    if start.elapsed() > timeout {
146                        let _ = child.kill();
147                        let _ = child.wait();
148                        return Err(Error::ExecutionFailed(format!(
149                            "git {} timed out after {}s",
150                            args.join(" "),
151                            timeout_secs
152                        )));
153                    }
154                    std::thread::sleep(Duration::from_millis(50));
155                }
156                Err(e) => {
157                    let _ = child.kill();
158                    let _ = child.wait();
159                    return Err(Error::ExecutionFailed(format!("git wait error: {}", e)));
160                }
161            }
162        }
163    }
164
165    /// Runs a git command (backwards-compatible, uses default timeout).
166    #[allow(dead_code)]
167    fn run_git(repo_path: &Path, args: &[&str]) -> Result<String> {
168        Self::run_git_with_timeout(repo_path, args, 300)
169    }
170
/// Checks if the working tree is clean (no uncommitted changes).
///
/// Runs `git status --porcelain` in `repo_path`. The tree counts as clean
/// only when git exits successfully and prints nothing on stdout. stderr is
/// deliberately ignored: git emits harmless warnings there, and they must not
/// make a clean tree look dirty.
///
/// Returns `false` when git cannot be started or exits with an error (for
/// example when `repo_path` is not a repository).
fn is_working_tree_clean(repo_path: &Path) -> bool {
    let output = Command::new("git")
        .current_dir(repo_path)
        .args(["status", "--porcelain"])
        .output();

    match output {
        // The exit status, not stderr, tells us whether the query itself worked.
        Ok(out) => out.status.success() && out.stdout.is_empty(),
        Err(_) => false,
    }
}
183
184    /// Validates a git URL format. Blocks http:// (MITM risk) and SSRF patterns.
185    fn validate_url(url: &str) -> Result<()> {
186        let is_https = url.starts_with("https://");
187        let is_ssh = url.starts_with("git@");
188        if !is_https && !is_ssh {
189            return Err(Error::SchemaValidationFailed(format!(
190                "Insecure or unsupported URL scheme: {} (must use https:// or git@ SSH)",
191                url
192            )));
193        }
194
195        if is_https {
196            if let Some(host_part) = url.strip_prefix("https://").and_then(|s| s.split('/').next())
197            {
198                let host = host_part.split(':').next().unwrap_or(host_part);
199                if Self::is_ssrf_host(host) {
200                    return Err(Error::SchemaValidationFailed(format!(
201                        "SSRF blocked: URL targets internal/metadata address: {}",
202                        url
203                    )));
204                }
205            }
206        }
207
208        Ok(())
209    }
210
/// Checks if a host is a known SSRF target (cloud metadata, localhost, link-local).
///
/// Two layers are applied:
/// 1. A case-insensitive substring match against well-known internal names
///    and address prefixes.
/// 2. If the host is an IP literal (optionally bracketed, optionally with an
///    IPv6 zone id), it is parsed and rejected when it is loopback, private,
///    link-local, unspecified, broadcast, IPv6 unique-local, or an
///    IPv4-mapped IPv6 form of any of those. This covers whole ranges such as
///    10.0.0.0/8 and 127.0.0.0/8 that the substring list only samples.
///
/// Only the literal host string is inspected; no DNS resolution is performed.
fn is_ssrf_host(host: &str) -> bool {
    const SSRF_INDICATORS: &[&str] = &[
        "169.254.169.254",
        "169.254.",
        "127.0.0.1",
        "localhost",
        "0.0.0.0",
        "::1",
        "10.0.0.",
        "10.0.1.",
        "10.0.2.",
        "10.0.3.",
        "172.16.",
        "172.17.",
        "172.18.",
        "172.19.",
        "172.20.",
        "172.21.",
        "172.22.",
        "172.23.",
        "172.24.",
        "172.25.",
        "172.26.",
        "172.27.",
        "172.28.",
        "172.29.",
        "172.30.",
        "172.31.",
        "192.168.",
        "metadata.google",
        "metadata.azure",
        "instance-data",
        "100.100.100.200",
        "[::1]",
        "[fe80:",
    ];

    // True for IPv4 addresses that must never be reached from a clone URL.
    fn is_internal_v4(addr: std::net::Ipv4Addr) -> bool {
        addr.is_loopback()
            || addr.is_private()
            || addr.is_link_local()
            || addr.is_unspecified()
            || addr.is_broadcast()
    }

    let lower = host.to_lowercase();
    if SSRF_INDICATORS
        .iter()
        .any(|indicator| lower.contains(*indicator))
    {
        return true;
    }

    // Strip the URL brackets and any zone id so IPv6 literals can be parsed.
    let bare = lower.trim_start_matches('[').trim_end_matches(']');
    let bare = bare.split('%').next().unwrap_or(bare);

    match bare.parse::<std::net::IpAddr>() {
        Ok(std::net::IpAddr::V4(v4)) => is_internal_v4(v4),
        Ok(std::net::IpAddr::V6(v6)) => {
            let first = v6.segments()[0];
            v6.is_loopback()
                || v6.is_unspecified()
                || (first & 0xfe00) == 0xfc00 // fc00::/7 unique-local
                || (first & 0xffc0) == 0xfe80 // fe80::/10 link-local
                || v6.to_ipv4_mapped().map_or(false, is_internal_v4)
        }
        // Not an IP literal: only the indicator list applies.
        Err(_) => false,
    }
}
251
252    /// Validates a branch name.
253    fn validate_branch_name(branch: &str) -> Result<()> {
254        if branch.is_empty() {
255            return Err(Error::SchemaValidationFailed("Branch name is empty".into()));
256        }
257        if branch.contains("..") || branch.contains("@{") {
258            return Err(Error::SchemaValidationFailed(format!(
259                "Invalid branch name: {}",
260                branch
261            )));
262        }
263        Ok(())
264    }
265
266    /// Validates a commit SHA.
267    fn validate_commit_sha(sha: &str) -> Result<()> {
268        if sha.len() < 7 || sha.len() > 40 {
269            return Err(Error::SchemaValidationFailed(format!(
270                "Invalid commit SHA length: {}",
271                sha
272            )));
273        }
274        if !sha.chars().all(|c| c.is_ascii_hexdigit()) {
275            return Err(Error::SchemaValidationFailed(format!(
276                "Invalid commit SHA: {}",
277                sha
278            )));
279        }
280        Ok(())
281    }
282
/// Sanitizes credentials from a URL string (redacts user:pass@).
///
/// - SSH-style URLs (`git@host:path`) are returned unchanged.
/// - For `scheme://` URLs only the authority (the text between `://` and the
///   first `/`, `?`, or `#`) is inspected, and everything up to its LAST `@`
///   is replaced with `***`. A password that itself contains `@` is therefore
///   fully redacted, and an `@` that merely appears in the path
///   (`https://host/pkg@v1`) leaves the URL untouched.
/// - Without a scheme, everything up to the first `@` is replaced with `***`.
/// - Anything else is returned unchanged.
fn sanitize_url(url: &str) -> String {
    if url.starts_with("git@") {
        return url.to_string();
    }

    if let Some(scheme_end) = url.find("://") {
        let (scheme, rest) = url.split_at(scheme_end + 3);
        let authority_len = rest
            .find(|c: char| matches!(c, '/' | '?' | '#'))
            .unwrap_or(rest.len());
        let (authority, tail) = rest.split_at(authority_len);
        return match authority.rfind('@') {
            Some(at) => format!("{}***@{}{}", scheme, &authority[at + 1..], tail),
            None => url.to_string(),
        };
    }

    match url.find('@') {
        Some(at) => format!("***@{}", &url[at + 1..]),
        None => url.to_string(),
    }
}
299
/// Sanitizes git output to remove credential leakage.
///
/// Every `scheme://userinfo@host` occurrence has its userinfo replaced with
/// `***`. The userinfo must sit inside the URL authority, i.e. before the
/// first `/`, `?`, `#`, quote, angle bracket, or whitespace after `://`. An
/// `@` further along the line (an e-mail address, a path such as `pkg@v1`) is
/// left alone instead of swallowing the text in between. When the authority
/// holds several `@`, everything up to the last one is redacted.
///
/// Lines are processed independently and re-joined with `\n`, so a trailing
/// newline in `output` is not preserved.
fn sanitize_output(output: &str) -> String {
    // Redacts the credentials of every URL found on a single line.
    fn redact_line(line: &str) -> String {
        let mut redacted = String::with_capacity(line.len());
        let mut rest = line;

        while let Some(idx) = rest.find("://") {
            let (head, tail) = rest.split_at(idx + 3);
            redacted.push_str(head);

            let authority_len = tail
                .find(|c: char| {
                    c.is_whitespace() || matches!(c, '/' | '?' | '#' | '\'' | '"' | '<' | '>')
                })
                .unwrap_or(tail.len());
            let (authority, after) = tail.split_at(authority_len);

            match authority.rfind('@') {
                // Non-empty userinfo: hide it, keep the host.
                Some(at) if at > 0 => {
                    redacted.push_str("***@");
                    redacted.push_str(&authority[at + 1..]);
                }
                // No userinfo (or an empty one): nothing to hide.
                _ => redacted.push_str(authority),
            }
            rest = after;
        }

        redacted.push_str(rest);
        redacted
    }

    output
        .lines()
        .map(redact_line)
        .collect::<Vec<_>>()
        .join("\n")
}
335
336    /// Checks if a file path looks like a secret file that should not be committed.
337    fn is_secret_file(path: &str) -> bool {
338        let lower = path.to_lowercase();
339        SECRET_PATTERNS.iter().any(|pattern| {
340            lower == *pattern
341                || lower.ends_with(&format!("/{}", pattern))
342                || lower.contains(&format!("/{}/", pattern))
343        })
344    }
345
346    /// Validates a file path for git add (no traversal, no secrets).
347    fn validate_add_file(file: &str, repo_path: &Path) -> Result<()> {
348        if file.contains("..") {
349            return Err(Error::SchemaValidationFailed(format!(
350                "Path traversal in file path: {}",
351                file
352            )));
353        }
354        if Self::is_secret_file(file) {
355            return Err(Error::SchemaValidationFailed(format!(
356                "Secret file detected, refusing to add: {}",
357                file
358            )));
359        }
360        let full_path = repo_path.join(file);
361        if full_path.exists() {
362            let canonical = full_path.canonicalize().map_err(|e| {
363                Error::SchemaValidationFailed(format!("Cannot resolve file {}: {}", file, e))
364            })?;
365            let canonical_repo = repo_path.canonicalize().map_err(|e| {
366                Error::SchemaValidationFailed(format!("Cannot resolve repo: {}", e))
367            })?;
368            if !canonical.starts_with(&canonical_repo) {
369                return Err(Error::SchemaValidationFailed(format!(
370                    "File {} escapes repository boundary",
371                    file
372                )));
373            }
374        }
375        Ok(())
376    }
377
/// Reports the free space, in bytes, of the filesystem that holds `path`.
///
/// Shells out to `df --output=avail -B1 <path>` and parses the second output
/// line (the first is the column header). Returns `None` when `df` cannot be
/// run, exits unsuccessfully, or prints something that is not a number.
fn disk_free_bytes(path: &Path) -> Option<u64> {
    let df = Command::new("df")
        .args(["--output=avail", "-B1"])
        .arg(path)
        .output()
        .ok()?;

    if !df.status.success() {
        return None;
    }

    let report = String::from_utf8_lossy(&df.stdout);
    let value_line = report.lines().nth(1)?;
    value_line.trim().parse::<u64>().ok()
}
393
394    /// Counts untracked files that would be removed by git clean -fd.
395    fn count_untracked_files(repo_path: &Path, timeout_secs: u64) -> Result<usize> {
396        let output = Self::run_git_with_timeout(repo_path, &["ls-files", "--others", "--exclude-standard"], timeout_secs)?;
397        Ok(output.lines().filter(|l| !l.is_empty()).count())
398    }
399
400    /// Sanitizes a commit message (strips control chars, ensures non-empty).
401    fn sanitize_commit_message(msg: &str) -> Result<String> {
402        let sanitized: String = msg.chars().filter(|c| !c.is_control() || *c == '\n' || *c == '\t').collect();
403        let trimmed = sanitized.trim();
404        if trimmed.is_empty() {
405            return Err(Error::SchemaValidationFailed("Commit message is empty after sanitization".into()));
406        }
407        Ok(trimmed.to_string())
408    }
409
410    /// Creates a backup unconditionally before any mutating operation.
411    fn backup_before_mutation(&self, repo_path: &Path, job_id: &str) -> Result<PathBuf> {
412        self.backup_mgr.create_backup(repo_path, job_id)
413    }
414
415    /// Captures the current git state for a repository.
416    fn capture_state(repo_path: &Path, timeout_secs: u64) -> Result<GitState> {
417        let commit_sha = Self::run_git_with_timeout(repo_path, &["rev-parse", "HEAD"], timeout_secs)
418            .map(|s| s.trim().to_string())
419            .ok();
420
421        let branch = Self::run_git_with_timeout(repo_path, &["rev-parse", "--abbrev-ref", "HEAD"], timeout_secs)
422            .map(|s| s.trim().to_string())
423            .ok();
424
425        let remote_url = Self::run_git_with_timeout(repo_path, &["remote", "get-url", "origin"], timeout_secs)
426            .ok()
427            .and_then(|s| {
428                let trimmed = s.trim().to_string();
429                let sanitized = Self::sanitize_url(&trimmed);
430                if sanitized.is_empty() { None } else { Some(sanitized) }
431            });
432
433        let is_clean = Self::is_working_tree_clean(repo_path);
434
435        Ok(GitState {
436            commit_sha,
437            branch,
438            remote_url,
439            repo_path: repo_path.to_string_lossy().to_string(),
440            is_clean,
441        })
442    }
443
444    /// Executes git clone operation.
445    fn op_clone(&self, args: &GitExecArgs, ctx: &Context) -> Result<Output> {
446        let timeout_secs = args.timeout_secs.unwrap_or(300);
447        let url = args
448            .url
449            .as_ref()
450            .ok_or_else(|| Error::ExecutionFailed("URL required for clone".into()))?;
451        let path = args
452            .path
453            .as_ref()
454            .ok_or_else(|| Error::ExecutionFailed("Path required for clone".into()))?;
455
456        Self::validate_url(url)?;
457
458        let path = Path::new(path);
459        if path.exists() {
460            return Err(Error::ExecutionFailed(format!(
461                "Path already exists: {}",
462                path.display()
463            )));
464        }
465
466        if let Some(free) = Self::disk_free_bytes(path.parent().unwrap_or(Path::new("/"))) {
467            if free < 100 * 1024 * 1024 {
468                return Err(Error::ExecutionFailed(
469                    "Insufficient disk space for clone (need at least 100MB)".into(),
470                ));
471            }
472        }
473
474        if ctx.dry_run {
475            return Ok(Output {
476                success: true,
477                data: serde_json::json!({
478                    "operation": "clone",
479                    "url": Self::sanitize_url(url),
480                    "path": path.display().to_string(),
481                    "dry_run": true
482                }),
483                message: Some(format!(
484                    "DRY RUN: would clone {} to {}",
485                    Self::sanitize_url(url),
486                    path.display()
487                )),
488            });
489        }
490
491        if let Some(parent) = path.parent() {
492            std::fs::create_dir_all(parent).map_err(|e| {
493                Error::ExecutionFailed(format!("mkdir {}: {}", parent.display(), e))
494            })?;
495        }
496
497        let mut cmd = Command::new("git");
498        cmd.arg("clone").arg(url).arg(path);
499
500        if let Some(branch) = &args.branch {
501            cmd.arg("-b").arg(branch);
502        }
503
504        let mut child = cmd
505            .stdin(std::process::Stdio::null())
506            .spawn()
507            .map_err(|e| Error::ExecutionFailed(format!("git clone spawn failed: {}", e)))?;
508
509        let timeout = Duration::from_secs(timeout_secs);
510        let start = Instant::now();
511        let status = loop {
512            match child.try_wait() {
513                Ok(Some(s)) => break s,
514                Ok(None) => {
515                    if start.elapsed() > timeout {
516                        let _ = child.kill();
517                        let _ = child.wait();
518                        return Err(Error::ExecutionFailed(format!(
519                            "git clone timed out after {}s",
520                            timeout_secs
521                        )));
522                    }
523                    std::thread::sleep(Duration::from_millis(100));
524                }
525                Err(e) => {
526                    let _ = child.kill();
527                    let _ = child.wait();
528                    return Err(Error::ExecutionFailed(format!("git clone wait error: {}", e)));
529                }
530            }
531        };
532
533        if !status.success() {
534            return Err(Error::ExecutionFailed("git clone failed (see stderr)".into()));
535        }
536
537        let state = Self::capture_state(path, timeout_secs)?;
538
539        Ok(Output {
540            success: true,
541            data: serde_json::json!({
542                "operation": "clone",
543                "url": Self::sanitize_url(url),
544                "path": path.display().to_string(),
545                "commit_sha": state.commit_sha,
546                "branch": state.branch,
547                "remote_url": state.remote_url
548            }),
549            message: Some(format!("Cloned {} to {}", Self::sanitize_url(url), path.display())),
550        })
551    }
552
553    /// Executes git pull operation.
554    fn op_pull(&self, _args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
555        let timeout_secs = _args.timeout_secs.unwrap_or(300);
556
557        if !repo_path.exists() {
558            return Err(Error::ExecutionFailed(format!(
559                "Repository not found: {}",
560                repo_path.display()
561            )));
562        }
563
564        let state_before = Self::capture_state(repo_path, timeout_secs)?;
565
566        if ctx.dry_run {
567            return Ok(Output {
568                success: true,
569                data: serde_json::json!({
570                    "operation": "pull",
571                    "path": repo_path.display().to_string(),
572                    "dry_run": true
573                }),
574                message: Some("DRY RUN: would pull".into()),
575            });
576        }
577
578        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
579
580        let output = Self::run_git_with_timeout(repo_path, &["pull", "--rebase"], timeout_secs)
581            .map_err(|e| Error::ExecutionFailed(format!("git pull failed: {}", e)))?;
582
583        let state_after = Self::capture_state(repo_path, timeout_secs)?;
584
585        Ok(Output {
586            success: true,
587            data: serde_json::json!({
588                "operation": "pull",
589                "path": repo_path.display().to_string(),
590                "commit_sha_before": state_before.commit_sha,
591                "commit_sha_after": state_after.commit_sha,
592                "branch": state_after.branch,
593                "backup_path": backup_path.map(|p| p.to_string_lossy().to_string()),
594                "git_output": Self::sanitize_output(&output)
595            }),
596            message: Some("Pulled successfully".into()),
597        })
598    }
599
600    /// Executes git commit operation.
601    fn op_commit(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
602        let timeout_secs = args.timeout_secs.unwrap_or(300);
603
604        if !repo_path.exists() {
605            return Err(Error::ExecutionFailed(format!(
606                "Repository not found: {}",
607                repo_path.display()
608            )));
609        }
610
611        let message = args
612            .message
613            .as_ref()
614            .ok_or_else(|| Error::ExecutionFailed("Commit message required".into()))?;
615        let message = Self::sanitize_commit_message(message)?;
616
617        let state_before = Self::capture_state(repo_path, timeout_secs)?;
618
619        if ctx.dry_run {
620            return Ok(Output {
621                success: true,
622                data: serde_json::json!({
623                    "operation": "commit",
624                    "path": repo_path.display().to_string(),
625                    "message": &message,
626                    "dry_run": true
627                }),
628                message: Some("DRY RUN: would commit".into()),
629            });
630        }
631
632        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
633
634        if let Some(files) = &args.files {
635            for file in files {
636                Self::validate_add_file(file, repo_path)?;
637                let output = Self::run_git_with_timeout(repo_path, &["add", file], timeout_secs)
638                    .map_err(|e| Error::ExecutionFailed(format!("git add failed: {}", e)))?;
639                let _ = output;
640            }
641        } else {
642            let untracked = Self::run_git_with_timeout(repo_path, &["ls-files", "--others", "--exclude-standard"], timeout_secs)?;
643            for line in untracked.lines() {
644                let file = line.trim();
645                if file.is_empty() {
646                    continue;
647                }
648                if Self::is_secret_file(file) {
649                    eprintln!("[runtimo] Skipping secret file from git add: {}", file);
650                    continue;
651                }
652                Self::run_git_with_timeout(repo_path, &["add", file], timeout_secs)
653                    .map_err(|e| Error::ExecutionFailed(format!("git add {} failed: {}", file, e)))?;
654            }
655        }
656
657        let output = Self::run_git_with_timeout(repo_path, &["commit", "-m", &message], timeout_secs)
658            .map_err(|e| Error::ExecutionFailed(format!("git commit failed: {}", e)))?;
659        let _ = output;
660
661        let state_after = Self::capture_state(repo_path, timeout_secs)?;
662
663        Ok(Output {
664            success: true,
665            data: serde_json::json!({
666                "operation": "commit",
667                "path": repo_path.display().to_string(),
668                "message": message,
669                "commit_sha_before": state_before.commit_sha,
670                "commit_sha_after": state_after.commit_sha,
671                "branch": state_after.branch,
672                "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
673            }),
674            message: Some(format!("Committed: {}", message)),
675        })
676    }
677
678    /// Executes git revert operation.
679    fn op_revert(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
680        let timeout_secs = args.timeout_secs.unwrap_or(300);
681
682        if !repo_path.exists() {
683            return Err(Error::ExecutionFailed(format!(
684                "Repository not found: {}",
685                repo_path.display()
686            )));
687        }
688
689        let commit_sha = args
690            .commit_sha
691            .as_ref()
692            .ok_or_else(|| Error::ExecutionFailed("Commit SHA required for revert".into()))?;
693
694        Self::validate_commit_sha(commit_sha)?;
695
696        let state_before = Self::capture_state(repo_path, timeout_secs)?;
697
698        if ctx.dry_run {
699            return Ok(Output {
700                success: true,
701                data: serde_json::json!({
702                    "operation": "revert",
703                    "path": repo_path.display().to_string(),
704                    "commit_sha": commit_sha,
705                    "dry_run": true
706                }),
707                message: Some(format!("DRY RUN: would revert {}", commit_sha)),
708            });
709        }
710
711        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
712
713        let output = Self::run_git_with_timeout(repo_path, &["revert", "--no-edit", commit_sha], timeout_secs)
714            .map_err(|e| Error::ExecutionFailed(format!("git revert failed: {}", e)))?;
715        let _ = output;
716
717        let state_after = Self::capture_state(repo_path, timeout_secs)?;
718
719        Ok(Output {
720            success: true,
721            data: serde_json::json!({
722                "operation": "revert",
723                "path": repo_path.display().to_string(),
724                "commit_sha": commit_sha,
725                "commit_sha_before": state_before.commit_sha,
726                "commit_sha_after": state_after.commit_sha,
727                "branch": state_after.branch,
728                "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
729            }),
730            message: Some(format!("Reverted {}", commit_sha)),
731        })
732    }
733
734    /// Executes git clean operation.
735    fn op_clean(&self, args: &GitExecArgs, ctx: &Context, repo_path: &Path) -> Result<Output> {
736        let timeout_secs = args.timeout_secs.unwrap_or(300);
737
738        if !repo_path.exists() {
739            return Err(Error::ExecutionFailed(format!(
740                "Repository not found: {}",
741                repo_path.display()
742            )));
743        }
744
745        let state_before = Self::capture_state(repo_path, timeout_secs)?;
746
747        if ctx.dry_run {
748            let untracked_count = Self::count_untracked_files(repo_path, timeout_secs).unwrap_or(0);
749            let preview = Self::run_git_with_timeout(repo_path, &["clean", "-fd", "--dry-run"], timeout_secs)
750                .map(|s| Self::sanitize_output(&s))
751                .unwrap_or_default();
752            return Ok(Output {
753                success: true,
754                data: serde_json::json!({
755                    "operation": "clean",
756                    "path": repo_path.display().to_string(),
757                    "dry_run": true,
758                    "untracked_count": untracked_count,
759                    "preview": preview
760                }),
761                message: Some(format!("DRY RUN: would clean {} untracked files", untracked_count)),
762            });
763        }
764
765        let untracked_count = Self::count_untracked_files(repo_path, timeout_secs)?;
766        if untracked_count > MAX_CLEAN_FILES {
767            return Err(Error::ExecutionFailed(format!(
768                "Too many untracked files to clean safely: {} (limit: {})",
769                untracked_count, MAX_CLEAN_FILES
770            )));
771        }
772
773        let backup_path = Some(self.backup_before_mutation(repo_path, &ctx.job_id)?);
774
775        let output = Self::run_git_with_timeout(repo_path, &["clean", "-fd"], timeout_secs)
776            .map_err(|e| Error::ExecutionFailed(format!("git clean failed: {}", e)))?;
777        let _ = output;
778
779        let state_after = Self::capture_state(repo_path, timeout_secs)?;
780
781        Ok(Output {
782            success: true,
783            data: serde_json::json!({
784                "operation": "clean",
785                "path": repo_path.display().to_string(),
786                "was_clean": state_before.is_clean,
787                "is_clean": state_after.is_clean,
788                "untracked_files_removed": untracked_count,
789                "backup_path": backup_path.map(|p| p.to_string_lossy().to_string())
790            }),
791            message: Some(format!("Cleaned {} untracked files", untracked_count)),
792        })
793    }
794
795    /// Executes git status operation.
796    fn op_status(&self, _args: &GitExecArgs, _ctx: &Context, repo_path: &Path) -> Result<Output> {
797        let timeout_secs = _args.timeout_secs.unwrap_or(300);
798
799        if !repo_path.exists() {
800            return Err(Error::ExecutionFailed(format!(
801                "Repository not found: {}",
802                repo_path.display()
803            )));
804        }
805
806        let state = Self::capture_state(repo_path, timeout_secs)?;
807
808        let status_output =
809            Self::run_git_with_timeout(repo_path, &["status", "--porcelain"], timeout_secs).unwrap_or_default();
810
811        let branch = state.branch.clone().unwrap_or_default();
812        let remote_url = state.remote_url.clone().unwrap_or_default();
813
814        Ok(Output {
815            success: true,
816            data: serde_json::json!({
817                "operation": "status",
818                "path": repo_path.display().to_string(),
819                "branch": branch,
820                "remote_url": remote_url,
821                "commit_sha": state.commit_sha,
822                "is_clean": state.is_clean,
823                "status": status_output
824            }),
825            message: Some(format!(
826                "On branch {}: {}",
827                branch,
828                if state.is_clean { "clean" } else { "dirty" }
829            )),
830        })
831    }
832}
833
834impl Capability for GitExec {
835    fn name(&self) -> &'static str {
836        "GitExec"
837    }
838
839    fn description(&self) -> &'static str {
840        "Git operations (clone, pull, commit, revert, clean, status) with state tracking, timeout enforcement, and undo support."
841    }
842
843    fn schema(&self) -> Value {
844        serde_json::json!({
845            "type": "object",
846            "properties": {
847                "operation": { "type": "string", "enum": ["clone", "pull", "commit", "revert", "clean", "status"] },
848                "url": { "type": "string" },
849                "path": { "type": "string" },
850                "branch": { "type": "string" },
851                "message": { "type": "string" },
852                "files": { "type": "array", "items": { "type": "string" } },
853                "commit_sha": { "type": "string" },
854                "timeout_secs": { "type": "integer", "minimum": 1, "maximum": 600 }
855            },
856            "required": ["operation"]
857        })
858    }
859
860    fn validate(&self, args: &Value) -> Result<()> {
861        let args: GitExecArgs = serde_json::from_value(args.clone())
862            .map_err(|e| Error::SchemaValidationFailed(e.to_string()))?;
863
864        let valid_ops = ["clone", "pull", "commit", "revert", "clean", "status"];
865        if !valid_ops.contains(&args.operation.as_str()) {
866            return Err(Error::SchemaValidationFailed(format!(
867                "Invalid operation: {}. Must be one of: {}",
868                args.operation,
869                valid_ops.join(", ")
870            )));
871        }
872
873        if args.operation == "clone" {
874            if let Some(url) = &args.url {
875                Self::validate_url(url)?;
876            } else {
877                return Err(Error::SchemaValidationFailed(
878                    "URL required for clone".into(),
879                ));
880            }
881            if let Some(path) = &args.path {
882                let ctx = PathContext {
883                    require_exists: false,
884                    require_file: false,
885                    ..Default::default()
886                };
887                validate_path(path, &ctx).map_err(Error::SchemaValidationFailed)?;
888            }
889        }
890
891        if args.operation != "clone" {
892            if let Some(path) = &args.path {
893                let ctx = PathContext {
894                    require_exists: true,
895                    require_file: false,
896                    ..Default::default()
897                };
898                validate_path(path, &ctx).map_err(Error::SchemaValidationFailed)?;
899            }
900        }
901
902        if let Some(branch) = &args.branch {
903            Self::validate_branch_name(branch)?;
904        }
905
906        if let Some(sha) = &args.commit_sha {
907            Self::validate_commit_sha(sha)?;
908        }
909
910        Ok(())
911    }
912
913    fn execute(&self, args: &Value, ctx: &Context) -> Result<Output> {
914        let args: GitExecArgs = serde_json::from_value(args.clone())
915            .map_err(|e| Error::ExecutionFailed(e.to_string()))?;
916
917        let telemetry_before = Telemetry::capture();
918        let process_before = ProcessSnapshot::capture();
919
920        let result = match args.operation.as_str() {
921            "clone" => self.op_clone(&args, ctx),
922            "pull" => {
923                let path = args
924                    .path
925                    .as_ref()
926                    .ok_or_else(|| Error::ExecutionFailed("Path required for pull".into()))?;
927                self.op_pull(&args, ctx, Path::new(path))
928            }
929            "commit" => {
930                let path = args
931                    .path
932                    .as_ref()
933                    .ok_or_else(|| Error::ExecutionFailed("Path required for commit".into()))?;
934                self.op_commit(&args, ctx, Path::new(path))
935            }
936            "revert" => {
937                let path = args
938                    .path
939                    .as_ref()
940                    .ok_or_else(|| Error::ExecutionFailed("Path required for revert".into()))?;
941                self.op_revert(&args, ctx, Path::new(path))
942            }
943            "clean" => {
944                let path = args
945                    .path
946                    .as_ref()
947                    .ok_or_else(|| Error::ExecutionFailed("Path required for clean".into()))?;
948                self.op_clean(&args, ctx, Path::new(path))
949            }
950            "status" => {
951                let path = args
952                    .path
953                    .as_ref()
954                    .ok_or_else(|| Error::ExecutionFailed("Path required for status".into()))?;
955                self.op_status(&args, ctx, Path::new(path))
956            }
957            _ => Err(Error::ExecutionFailed(format!(
958                "Unknown operation: {}",
959                args.operation
960            ))),
961        };
962
963        let telemetry_after = Telemetry::capture();
964        let process_after = ProcessSnapshot::capture();
965
966        let mut output = result?;
967        output.data.as_object_mut().map(|obj| {
968            obj.insert("telemetry_before".to_string(), serde_json::to_value(&telemetry_before).unwrap_or(Value::Null));
969            obj.insert("telemetry_after".to_string(), serde_json::to_value(&telemetry_after).unwrap_or(Value::Null));
970            obj.insert("process_before".to_string(), serde_json::to_value(&process_before.summary).unwrap_or(Value::Null));
971            obj.insert("process_after".to_string(), serde_json::to_value(&process_after.summary).unwrap_or(Value::Null));
972        });
973
974        Ok(output)
975    }
976}
977
#[cfg(test)]
mod tests {
    use super::*;
    use crate::capability::Capability;

    /// Returns a backup directory that belongs to a single test.
    ///
    /// The test harness runs tests in parallel by default, so one directory
    /// shared by every test can be deleted by one test's cleanup while another
    /// test is still using it. Keying the name on the test and the process id
    /// keeps tests (and concurrent test runs) out of each other's way.
    fn test_backup_dir(test_name: &str) -> PathBuf {
        std::env::temp_dir().join(format!(
            "runtimo_git_test_{}_{}",
            test_name,
            std::process::id()
        ))
    }

    /// HTTPS and SSH remotes are accepted; plain HTTP and malformed URLs are not.
    #[test]
    fn validates_git_url_schemes() {
        assert!(GitExec::validate_url("https://github.com/user/repo.git").is_ok());
        assert!(GitExec::validate_url("git@github.com:user/repo.git").is_ok());

        assert!(GitExec::validate_url("http://example.com/repo.git").is_err());
        assert!(GitExec::validate_url("not-a-url").is_err());
        assert!(GitExec::validate_url("").is_err());
    }

    /// Loopback, link-local, private-range and cloud-metadata hosts are rejected.
    #[test]
    fn blocks_ssrf_urls() {
        assert!(GitExec::validate_url("https://169.254.169.254/latest/meta-data/").is_err());
        assert!(GitExec::validate_url("https://127.0.0.1/repo.git").is_err());
        assert!(GitExec::validate_url("https://localhost/repo.git").is_err());
        assert!(GitExec::validate_url("https://192.168.1.1/repo.git").is_err());
        assert!(GitExec::validate_url("https://metadata.google.internal/computeMetadata").is_err());
    }

    /// Embedded credentials are masked; URLs without credentials are untouched.
    #[test]
    fn sanitizes_credentials_from_url() {
        assert_eq!(
            GitExec::sanitize_url("https://user:pass@github.com/repo.git"),
            "https://***@github.com/repo.git"
        );
        assert_eq!(
            GitExec::sanitize_url("https://github.com/repo.git"),
            "https://github.com/repo.git"
        );
        assert_eq!(
            GitExec::sanitize_url("git@github.com:user/repo.git"),
            "git@github.com:user/repo.git"
        );
    }

    /// Well-known secret file names are detected at any directory depth.
    #[test]
    fn detects_secret_files() {
        assert!(GitExec::is_secret_file(".env"));
        assert!(GitExec::is_secret_file("config/.env"));
        assert!(GitExec::is_secret_file("credentials.json"));
        assert!(GitExec::is_secret_file(".ssh/id_rsa"));
        assert!(GitExec::is_secret_file("src/.env.local"));

        assert!(!GitExec::is_secret_file("main.rs"));
        assert!(!GitExec::is_secret_file("Cargo.toml"));
        assert!(!GitExec::is_secret_file("README.md"));
    }

    /// Ordinary branch names pass; empty names and ref-syntax tricks do not.
    #[test]
    fn validates_branch_name() {
        assert!(GitExec::validate_branch_name("main").is_ok());
        assert!(GitExec::validate_branch_name("feature/my-branch").is_ok());
        assert!(GitExec::validate_branch_name("v1.0").is_ok());

        assert!(GitExec::validate_branch_name("").is_err());
        assert!(GitExec::validate_branch_name("bad..name").is_err());
        assert!(GitExec::validate_branch_name("@{..}").is_err());
    }

    /// Abbreviated (7+ chars) and full 40-char hex SHAs pass; others do not.
    #[test]
    fn validates_commit_sha() {
        assert!(GitExec::validate_commit_sha("abc1234").is_ok());
        assert!(GitExec::validate_commit_sha("a1b2c3d4").is_ok());
        assert!(GitExec::validate_commit_sha("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0").is_ok());

        assert!(GitExec::validate_commit_sha("abc123").is_err());
        assert!(GitExec::validate_commit_sha("").is_err());
        assert!(GitExec::validate_commit_sha("xyz123").is_err());
    }

    /// A clone target that climbs out of the tree is rejected by `validate`.
    #[test]
    fn rejects_path_traversal() {
        let backup_dir = test_backup_dir("rejects_path_traversal");
        let cap = GitExec::new(backup_dir.clone()).expect("Failed to create GitExec");

        let result = cap.validate(&serde_json::json!({
            "operation": "clone",
            "url": "https://github.com/user/repo.git",
            "path": "../../../etc/passwd"
        }));

        // Clean up before asserting so a failing test does not leak the directory.
        std::fs::remove_dir_all(&backup_dir).ok();

        let err = result.unwrap_err();
        assert!(err.to_string().contains("traversal"));
    }

    /// An operation outside the supported set is rejected by `validate`.
    #[test]
    fn rejects_invalid_operation() {
        let backup_dir = test_backup_dir("rejects_invalid_operation");
        let cap = GitExec::new(backup_dir.clone()).expect("Failed to create GitExec");

        let result = cap.validate(&serde_json::json!({
            "operation": "invalid_op"
        }));

        std::fs::remove_dir_all(&backup_dir).ok();

        let err = result.unwrap_err();
        assert!(err.to_string().contains("Invalid operation"));
    }

    /// `status` on a path that does not exist fails instead of reporting success.
    #[test]
    fn status_on_nonexistent_repo() {
        let backup_dir = test_backup_dir("status_on_nonexistent_repo");
        let cap = GitExec::new(backup_dir.clone()).expect("Failed to create GitExec");

        let result = cap.execute(
            &serde_json::json!({
                "operation": "status",
                "path": "/tmp/nonexistent_repo"
            }),
            &Context {
                dry_run: false,
                job_id: "test".into(),
                working_dir: std::env::temp_dir(),
            },
        );

        std::fs::remove_dir_all(&backup_dir).ok();

        assert!(result.is_err());
    }

    /// Blank messages are rejected and NUL bytes are stripped.
    #[test]
    fn sanitizes_commit_message() {
        assert!(GitExec::sanitize_commit_message("valid commit").is_ok());
        assert!(GitExec::sanitize_commit_message("  trimmed  ").is_ok());
        assert!(GitExec::sanitize_commit_message("").is_err());
        assert!(GitExec::sanitize_commit_message("   ").is_err());
        let result = GitExec::sanitize_commit_message("hello\x00world").unwrap();
        assert!(!result.contains('\x00'));
    }

    /// A git subprocess that outlives its timeout is reported as timed out.
    ///
    /// NOTE(review): this needs a `git` binary and relies on the connection to
    /// 10.255.255.1 hanging rather than failing fast — presumably the address
    /// was picked because nothing answers there. On a host where the connect
    /// is refused immediately the "timed out" assertion would not hold.
    #[test]
    fn timeout_enforced_on_git_command() {
        let tmp = std::env::temp_dir().join(format!(
            "runtimo_git_timeout_test_{}",
            std::process::id()
        ));
        std::fs::create_dir_all(&tmp).ok();
        Command::new("git").arg("init").current_dir(&tmp).output().ok();

        let result = GitExec::run_git_with_timeout(
            &tmp,
            &["clone", "https://10.255.255.1/nonexistent.git"],
            1,
        );

        std::fs::remove_dir_all(&tmp).ok();

        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("timed out"));
    }
}