Skip to main content

sparrow/
hook_cmd.rs

//! `sparrow hook install` and `sparrow hook scan` commands.
//!
//! The `install` subcommand copies a pre-commit hook script into the
//! repository's Git hooks directory. Once installed, the hook scans staged
//! files for secrets, tokens, and other sensitive content before each commit.
//!
//! The `scan` subcommand performs a one-off scan of staged files (or the
//! entire working tree) for secrets and sensitive patterns.
9
10use std::path::PathBuf;
11use std::process::Command;
12
13// ─── Hook install ────────────────────────────────────────────────────────────
14
15/// Install the Sparrow pre-commit hook into the current git repository.
16///
17/// Copies `hooks/pre-commit` to `.git/hooks/pre-commit` and makes it
18/// executable. If a hook already exists, it is backed up first.
19pub fn run_hook_install() -> anyhow::Result<()> {
20    // Find the git repository root
21    let repo_root = find_git_root()?;
22
23    let hooks_dir = repo_root.join(".git").join("hooks");
24    let pre_commit_path = hooks_dir.join("pre-commit");
25
26    // Find the Sparrow hooks directory (next to the binary, or in the project)
27    let sparrow_hook_script = find_sparrow_hook_script()?;
28
29    println!("🔒 Sparrow Hook Install");
30    println!("   Dépôt  : {}", repo_root.display());
31    println!("   Script : {}", sparrow_hook_script.display());
32
33    // Create hooks dir if needed
34    std::fs::create_dir_all(&hooks_dir)?;
35
36    // Backup existing hook
37    if pre_commit_path.exists() {
38        let backup = hooks_dir.join("pre-commit.sparrow-backup");
39        println!("   ⚠️  Un hook pre-commit existe déjà → backup → {}", backup.display());
40        std::fs::rename(&pre_commit_path, &backup)?;
41    }
42
43    // Copy the hook script
44    std::fs::copy(&sparrow_hook_script, &pre_commit_path)?;
45
46    // Make it executable
47    #[cfg(unix)]
48    {
49        use std::os::unix::fs::PermissionsExt;
50        let mut perms = std::fs::metadata(&pre_commit_path)?.permissions();
51        perms.set_mode(0o755);
52        std::fs::set_permissions(&pre_commit_path, perms)?;
53    }
54
55    println!("   ✓ Hook pre-commit installé !");
56    println!();
57    println!("   Le hook scanne automatiquement :");
58    println!("   • Clés API et tokens (AWS, GitHub, OpenAI, etc.)");
59    println!("   • Fichiers .env, credentials.json");
60    println!("   • Fichiers d'agent IA (.sparrow/, .codex/)");
61    println!("   • Secrets en clair (password, secret, token)");
62    println!();
63    println!("   Pour désinstaller : rm {}", pre_commit_path.display());
64    println!(
65        "   Pour restaurer l'ancien hook : mv {} {}",
66        hooks_dir.join("pre-commit.sparrow-backup").display(),
67        pre_commit_path.display(),
68    );
69
70    Ok(())
71}
72
73// ─── Hook scan ───────────────────────────────────────────────────────────────
74
75/// Run a one-off scan of staged files (or the entire working tree if
76/// `scan_all` is true) for secrets and sensitive patterns.
77pub fn run_hook_scan(scan_all: bool) -> anyhow::Result<()> {
78    let repo_root = match find_git_root() {
79        Ok(r) => r,
80        Err(_) => {
81            // Not in a git repo — scan the current directory
82            std::env::current_dir()?
83        }
84    };
85
86    println!("🔍 Sparrow Security Scan");
87    println!("   Répertoire : {}", repo_root.display());
88
89    if scan_all {
90        println!("   Mode : arbre complet");
91        println!();
92        scan_directory(&repo_root)?;
93    } else {
94        println!("   Mode : fichiers stagés (git diff --cached)");
95        println!();
96        scan_staged_files(&repo_root)?;
97    }
98
99    Ok(())
100}
101
102// ─── Git helpers ─────────────────────────────────────────────────────────────
103
104fn find_git_root() -> anyhow::Result<PathBuf> {
105    let output = Command::new("git")
106        .args(["rev-parse", "--show-toplevel"])
107        .output()?;
108
109    if !output.status.success() {
110        anyhow::bail!(
111            "Pas dans un dépôt Git.\n\
112             → Lance cette commande depuis un dépôt Git.\n\
113             → Ou utilise `sparrow hook scan --all` pour scanner le dossier courant."
114        );
115    }
116
117    let path = String::from_utf8_lossy(&output.stdout).trim().to_string();
118    Ok(PathBuf::from(path))
119}
120
121fn find_sparrow_hook_script() -> anyhow::Result<PathBuf> {
122    // Look in several places:
123    // 1. Relative to the current executable
124    // 2. In the project checkout at hooks/pre-commit
125    // 3. Embedded fallback
126
127    let candidates = vec![
128        std::env::current_exe()
129            .ok()
130            .and_then(|e| e.parent().map(|p| p.join("hooks/pre-commit")))
131            .unwrap_or_default(),
132        PathBuf::from("hooks/pre-commit"),
133        PathBuf::from("/tmp/Sparrow_cleanup/hooks/pre-commit"),
134    ];
135
136    for candidate in &candidates {
137        if candidate.exists() {
138            return Ok(candidate.clone());
139        }
140    }
141
142    // If the script file doesn't exist yet, create it from our embedded content
143    let hooks_dir = PathBuf::from("hooks");
144    std::fs::create_dir_all(&hooks_dir)?;
145    let hook_path = hooks_dir.join("pre-commit");
146    if !hook_path.exists() {
147        std::fs::write(&hook_path, PRE_COMMIT_HOOK_SCRIPT)?;
148        #[cfg(unix)]
149        {
150            use std::os::unix::fs::PermissionsExt;
151            let mut perms = std::fs::metadata(&hook_path)?.permissions();
152            perms.set_mode(0o755);
153            std::fs::set_permissions(&hook_path, perms)?;
154        }
155    }
156    Ok(hook_path)
157}
158
159// ─── Scanning logic ──────────────────────────────────────────────────────────
160
161/// Scan staged files (git diff --cached --name-only).
162fn scan_staged_files(repo_root: &std::path::Path) -> anyhow::Result<()> {
163    let output = Command::new("git")
164        .args(["diff", "--cached", "--name-only", "--diff-filter=ACM"])
165        .current_dir(repo_root)
166        .output()?;
167
168    if !output.status.success() {
169        // No staged files or not a git repo — scan nothing
170        println!("   Aucun fichier stagé à scanner.");
171        return Ok(());
172    }
173
174    let files: Vec<PathBuf> = String::from_utf8_lossy(&output.stdout)
175        .lines()
176        .filter(|l| !l.is_empty())
177        .map(|l| repo_root.join(l))
178        .collect();
179
180    if files.is_empty() {
181        println!("   ✓ Aucun fichier stagé trouvé.");
182        return Ok(());
183    }
184
185    println!("   {} fichier(s) à scanner...\n", files.len());
186
187    let mut issues_found = 0;
188
189    for file in &files {
190        if let Ok(content) = std::fs::read_to_string(file) {
191            let findings = scan_content(&content, file);
192            if !findings.is_empty() {
193                for finding in findings {
194                    println!("   ⚠️  {} : {}", file.display(), finding);
195                    issues_found += 1;
196                }
197            }
198        }
199    }
200
201    if issues_found == 0 {
202        println!("   ✓ Aucun problème détecté !");
203    } else {
204        println!("\n   ⚠️  {} problème(s) détecté(s) !", issues_found);
205        println!("   → Corrige-les avant de commit.\n");
206        // Exit with non-zero to block the commit
207        std::process::exit(1);
208    }
209
210    Ok(())
211}
212
213/// Scan all files in a directory recursively.
214fn scan_directory(dir: &std::path::Path) -> anyhow::Result<()> {
215    
216
217    let mut issues_found = 0;
218    let mut files_scanned = 0;
219
220    for entry in walkdir::WalkDir::new(dir)
221        .into_iter()
222        .filter_map(|e| e.ok())
223        .filter(|e| e.file_type().is_file())
224    {
225        let path = entry.path();
226
227        // Skip hidden directories (but not hidden files at root)
228        if path
229            .components()
230            .any(|c| c.as_os_str().to_string_lossy().starts_with('.') && c != path.components().next().unwrap_or(std::path::Component::CurDir))
231        {
232            // Skip .git, .sparrow, node_modules, target, etc.
233            let path_str = path.to_string_lossy();
234            if path_str.contains("/.git/")
235                || path_str.contains("/node_modules/")
236                || path_str.contains("/target/")
237                || path_str.contains("/.sparrow/")
238                || path_str.contains("/__pycache__/")
239            {
240                continue;
241            }
242        }
243
244        // Skip binary files (simple heuristic)
245        if let Ok(content) = std::fs::read(path) {
246            if content.iter().any(|&b| b == 0) {
247                continue; // null byte → binary
248            }
249        }
250
251        files_scanned += 1;
252
253        if let Ok(content) = std::fs::read_to_string(path) {
254            let findings = scan_content(&content, path);
255            for finding in findings {
256                println!("   ⚠️  {} : {}", path.display(), finding);
257                issues_found += 1;
258            }
259        }
260    }
261
262    println!("\n   {} fichiers scannés.", files_scanned);
263    if issues_found == 0 {
264        println!("   ✓ Aucun problème détecté !");
265    } else {
266        println!("   ⚠️  {} problème(s) détecté(s) !", issues_found);
267    }
268
269    Ok(())
270}
271
272// ─── Secret patterns ─────────────────────────────────────────────────────────
273
/// Patterns that indicate a potential secret leak.
///
/// Each entry is `(regex, label)`: the regex is matched against file content
/// and the label is shown to the user when it matches.
static SECRET_PATTERNS: &[(&str, &str)] = &[
    // API keys (generic)
    (r"(?i)(?:api[_-]?key|api[_-]?secret|apikey)\s*[:=]\s*[\x27\x22]?\w{20,}[\x27\x22]?", "Clé API en clair"),
    (r"(?i)(?:secret[_-]?key|secretkey)\s*[:=]\s*[\x27\x22]?\w{20,}[\x27\x22]?", "Clé secrète en clair"),
    (r"(?i)(?:access[_-]?key|accesskey)\s*[:=]\s*[\x27\x22]?\w{16,}[\x27\x22]?", "Clé d'accès en clair"),

    // AWS
    (r"AKIA[0-9A-Z]{16}", "AWS Access Key ID"),
    (r"(?i)aws[_-]?secret[_-]?access[_-]?key\s*[:=]\s*[\x27\x22]?[0-9a-zA-Z/+]{40}[\x27\x22]?", "AWS Secret Key"),

    // GitHub
    (r"ghp_[0-9a-zA-Z]{36}", "GitHub Personal Access Token"),
    (r"github_pat_[0-9a-zA-Z_]{36,}", "GitHub PAT (fine-grained)"),
    (r"gho_[0-9a-zA-Z]{36}", "GitHub OAuth Token"),

    // OpenAI (project keys may contain `-` and `_`)
    (r"sk-[0-9a-zA-Z]{32,}", "OpenAI API Key"),
    (r"sk-proj-[0-9a-zA-Z_-]{32,}", "OpenAI Project Key"),

    // Anthropic (keys contain `-` and `_` after the `sk-ant-` prefix)
    (r"sk-ant-[0-9a-zA-Z_-]{32,}", "Anthropic API Key"),

    // Generic token patterns
    (r"(?i)(?:password|passwd|pwd)\s*[:=]\s*[\x27\x22]?\S{4,}[\x27\x22]?", "Mot de passe en clair"),
    (r"(?i)(?:token|auth[_-]?token)\s*[:=]\s*[\x27\x22]?\S{20,}[\x27\x22]?", "Token en clair"),

    // Private keys: PEM headers with an optional algorithm word, and the
    // OpenPGP armor header, which reads "PRIVATE KEY BLOCK".
    (r"-----BEGIN (?:(?:RSA|DSA|EC|OPENSSH|ENCRYPTED) )?PRIVATE KEY-----", "Clé privée"),
    (r"-----BEGIN PGP PRIVATE KEY BLOCK-----", "Clé privée"),

    // JWT tokens
    (r"eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}", "JWT Token"),

    // Slack
    (r"xox[baprs]-[0-9a-zA-Z-]{10,}", "Slack Token"),

    // Stripe
    (r"sk_live_[0-9a-zA-Z]{24,}", "Stripe Live Key"),
    (r"pk_live_[0-9a-zA-Z]{24,}", "Stripe Live Publishable Key"),

    // Google
    (r"AIza[0-9A-Za-z_-]{35}", "Google API Key"),

    // Agent files: a dot-name such as `.sparrow` or `.codex` that starts a
    // path segment. The leading guard excludes `foo.ai` / `self.agent`, and
    // the trailing `\b` excludes longer names such as `.air`.
    (r"(?i)(?:^|[^\w.])\.(?:sparrow|codex|agent|ai|llm)\b", "Fichier de config agent IA"),
];
321
/// File names that are reported as sensitive whenever they are scanned.
///
/// Plain entries are compared against the whole file name; entries that begin
/// with `*` are compared against the end of the file name.
static SENSITIVE_FILENAMES: &[&str] = &[
    // Environment files
    ".env", ".env.local", ".env.production", ".env.development",
    // Credential and secret stores
    "credentials.json", "service-account.json", "secrets.yaml", "secrets.yml",
    // Tool configuration that commonly embeds auth tokens
    ".netrc", ".npmrc", ".pypirc",
    // SSH private keys
    "id_rsa", "id_ed25519", "id_ecdsa",
    // Key and certificate containers, matched by suffix
    "*.pem", "*.key", "*.p12", "*.pfx",
];
343
344/// Scan a file's content for secrets.
345fn scan_content(content: &str, path: &std::path::Path) -> Vec<String> {
346    let mut findings = Vec::new();
347
348    // Check filename
349    let filename = path
350        .file_name()
351        .map(|n| n.to_string_lossy())
352        .unwrap_or_default();
353
354    for &sensitive in SENSITIVE_FILENAMES {
355        if sensitive.starts_with('*') {
356            let ext = &sensitive[1..]; // e.g., ".pem" from "*.pem"
357            if filename.ends_with(ext) {
358                findings.push(format!("Fichier sensible ({sensitive})"));
359            }
360        } else if filename == sensitive {
361            findings.push(format!("Fichier sensible ({sensitive})"));
362        }
363    }
364
365    // Check content patterns
366    for &(pattern, label) in SECRET_PATTERNS {
367        if let Ok(re) = regex::Regex::new(pattern) {
368            if re.is_match(content) {
369                // Find the matching line for context
370                for (line_num, line) in content.lines().enumerate() {
371                    if re.is_match(line) {
372                        // Redact the secret part for display
373                        let redacted = re.replace(line, |_caps: &regex::Captures| {
374                            "***REDACTED***"
375                        });
376                        findings.push(format!(
377                            "{} (ligne {}) : {}",
378                            label,
379                            line_num + 1,
380                            redacted.trim(),
381                        ));
382                        break; // One finding per pattern per file
383                    }
384                }
385            }
386        }
387    }
388
389    findings
390}
391
392// ─── Embedded pre-commit hook script (shell) ─────────────────────────────────
393
/// The bash script that is installed as a Git pre-commit hook.
///
/// The script is self-contained: it lists the staged files with `git diff
/// --cached`, checks their names against a list of sensitive file names and
/// extensions, and greps the staged content (`git show ":<path>"`) for known
/// token formats. It exits with status 1 when a blocking issue was found and
/// 0 otherwise. Generic API-key and password matches only produce warnings.
pub const PRE_COMMIT_HOOK_SCRIPT: &str = r#"#!/usr/bin/env bash
# Sparrow Pre-Commit Hook
# Scans staged files for secrets, tokens, and sensitive patterns.
# Installed by: sparrow hook install
# Docs: https://github.com/ucav/Sparrow

set -euo pipefail

RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

echo -e "${YELLOW}🔒 Sparrow pre-commit hook — scanning staged files...${NC}"

# ─── 1. Check for sensitive filenames ────────────────────────────────
STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACM 2>/dev/null || true)

if [ -z "$STAGED_FILES" ]; then
    echo -e "${GREEN}✓ No staged files to scan.${NC}"
    exit 0
fi

SENSITIVE_NAMES=(
    ".env" ".env.local" ".env.production" ".env.development"
    "credentials.json" "service-account.json" "secrets.yaml" "secrets.yml"
    ".netrc" ".npmrc" ".pypirc"
    "id_rsa" "id_ed25519" "id_ecdsa"
)

ISSUES=0

# Iterate over a NUL-delimited listing so that paths containing spaces or
# other special characters are processed as a single entry.
while IFS= read -r -d '' file; do
    filename=$(basename "$file")
    for sensitive in "${SENSITIVE_NAMES[@]}"; do
        if [ "$filename" = "$sensitive" ]; then
            echo -e "${RED}✗ BLOCKED: Sensitive file staged: $file${NC}"
            echo -e "  → Remove it: git rm --cached $file"
            ISSUES=$((ISSUES + 1))
        fi
    done

    # Check for .pem, .key, .p12, .pfx extensions
    case "$filename" in
        *.pem|*.key|*.p12|*.pfx)
            echo -e "${RED}✗ BLOCKED: Private key file staged: $file${NC}"
            echo -e "  → Remove it: git rm --cached $file"
            ISSUES=$((ISSUES + 1))
            ;;
    esac

    # Check for agent config directories
    case "$file" in
        .sparrow/*|.codex/*|.agent/*)
            echo -e "${YELLOW}⚠ WARNING: Agent config directory staged: $file${NC}"
            echo -e "  → Consider adding to .gitignore"
            ;;
    esac

    # ─── 2. Scan content of staged files ─────────────────────────────
    if [ -f "$file" ]; then
        # Get the staged content (not working tree)
        STAGED_CONTENT=$(git show ":$file" 2>/dev/null || true)

        if [ -n "$STAGED_CONTENT" ]; then
            # The content is fed to grep through a here-string rather than a
            # pipe: with pipefail, `echo ... | grep -q` reports failure when
            # grep exits early on a match in a large file.

            # GitHub tokens
            if grep -qE 'ghp_[0-9a-zA-Z]{36}|github_pat_[0-9a-zA-Z_]{36,}' <<<"$STAGED_CONTENT"; then
                echo -e "${RED}✗ BLOCKED: GitHub token detected in $file${NC}"
                ISSUES=$((ISSUES + 1))
            fi

            # OpenAI keys
            if grep -qE 'sk-[0-9a-zA-Z]{32,}|sk-proj-[0-9a-zA-Z_-]{32,}' <<<"$STAGED_CONTENT"; then
                echo -e "${RED}✗ BLOCKED: OpenAI API key detected in $file${NC}"
                ISSUES=$((ISSUES + 1))
            fi

            # Anthropic keys
            if grep -qE 'sk-ant-[0-9a-zA-Z_-]{32,}' <<<"$STAGED_CONTENT"; then
                echo -e "${RED}✗ BLOCKED: Anthropic API key detected in $file${NC}"
                ISSUES=$((ISSUES + 1))
            fi

            # AWS keys
            if grep -qE 'AKIA[0-9A-Z]{16}' <<<"$STAGED_CONTENT"; then
                echo -e "${RED}✗ BLOCKED: AWS Access Key detected in $file${NC}"
                ISSUES=$((ISSUES + 1))
            fi

            # Generic API key patterns
            if grep -qiE '(api[_-]?key|api[_-]?secret|secret[_-]?key)\s*[:=]\s*['"'"'"]?\w{20,}' <<<"$STAGED_CONTENT"; then
                echo -e "${YELLOW}⚠ WARNING: Possible API key in $file${NC}"
                echo -e "  → Review and use environment variables instead"
            fi

            # Private keys (-e is required: the pattern starts with a dash)
            if grep -qE -e '-----BEGIN ((RSA|DSA|EC|OPENSSH|ENCRYPTED) )?PRIVATE KEY-----|-----BEGIN PGP PRIVATE KEY BLOCK-----' <<<"$STAGED_CONTENT"; then
                echo -e "${RED}✗ BLOCKED: Private key detected in $file${NC}"
                ISSUES=$((ISSUES + 1))
            fi

            # Password/token assignment
            if grep -qiE '(password|passwd|pwd|token|auth[_-]?token)\s*[:=]\s*['"'"'"]?\S{4,}' <<<"$STAGED_CONTENT"; then
                echo -e "${YELLOW}⚠ WARNING: Possible hardcoded password/token in $file${NC}"
            fi
        fi
    fi
done < <(git diff --cached --name-only --diff-filter=ACM -z 2>/dev/null || true)

# ─── 3. Result ────────────────────────────────────────────────────────────────
if [ $ISSUES -gt 0 ]; then
    echo ""
    echo -e "${RED}══════════════════════════════════════════════════${NC}"
    echo -e "${RED}  COMMIT BLOQUÉ — $ISSUES problème(s) de sécurité${NC}"
    echo -e "${RED}══════════════════════════════════════════════════${NC}"
    echo ""
    echo "Pour ignorer (déconseillé) : git commit --no-verify"
    echo "Pour enlever un fichier du stage : git rm --cached <fichier>"
    echo "Pour désinstaller ce hook : rm .git/hooks/pre-commit"
    exit 1
else
    echo -e "${GREEN}✓ Aucun problème détecté — commit autorisé.${NC}"
    exit 0
fi
"#;