Skip to main content

sparrow/
hook_cmd.rs

1//! `sparrow hook install` and `sparrow hook scan` commands.
2//!
//! The `install` subcommand copies a pre-commit hook script into the
//! repository's Git hooks directory; that hook scans staged files for
//! secrets, tokens, and other sensitive content before each commit.
6//!
7//! The `scan` subcommand performs a one-off scan of staged files (or the
8//! entire working tree) for secrets and sensitive patterns.
9
10use std::path::PathBuf;
11use std::process::Command;
12
13// ─── Hook install ────────────────────────────────────────────────────────────
14
15/// Install the Sparrow pre-commit hook into the current git repository.
16///
17/// Copies `hooks/pre-commit` to `.git/hooks/pre-commit` and makes it
18/// executable. If a hook already exists, it is backed up first.
19pub fn run_hook_install() -> anyhow::Result<()> {
20    // Find the git repository root
21    let repo_root = find_git_root()?;
22
23    let hooks_dir = repo_root.join(".git").join("hooks");
24    let pre_commit_path = hooks_dir.join("pre-commit");
25
26    // Find the Sparrow hooks directory (next to the binary, or in the project)
27    let sparrow_hook_script = find_sparrow_hook_script()?;
28
29    println!("🔒 Sparrow Hook Install");
30    println!("   Dépôt  : {}", repo_root.display());
31    println!("   Script : {}", sparrow_hook_script.display());
32
33    // Create hooks dir if needed
34    std::fs::create_dir_all(&hooks_dir)?;
35
36    // Backup existing hook
37    if pre_commit_path.exists() {
38        let backup = hooks_dir.join("pre-commit.sparrow-backup");
39        println!(
40            "   ⚠️  Un hook pre-commit existe déjà → backup → {}",
41            backup.display()
42        );
43        std::fs::rename(&pre_commit_path, &backup)?;
44    }
45
46    // Copy the hook script
47    std::fs::copy(&sparrow_hook_script, &pre_commit_path)?;
48
49    // Make it executable
50    #[cfg(unix)]
51    {
52        use std::os::unix::fs::PermissionsExt;
53        let mut perms = std::fs::metadata(&pre_commit_path)?.permissions();
54        perms.set_mode(0o755);
55        std::fs::set_permissions(&pre_commit_path, perms)?;
56    }
57
58    println!("   ✓ Hook pre-commit installé !");
59    println!();
60    println!("   Le hook scanne automatiquement :");
61    println!("   • Clés API et tokens (AWS, GitHub, OpenAI, etc.)");
62    println!("   • Fichiers .env, credentials.json");
63    println!("   • Fichiers d'agent IA (.sparrow/, .codex/)");
64    println!("   • Secrets en clair (password, secret, token)");
65    println!();
66    println!("   Pour désinstaller : rm {}", pre_commit_path.display());
67    println!(
68        "   Pour restaurer l'ancien hook : mv {} {}",
69        hooks_dir.join("pre-commit.sparrow-backup").display(),
70        pre_commit_path.display(),
71    );
72
73    Ok(())
74}
75
76// ─── Hook scan ───────────────────────────────────────────────────────────────
77
78/// Run a one-off scan of staged files (or the entire working tree if
79/// `scan_all` is true) for secrets and sensitive patterns.
80pub fn run_hook_scan(scan_all: bool) -> anyhow::Result<()> {
81    let repo_root = match find_git_root() {
82        Ok(r) => r,
83        Err(_) => {
84            // Not in a git repo — scan the current directory
85            std::env::current_dir()?
86        }
87    };
88
89    println!("🔍 Sparrow Security Scan");
90    println!("   Répertoire : {}", repo_root.display());
91
92    if scan_all {
93        println!("   Mode : arbre complet");
94        println!();
95        scan_directory(&repo_root)?;
96    } else {
97        println!("   Mode : fichiers stagés (git diff --cached)");
98        println!();
99        scan_staged_files(&repo_root)?;
100    }
101
102    Ok(())
103}
104
105// ─── Git helpers ─────────────────────────────────────────────────────────────
106
107fn find_git_root() -> anyhow::Result<PathBuf> {
108    let output = Command::new("git")
109        .args(["rev-parse", "--show-toplevel"])
110        .output()?;
111
112    if !output.status.success() {
113        anyhow::bail!(
114            "Pas dans un dépôt Git.\n\
115             → Lance cette commande depuis un dépôt Git.\n\
116             → Ou utilise `sparrow hook scan --all` pour scanner le dossier courant."
117        );
118    }
119
120    let path = String::from_utf8_lossy(&output.stdout).trim().to_string();
121    Ok(PathBuf::from(path))
122}
123
124fn find_sparrow_hook_script() -> anyhow::Result<PathBuf> {
125    // Look in several places:
126    // 1. Relative to the current executable
127    // 2. In the project checkout at hooks/pre-commit
128    // 3. Embedded fallback
129
130    let candidates = vec![
131        std::env::current_exe()
132            .ok()
133            .and_then(|e| e.parent().map(|p| p.join("hooks/pre-commit")))
134            .unwrap_or_default(),
135        PathBuf::from("hooks/pre-commit"),
136        PathBuf::from("/tmp/Sparrow_cleanup/hooks/pre-commit"),
137    ];
138
139    for candidate in &candidates {
140        if candidate.exists() {
141            return Ok(candidate.clone());
142        }
143    }
144
145    // If the script file doesn't exist yet, create it from our embedded content
146    let hooks_dir = PathBuf::from("hooks");
147    std::fs::create_dir_all(&hooks_dir)?;
148    let hook_path = hooks_dir.join("pre-commit");
149    if !hook_path.exists() {
150        std::fs::write(&hook_path, PRE_COMMIT_HOOK_SCRIPT)?;
151        #[cfg(unix)]
152        {
153            use std::os::unix::fs::PermissionsExt;
154            let mut perms = std::fs::metadata(&hook_path)?.permissions();
155            perms.set_mode(0o755);
156            std::fs::set_permissions(&hook_path, perms)?;
157        }
158    }
159    Ok(hook_path)
160}
161
162// ─── Scanning logic ──────────────────────────────────────────────────────────
163
164/// Scan staged files (git diff --cached --name-only).
165fn scan_staged_files(repo_root: &std::path::Path) -> anyhow::Result<()> {
166    let output = Command::new("git")
167        .args(["diff", "--cached", "--name-only", "--diff-filter=ACM"])
168        .current_dir(repo_root)
169        .output()?;
170
171    if !output.status.success() {
172        // No staged files or not a git repo — scan nothing
173        println!("   Aucun fichier stagé à scanner.");
174        return Ok(());
175    }
176
177    let files: Vec<PathBuf> = String::from_utf8_lossy(&output.stdout)
178        .lines()
179        .filter(|l| !l.is_empty())
180        .map(|l| repo_root.join(l))
181        .collect();
182
183    if files.is_empty() {
184        println!("   ✓ Aucun fichier stagé trouvé.");
185        return Ok(());
186    }
187
188    println!("   {} fichier(s) à scanner...\n", files.len());
189
190    let mut issues_found = 0;
191
192    for file in &files {
193        if let Ok(content) = std::fs::read_to_string(file) {
194            let findings = scan_content(&content, file);
195            if !findings.is_empty() {
196                for finding in findings {
197                    println!("   ⚠️  {} : {}", file.display(), finding);
198                    issues_found += 1;
199                }
200            }
201        }
202    }
203
204    if issues_found == 0 {
205        println!("   ✓ Aucun problème détecté !");
206    } else {
207        println!("\n   ⚠️  {} problème(s) détecté(s) !", issues_found);
208        println!("   → Corrige-les avant de commit.\n");
209        // Exit with non-zero to block the commit
210        std::process::exit(1);
211    }
212
213    Ok(())
214}
215
216/// Scan all files in a directory recursively.
217fn scan_directory(dir: &std::path::Path) -> anyhow::Result<()> {
218    let mut issues_found = 0;
219    let mut files_scanned = 0;
220
221    for entry in walkdir::WalkDir::new(dir)
222        .into_iter()
223        .filter_map(|e| e.ok())
224        .filter(|e| e.file_type().is_file())
225    {
226        let path = entry.path();
227
228        // Skip hidden directories (but not hidden files at root)
229        if path.components().any(|c| {
230            c.as_os_str().to_string_lossy().starts_with('.')
231                && c != path
232                    .components()
233                    .next()
234                    .unwrap_or(std::path::Component::CurDir)
235        }) {
236            // Skip .git, .sparrow, node_modules, target, etc.
237            let path_str = path.to_string_lossy();
238            if path_str.contains("/.git/")
239                || path_str.contains("/node_modules/")
240                || path_str.contains("/target/")
241                || path_str.contains("/.sparrow/")
242                || path_str.contains("/__pycache__/")
243            {
244                continue;
245            }
246        }
247
248        // Skip binary files (simple heuristic)
249        if let Ok(content) = std::fs::read(path) {
250            if content.contains(&0) {
251                continue; // null byte → binary
252            }
253        }
254
255        files_scanned += 1;
256
257        if let Ok(content) = std::fs::read_to_string(path) {
258            let findings = scan_content(&content, path);
259            for finding in findings {
260                println!("   ⚠️  {} : {}", path.display(), finding);
261                issues_found += 1;
262            }
263        }
264    }
265
266    println!("\n   {} fichiers scannés.", files_scanned);
267    if issues_found == 0 {
268        println!("   ✓ Aucun problème détecté !");
269    } else {
270        println!("   ⚠️  {} problème(s) détecté(s) !", issues_found);
271    }
272
273    Ok(())
274}
275
276// ─── Secret patterns ─────────────────────────────────────────────────────────
277
/// Content patterns that indicate a potential secret leak.
///
/// Each entry is `(regular expression, label shown to the user)`. The
/// expressions are matched against file *content*, line by line, so only
/// patterns describing text that appears inside a file belong here.
/// `\x27` and `\x22` stand for the single and double quote characters.
static SECRET_PATTERNS: &[(&str, &str)] = &[
    // API keys (generic)
    (
        r"(?i)(?:api[_-]?key|api[_-]?secret|apikey)\s*[:=]\s*[\x27\x22]?\w{20,}[\x27\x22]?",
        "Clé API en clair",
    ),
    (
        r"(?i)(?:secret[_-]?key|secretkey)\s*[:=]\s*[\x27\x22]?\w{20,}[\x27\x22]?",
        "Clé secrète en clair",
    ),
    (
        r"(?i)(?:access[_-]?key|accesskey)\s*[:=]\s*[\x27\x22]?\w{16,}[\x27\x22]?",
        "Clé d'accès en clair",
    ),
    // AWS
    (r"AKIA[0-9A-Z]{16}", "AWS Access Key ID"),
    (
        r"(?i)aws[_-]?secret[_-]?access[_-]?key\s*[:=]\s*[\x27\x22]?[0-9a-zA-Z/+]{40}[\x27\x22]?",
        "AWS Secret Key",
    ),
    // GitHub
    (r"ghp_[0-9a-zA-Z]{36}", "GitHub Personal Access Token"),
    (r"github_pat_[0-9a-zA-Z_]{36,}", "GitHub PAT (fine-grained)"),
    (r"gho_[0-9a-zA-Z]{36}", "GitHub OAuth Token"),
    // OpenAI
    (r"sk-[0-9a-zA-Z]{32,}", "OpenAI API Key"),
    // Project keys use a base64url-style body, so `-` and `_` must be allowed.
    (r"sk-proj-[0-9a-zA-Z_-]{32,}", "OpenAI Project Key"),
    // Anthropic — keys look like `sk-ant-api03-…`, i.e. the body contains
    // hyphens and underscores.
    (r"sk-ant-[0-9a-zA-Z_-]{32,}", "Anthropic API Key"),
    // Generic token patterns
    (
        r"(?i)(?:password|passwd|pwd)\s*[:=]\s*[\x27\x22]?\S{4,}[\x27\x22]?",
        "Mot de passe en clair",
    ),
    (
        r"(?i)(?:token|auth[_-]?token)\s*[:=]\s*[\x27\x22]?\S{20,}[\x27\x22]?",
        "Token en clair",
    ),
    // Private keys. PGP armor reads `PRIVATE KEY BLOCK`, hence the optional
    // suffix; `ENCRYPTED` covers passphrase-protected PKCS#8 keys.
    (
        r"-----BEGIN (?:RSA|DSA|EC|OPENSSH|PGP|ENCRYPTED) PRIVATE KEY(?: BLOCK)?-----",
        "Clé privée",
    ),
    (r"-----BEGIN PRIVATE KEY-----", "Clé privée"),
    // JWT tokens
    (
        r"eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}",
        "JWT Token",
    ),
    // Slack
    (r"xox[baprs]-[0-9a-zA-Z-]{10,}", "Slack Token"),
    // Stripe
    (r"sk_live_[0-9a-zA-Z]{24,}", "Stripe Live Key"),
    (r"pk_live_[0-9a-zA-Z]{24,}", "Stripe Live Publishable Key"),
    // Google
    (r"AIza[0-9A-Za-z_-]{35}", "Google API Key"),
    // NOTE: agent configuration files (`.sparrow/`, `.codex/`, …) are a
    // property of a file's *path*, not of its content, so no entry for them
    // belongs in this table. A content pattern such as `\.(agent|ai|llm)`
    // matches ordinary text like `self.agent` or `example.ai` and would
    // block unrelated commits.
];
341
/// File names that should never be committed.
///
/// An entry is either an exact file name or, when it starts with `*`, a
/// suffix pattern (`*.pem` matches every name ending in `.pem`).
static SENSITIVE_FILENAMES: &[&str] = &[
    // Environment files
    ".env", ".env.local", ".env.production", ".env.development",
    // Credential / secret stores
    "credentials.json", "service-account.json", "secrets.yaml", "secrets.yml",
    // Tool configuration that commonly embeds tokens
    ".netrc", ".npmrc", ".pypirc",
    // SSH private keys
    "id_rsa", "id_ed25519", "id_ecdsa",
    // Key and certificate containers, matched by suffix
    "*.pem", "*.key", "*.p12", "*.pfx",
];
363
364/// Scan a file's content for secrets.
365fn scan_content(content: &str, path: &std::path::Path) -> Vec<String> {
366    let mut findings = Vec::new();
367
368    // Check filename
369    let filename = path
370        .file_name()
371        .map(|n| n.to_string_lossy())
372        .unwrap_or_default();
373
374    for &sensitive in SENSITIVE_FILENAMES {
375        if let Some(ext) = sensitive.strip_prefix('*') {
376            // e.g., ".pem" from "*.pem"
377            if filename.ends_with(ext) {
378                findings.push(format!("Fichier sensible ({sensitive})"));
379            }
380        } else if filename == sensitive {
381            findings.push(format!("Fichier sensible ({sensitive})"));
382        }
383    }
384
385    // Check content patterns
386    for &(pattern, label) in SECRET_PATTERNS {
387        if let Ok(re) = regex::Regex::new(pattern) {
388            if re.is_match(content) {
389                // Find the matching line for context
390                for (line_num, line) in content.lines().enumerate() {
391                    if re.is_match(line) {
392                        // Redact the secret part for display
393                        let redacted = re.replace(line, |_caps: &regex::Captures| "***REDACTED***");
394                        findings.push(format!(
395                            "{} (ligne {}) : {}",
396                            label,
397                            line_num + 1,
398                            redacted.trim(),
399                        ));
400                        break; // One finding per pattern per file
401                    }
402                }
403            }
404        }
405    }
406
407    findings
408}
409
410// ─── Embedded pre-commit hook script (shell) ─────────────────────────────────
411
/// The Bash script that is installed as a Git pre-commit hook.
///
/// The script is self-contained: it inspects the staged files with `git` and
/// `grep` and does not invoke the `sparrow` binary. It exits with status 1
/// (blocking the commit) when a sensitive file name or a known secret format
/// is staged, and only prints a warning for agent configuration directories
/// and generic key/password assignments.
///
/// The text must not contain the sequence `"#`, which would terminate the
/// raw string literal.
pub const PRE_COMMIT_HOOK_SCRIPT: &str = r#"#!/usr/bin/env bash
# Sparrow Pre-Commit Hook
# Scans staged files for secrets, tokens, and sensitive patterns.
# Installed by: sparrow hook install
# Docs: https://github.com/ucav/Sparrow

set -euo pipefail

RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

echo -e "${YELLOW}🔒 Sparrow pre-commit hook — scanning staged files...${NC}"

# ─── 1. Check for sensitive filenames ────────────────────────────────
# Read a NUL-delimited listing so that paths containing spaces or other
# special characters stay in one piece.
STAGED_FILES=()
while IFS= read -r -d '' staged_path; do
    STAGED_FILES+=("$staged_path")
done < <(git diff --cached --name-only -z --diff-filter=ACM 2>/dev/null || true)

if [ ${#STAGED_FILES[@]} -eq 0 ]; then
    echo -e "${GREEN}✓ No staged files to scan.${NC}"
    exit 0
fi

SENSITIVE_NAMES=(
    ".env" ".env.local" ".env.production" ".env.development"
    "credentials.json" "service-account.json" "secrets.yaml" "secrets.yml"
    ".netrc" ".npmrc" ".pypirc"
    "id_rsa" "id_ed25519" "id_ecdsa"
)

ISSUES=0

for file in "${STAGED_FILES[@]}"; do
    filename=$(basename "$file")
    for sensitive in "${SENSITIVE_NAMES[@]}"; do
        if [ "$filename" = "$sensitive" ]; then
            echo -e "${RED}✗ BLOCKED: Sensitive file staged: $file${NC}"
            echo -e "  → Remove it: git rm --cached $file"
            ISSUES=$((ISSUES + 1))
        fi
    done

    # Check for .pem, .key, .p12, .pfx extensions
    case "$filename" in
        *.pem|*.key|*.p12|*.pfx)
            echo -e "${RED}✗ BLOCKED: Private key file staged: $file${NC}"
            echo -e "  → Remove it: git rm --cached $file"
            ISSUES=$((ISSUES + 1))
            ;;
    esac

    # Check for agent config directories
    case "$file" in
        .sparrow/*|.codex/*|.agent/*)
            echo -e "${YELLOW}⚠ WARNING: Agent config directory staged: $file${NC}"
            echo -e "  → Consider adding to .gitignore"
            ;;
    esac

    # ─── 2. Scan content of staged files ─────────────────────────────
    if [ -f "$file" ]; then
        # Get the staged content (not working tree)
        STAGED_CONTENT=$(git show ":$file" 2>/dev/null || true)

        if [ -n "$STAGED_CONTENT" ]; then
            # The content is fed to grep through a here-string, never through
            # a pipe: with `pipefail`, grep -q exiting on its first match can
            # kill the writer with SIGPIPE and turn a hit into a miss.

            # GitHub tokens
            if grep -qE 'ghp_[0-9a-zA-Z]{36}|github_pat_[0-9a-zA-Z_]{36,}' <<<"$STAGED_CONTENT"; then
                echo -e "${RED}✗ BLOCKED: GitHub token detected in $file${NC}"
                ISSUES=$((ISSUES + 1))
            fi

            # OpenAI keys (project keys may contain '-' and '_')
            if grep -qE 'sk-[0-9a-zA-Z]{32,}|sk-proj-[0-9a-zA-Z_-]{32,}' <<<"$STAGED_CONTENT"; then
                echo -e "${RED}✗ BLOCKED: OpenAI API key detected in $file${NC}"
                ISSUES=$((ISSUES + 1))
            fi

            # Anthropic keys (sk-ant-api03-..., body contains '-' and '_')
            if grep -qE 'sk-ant-[0-9a-zA-Z_-]{32,}' <<<"$STAGED_CONTENT"; then
                echo -e "${RED}✗ BLOCKED: Anthropic API key detected in $file${NC}"
                ISSUES=$((ISSUES + 1))
            fi

            # AWS keys
            if grep -qE 'AKIA[0-9A-Z]{16}' <<<"$STAGED_CONTENT"; then
                echo -e "${RED}✗ BLOCKED: AWS Access Key detected in $file${NC}"
                ISSUES=$((ISSUES + 1))
            fi

            # Generic API key patterns
            if grep -qiE '(api[_-]?key|api[_-]?secret|secret[_-]?key)\s*[:=]\s*['"'"'"]?\w{20,}' <<<"$STAGED_CONTENT"; then
                echo -e "${YELLOW}⚠ WARNING: Possible API key in $file${NC}"
                echo -e "  → Review and use environment variables instead"
            fi

            # Private keys. The pattern starts with dashes, so it must be
            # passed with -e or grep would parse it as an option.
            if grep -qE -e '-----BEGIN ((RSA|DSA|EC|OPENSSH|PGP|ENCRYPTED) )?PRIVATE KEY( BLOCK)?-----' <<<"$STAGED_CONTENT"; then
                echo -e "${RED}✗ BLOCKED: Private key detected in $file${NC}"
                ISSUES=$((ISSUES + 1))
            fi

            # Password/token assignment
            if grep -qiE '(password|passwd|pwd|token|auth[_-]?token)\s*[:=]\s*['"'"'"]?\S{4,}' <<<"$STAGED_CONTENT"; then
                echo -e "${YELLOW}⚠ WARNING: Possible hardcoded password/token in $file${NC}"
            fi
        fi
    fi
done

# ─── 3. Result ────────────────────────────────────────────────────────────────
if [ $ISSUES -gt 0 ]; then
    echo ""
    echo -e "${RED}══════════════════════════════════════════════════${NC}"
    echo -e "${RED}  COMMIT BLOQUÉ — $ISSUES problème(s) de sécurité${NC}"
    echo -e "${RED}══════════════════════════════════════════════════${NC}"
    echo ""
    echo "Pour ignorer (déconseillé) : git commit --no-verify"
    echo "Pour enlever un fichier du stage : git rm --cached <fichier>"
    echo "Pour désinstaller ce hook : rm .git/hooks/pre-commit"
    exit 1
else
    echo -e "${GREEN}✓ Aucun problème détecté — commit autorisé.${NC}"
    exit 0
fi
"#;