Skip to main content

st/
security_scan.rs

1//! Security Scanner for detecting supply chain attack patterns
2//!
3//! Scans directories for malicious patterns including:
4//! - IPFS/IPNS phone-home endpoints
5//! - Fake cryptographic verification
6//! - Dynamic npm package execution
7//! - Known malicious package references
8
9use anyhow::Result;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::fs;
14use std::path::{Path, PathBuf};
15use walkdir::WalkDir;
16
17/// Risk level for detected patterns
18#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
19pub enum RiskLevel {
20    Low,
21    Medium,
22    High,
23    Critical,
24}
25
26impl std::fmt::Display for RiskLevel {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        match self {
29            RiskLevel::Low => write!(f, "LOW"),
30            RiskLevel::Medium => write!(f, "MEDIUM"),
31            RiskLevel::High => write!(f, "HIGH"),
32            RiskLevel::Critical => write!(f, "CRITICAL"),
33        }
34    }
35}
36
37/// A detected security pattern
38#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct SecurityFinding {
40    pub file_path: PathBuf,
41    pub line_number: usize,
42    pub pattern_name: String,
43    pub matched_text: String,
44    pub risk_level: RiskLevel,
45    pub description: String,
46}
47
48/// Pattern definition for security scanning
49struct Pattern {
50    name: &'static str,
51    regex: Regex,
52    risk_level: RiskLevel,
53    description: &'static str,
54}
55
56/// Security scanner configuration
57pub struct SecurityScanner {
58    patterns: Vec<Pattern>,
59    /// Paths that indicate executable context (higher risk)
60    executable_paths: Vec<&'static str>,
61    /// Paths that indicate history/logs (lower risk)
62    history_paths: Vec<&'static str>,
63}
64
65impl SecurityScanner {
66    pub fn new() -> Self {
67        let patterns = vec![
68            // IPFS Gateway URLs - phone home endpoints
69            Pattern {
70                name: "IPFS Gateway",
71                regex: Regex::new(r"https?://(ipfs\.io|dweb\.link|cloudflare-ipfs\.com|gateway\.pinata\.cloud|w3s\.link|4everland\.io)").unwrap(),
72                risk_level: RiskLevel::High,
73                description: "IPFS gateway URL detected - may fetch remote content",
74            },
75            // IPNS name patterns (mutable addressing)
76            Pattern {
77                name: "IPNS Name",
78                regex: Regex::new(r"k51qzi5uqu5[a-z0-9]{40,}").unwrap(),
79                risk_level: RiskLevel::Critical,
80                description: "IPNS mutable name - content can be changed by key holder",
81            },
82            // Dynamic npm execution with volatile tags
83            Pattern {
84                name: "Dynamic NPX",
85                regex: Regex::new(r"npx\s+[\w@/-]+@(alpha|beta|latest|next|canary)").unwrap(),
86                risk_level: RiskLevel::High,
87                description: "Dynamic npm execution - package content can change anytime",
88            },
89            // Known malicious packages
90            Pattern {
91                name: "Known Risk Package",
92                regex: Regex::new(r"(claude-flow|agentic-flow|ruv-swarm|flow-nexus|hive-mind|superdisco|agent-booster)(@|\s|$|/)").unwrap(),
93                risk_level: RiskLevel::Critical,
94                description: "Known supply chain risk package with remote injection capability",
95            },
96            // Fake signature verification (length-only check)
97            Pattern {
98                name: "Fake Verification",
99                regex: Regex::new(r"\.length\s*===?\s*(64|128|256)\s*[;)}]").unwrap(),
100                risk_level: RiskLevel::Critical,
101                description: "Fake cryptographic verification - checks length instead of signature",
102            },
103            // Registry signature without actual verification
104            Pattern {
105                name: "Unverified Signature",
106                regex: Regex::new(r"registrySignature.*randomBytes|crypto\.randomBytes.*signature").unwrap(),
107                risk_level: RiskLevel::Critical,
108                description: "Random bytes used as signature - no actual verification",
109            },
110            // Pattern fetching from remote
111            Pattern {
112                name: "Remote Pattern Fetch",
113                regex: Regex::new(r"fetch.*pattern|pattern.*fetch|loadPattern.*http|http.*loadPattern").unwrap(),
114                risk_level: RiskLevel::High,
115                description: "Remote pattern/behavior fetching detected",
116            },
117            // Silent failure patterns (never throw on verification)
118            Pattern {
119                name: "Silent Failure",
120                regex: Regex::new(r"catch\s*\([^)]*\)\s*\{[^}]*return\s+(true|null|\[\]|\{\})[^}]*\}").unwrap(),
121                risk_level: RiskLevel::Medium,
122                description: "Silent failure on error - may hide security issues",
123            },
124            // Hooks that auto-execute
125            Pattern {
126                name: "Auto Hook",
127                regex: Regex::new(r"(PreToolUse|PostToolUse|UserPromptSubmit|SessionStart).*npx").unwrap(),
128                risk_level: RiskLevel::High,
129                description: "Hook configured to auto-execute npm package",
130            },
131            // Bootstrap registries (hardcoded IPNS endpoints)
132            Pattern {
133                name: "Bootstrap Registry",
134                regex: Regex::new(r"BOOTSTRAP_REGISTRIES|bootstrapRegistries|bootstrap.*registry").unwrap(),
135                risk_level: RiskLevel::Critical,
136                description: "Hardcoded bootstrap registry detected - potential phone-home mechanism",
137            },
138            // Fallback CID generation (fabricates fake CIDs)
139            Pattern {
140                name: "Fake CID Generation",
141                regex: Regex::new(r"generateFallbackCID|fallbackCid|bafybei.*sha256").unwrap(),
142                risk_level: RiskLevel::Critical,
143                description: "Fake CID generation - breaks IPFS content-addressing trust",
144            },
145            // Genesis registry patterns
146            Pattern {
147                name: "Genesis Registry",
148                regex: Regex::new(r"getGenesisRegistry|seraphine-genesis|genesis.*pattern").unwrap(),
149                risk_level: RiskLevel::Critical,
150                description: "Hardcoded genesis registry - guaranteed fallback payload",
151            },
152            // Pattern/behavior injection
153            Pattern {
154                name: "Behavior Injection",
155                regex: Regex::new(r"coordination.*trajectories|routing.*patterns|swarm.*patterns").unwrap(),
156                risk_level: RiskLevel::High,
157                description: "Behavioral pattern injection - may modify AI reasoning",
158            },
159        ];
160
161        Self {
162            patterns,
163            executable_paths: vec![
164                "commands",
165                "hooks",
166                "scripts",
167                "bin",
168                ".claude/commands",
169                "node_modules/.bin",
170            ],
171            history_paths: vec![
172                "shell-snapshots",
173                "history",
174                "logs",
175                ".bash_history",
176                ".zsh_history",
177            ],
178        }
179    }
180
181    /// Scan a directory for security patterns
182    /// Unlike normal st, this IGNORES gitignore and scans everything
183    pub fn scan_directory(&self, path: &Path) -> Result<Vec<SecurityFinding>> {
184        let mut findings = Vec::new();
185
186        // Walk directory without respecting gitignore
187        for entry in WalkDir::new(path)
188            .follow_links(false)
189            .into_iter()
190            .filter_map(|e| e.ok())
191        {
192            let file_path = entry.path();
193
194            // Skip binary files and very large files
195            if !self.should_scan_file(file_path) {
196                continue;
197            }
198
199            // Read and scan file contents
200            if let Ok(content) = fs::read_to_string(file_path) {
201                self.scan_content(file_path, &content, &mut findings);
202            }
203        }
204
205        // Sort by risk level (critical first)
206        findings.sort_by(|a, b| b.risk_level.cmp(&a.risk_level));
207
208        Ok(findings)
209    }
210
211    fn should_scan_file(&self, path: &Path) -> bool {
212        // Skip binary extensions
213        let skip_extensions = [
214            "png", "jpg", "jpeg", "gif", "ico", "webp", "svg", "woff", "woff2", "ttf", "otf",
215            "eot", "mp3", "mp4", "wav", "ogg", "webm", "zip", "tar", "gz", "bz2", "xz", "7z",
216            "exe", "dll", "so", "dylib", "pdf", "doc", "docx", "xls", "xlsx", "pyc", "pyo",
217            "class", "o", "a", "wasm", "mem8",
218        ];
219
220        if let Some(ext) = path.extension() {
221            if skip_extensions.contains(&ext.to_string_lossy().to_lowercase().as_str()) {
222                return false;
223            }
224        }
225
226        // Skip very large files (>10MB)
227        if let Ok(metadata) = fs::metadata(path) {
228            if metadata.len() > 10 * 1024 * 1024 {
229                return false;
230            }
231        }
232
233        // Must be a file
234        path.is_file()
235    }
236
237    /// Scan content for security patterns (public API)
238    /// Returns findings for a single file's content
239    pub fn scan_file_content(&self, file_path: &Path, content: &str) -> Vec<SecurityFinding> {
240        let mut findings = Vec::new();
241        self.scan_content(file_path, content, &mut findings);
242        findings
243    }
244
245    fn scan_content(&self, file_path: &Path, content: &str, findings: &mut Vec<SecurityFinding>) {
246        let path_str = file_path.to_string_lossy();
247
248        // Determine if this is an executable context, documentation, or history
249        let is_executable = self.executable_paths.iter().any(|p| path_str.contains(p));
250        let is_history = self.history_paths.iter().any(|p| path_str.contains(p));
251        let is_documentation = matches!(
252            file_path.extension().and_then(|e| e.to_str()),
253            Some("txt" | "md" | "rst" | "adoc" | "org")
254        ) || path_str.contains("docs/") || path_str.contains("doc/");
255
256        for (line_number, line) in content.lines().enumerate() {
257            for pattern in &self.patterns {
258                if let Some(m) = pattern.regex.find(line) {
259                    // Adjust risk based on context
260                    let adjusted_risk = if is_documentation {
261                        // Documentation/notes just *mention* packages — not a real threat
262                        match pattern.risk_level {
263                            RiskLevel::Critical => RiskLevel::Low,
264                            RiskLevel::High => RiskLevel::Low,
265                            _ => continue, // skip Medium/Low entirely for docs
266                        }
267                    } else if is_history {
268                        // History files are lower risk
269                        match pattern.risk_level {
270                            RiskLevel::Critical => RiskLevel::Medium,
271                            RiskLevel::High => RiskLevel::Low,
272                            other => other,
273                        }
274                    } else if is_executable {
275                        // Executable context keeps or elevates risk
276                        pattern.risk_level
277                    } else {
278                        pattern.risk_level
279                    };
280
281                    findings.push(SecurityFinding {
282                        file_path: file_path.to_path_buf(),
283                        line_number: line_number + 1,
284                        pattern_name: pattern.name.to_string(),
285                        matched_text: m.as_str().to_string(),
286                        risk_level: adjusted_risk,
287                        description: pattern.description.to_string(),
288                    });
289                }
290            }
291        }
292    }
293
294    /// Generate a summary report
295    pub fn generate_report(&self, findings: &[SecurityFinding]) -> String {
296        let mut report = String::new();
297
298        report.push_str("\n\u{1F50D} Security Scan Results\n");
299        report.push_str("═══════════════════════════════════════════════════════════════\n\n");
300
301        if findings.is_empty() {
302            report.push_str("\u{2705} No security patterns detected.\n");
303            return report;
304        }
305
306        // Count by risk level
307        let mut by_risk: HashMap<RiskLevel, Vec<&SecurityFinding>> = HashMap::new();
308        for finding in findings {
309            by_risk.entry(finding.risk_level).or_default().push(finding);
310        }
311
312        // Summary
313        report.push_str("\u{1F4CA} Summary:\n");
314        for level in [
315            RiskLevel::Critical,
316            RiskLevel::High,
317            RiskLevel::Medium,
318            RiskLevel::Low,
319        ] {
320            if let Some(findings) = by_risk.get(&level) {
321                let icon = match level {
322                    RiskLevel::Critical => "\u{1F6A8}",
323                    RiskLevel::High => "\u{26A0}\u{FE0F}",
324                    RiskLevel::Medium => "\u{1F7E1}",
325                    RiskLevel::Low => "\u{1F535}",
326                };
327                report.push_str(&format!(
328                    "  {} {}: {} findings\n",
329                    icon,
330                    level,
331                    findings.len()
332                ));
333            }
334        }
335        report.push('\n');
336
337        // Detailed findings by risk level
338        for level in [
339            RiskLevel::Critical,
340            RiskLevel::High,
341            RiskLevel::Medium,
342            RiskLevel::Low,
343        ] {
344            if let Some(findings) = by_risk.get(&level) {
345                let header = match level {
346                    RiskLevel::Critical => "\u{1F6A8} CRITICAL RISK",
347                    RiskLevel::High => "\u{26A0}\u{FE0F} HIGH RISK",
348                    RiskLevel::Medium => "\u{1F7E1} MEDIUM RISK",
349                    RiskLevel::Low => "\u{1F535} LOW RISK",
350                };
351                report.push_str(&format!("\n{}\n", header));
352                report.push_str(&"-".repeat(60));
353                report.push('\n');
354
355                // Group by pattern name
356                let mut by_pattern: HashMap<&str, Vec<&&SecurityFinding>> = HashMap::new();
357                for finding in findings {
358                    by_pattern
359                        .entry(&finding.pattern_name)
360                        .or_default()
361                        .push(finding);
362                }
363
364                for (pattern_name, pattern_findings) in by_pattern {
365                    report.push_str(&format!(
366                        "\n  \u{1F50E} {} ({} occurrences)\n",
367                        pattern_name,
368                        pattern_findings.len()
369                    ));
370                    report.push_str(&format!("     {}\n", pattern_findings[0].description));
371
372                    // Show first 5 files
373                    for (i, finding) in pattern_findings.iter().take(5).enumerate() {
374                        let short_path = finding.file_path.to_string_lossy();
375                        // Truncate long paths
376                        let display_path = if short_path.len() > 60 {
377                            format!("...{}", &short_path[short_path.len() - 57..])
378                        } else {
379                            short_path.to_string()
380                        };
381                        report.push_str(&format!(
382                            "     {}. {}:{}\n",
383                            i + 1,
384                            display_path,
385                            finding.line_number
386                        ));
387                        report.push_str(&format!(
388                            "        Match: {}\n",
389                            truncate(&finding.matched_text, 50)
390                        ));
391                    }
392                    if pattern_findings.len() > 5 {
393                        report.push_str(&format!(
394                            "     ... and {} more\n",
395                            pattern_findings.len() - 5
396                        ));
397                    }
398                }
399            }
400        }
401
402        // Recommendations
403        report.push_str("\n\n\u{1F6E1}\u{FE0F} Recommendations:\n");
404        report.push_str("═══════════════════════════════════════════════════════════════\n");
405
406        if by_risk.contains_key(&RiskLevel::Critical) || by_risk.contains_key(&RiskLevel::High) {
407            report.push_str("  1. Run: st --ai-install --cleanup\n");
408            report.push_str("     To review and remove untrusted MCP integrations\n\n");
409            report.push_str("  2. Manually audit ~/.claude/settings.json\n");
410            report.push_str("     Remove any hooks referencing suspicious packages\n\n");
411            report.push_str("  3. Delete ~/.claude/commands/ directories with risky content\n");
412            report.push_str("     These are active skills that execute on slash commands\n\n");
413            report.push_str("  4. DO NOT reinstall the flagged packages from npm\n");
414            report.push_str("     They will re-add themselves to your configuration\n");
415        } else {
416            report.push_str("  No critical actions required.\n");
417            report.push_str("  Continue monitoring for new patterns.\n");
418        }
419
420        report
421    }
422}
423
424impl Default for SecurityScanner {
425    fn default() -> Self {
426        Self::new()
427    }
428}
429
/// Shortens `s` to at most `max_len` bytes, ending the result with `...`
/// when anything was cut.
///
/// Strings that already fit are returned unchanged. The cut point is moved
/// back to the nearest character boundary, so multi-byte text never causes a
/// slicing panic. When `max_len` is smaller than the ellipsis itself, the
/// result is just `...`.
fn truncate(s: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    if s.len() <= max_len {
        return s.to_string();
    }

    // Bytes available for the kept prefix; saturating so a tiny `max_len`
    // cannot underflow.
    let mut cut = max_len.saturating_sub(ELLIPSIS.len());
    // Index 0 is always a boundary, so this loop terminates.
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}{}", &s[..cut], ELLIPSIS)
}
437
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `content` as if it were the file `file_name` and returns the
    /// names of the rules that fired, in reporting order.
    fn fired_rules(file_name: &str, content: &str) -> Vec<String> {
        SecurityScanner::new()
            .scan_file_content(Path::new(file_name), content)
            .into_iter()
            .map(|finding| finding.pattern_name)
            .collect()
    }

    /// True when `rule` fired at least once for `content` scanned as `file_name`.
    fn rule_fired(file_name: &str, content: &str, rule: &str) -> bool {
        fired_rules(file_name, content)
            .iter()
            .any(|name| name == rule)
    }

    #[test]
    fn test_ipfs_detection() {
        let fired = fired_rules("test.js", r#"const url = "https://ipfs.io/ipfs/QmTest";"#);
        assert!(!fired.is_empty());
        assert_eq!(fired[0], "IPFS Gateway");
    }

    #[test]
    fn test_claude_flow_detection() {
        assert!(rule_fired(
            "test.md",
            "npx claude-flow@alpha swarm init",
            "Known Risk Package"
        ));
    }

    #[test]
    fn test_fake_verification_detection() {
        assert!(rule_fired(
            "verify.ts",
            "return signature.length === 64;",
            "Fake Verification"
        ));
    }

    #[test]
    fn test_additional_malicious_packages() {
        assert!(rule_fired(
            "test.sh",
            "npm install hive-mind flow-nexus ruv-swarm",
            "Known Risk Package"
        ));
    }

    #[test]
    fn test_additional_ipfs_gateways() {
        let gateway_urls = [
            "https://4everland.io/ipfs/Qm123",
            "https://cloudflare-ipfs.com/ipfs/Qm456",
            "https://gateway.pinata.cloud/ipfs/Qm789",
            "https://w3s.link/ipfs/QmAbc",
        ];
        for content in gateway_urls.iter() {
            assert!(
                rule_fired("test.ts", content, "IPFS Gateway"),
                "Failed to detect IPFS gateway in: {}",
                content
            );
        }
    }

    #[test]
    fn test_volatile_npm_tags() {
        assert!(rule_fired(
            "test.sh",
            "npx some-package@canary run-command",
            "Dynamic NPX"
        ));
    }

    #[test]
    fn test_bootstrap_registry_detection() {
        let content = r#"
            export const BOOTSTRAP_REGISTRIES = [
                { name: 'test', ipnsName: 'k51...' }
            ];
        "#;
        assert!(rule_fired("registry.ts", content, "Bootstrap Registry"));
    }

    #[test]
    fn test_fake_cid_generation() {
        let content = r#"
            const fallbackCid = generateFallbackCID(ipnsName);
            const hash = crypto.createHash('sha256').update(input).digest();
        "#;
        assert!(rule_fired("discovery.ts", content, "Fake CID Generation"));
    }

    #[test]
    fn test_genesis_registry_detection() {
        let content = r#"
            private getGenesisRegistry(cid: string) {
                return { id: 'seraphine-genesis-v1', ... };
            }
        "#;
        assert!(rule_fired("discovery.ts", content, "Genesis Registry"));
    }

    #[test]
    fn test_behavior_injection_detection() {
        let content = r#"
            const patterns = {
                "coordination trajectories": [...],
                "routing patterns": [...]
            };
        "#;
        assert!(rule_fired("patterns.ts", content, "Behavior Injection"));
    }

    #[test]
    fn test_auto_hook_detection() {
        let content = r#"
            "hooks": {
                "PreToolUse": ["npx claude-flow@alpha ..."],
                "SessionStart": ["npx agentic-flow@beta ..."]
            }
        "#;
        assert!(rule_fired("settings.json", content, "Auto Hook"));
    }
}