Skip to main content

nyx_scanner/patterns/
mod.rs

1//! AST pattern matching: tree-sitter queries over dangerous structural shapes.
2//!
3//! Patterns match constructs based on syntax alone, with no dataflow or CFG.
4//! A match means the construct is present; it is not proof that it is
5//! reachable or exploitable. Patterns run in every analysis mode and are the
6//! only active detector in `--mode ast`.
7//!
8//! # Rule ID format
9//!
10//! ```text
11//! <lang>.<category>.<name>
12//! ```
13//!
14//! Examples: `js.code_exec.eval`, `py.deser.pickle_loads`, `c.memory.gets`,
15//! `java.sqli.execute_concat`.
16//!
17//! # Tiers
18//!
19//! - **Tier A**: structural presence alone is high-signal. `gets`, `eval`,
20//!   `pickle.loads`, `mem::transmute`. No guard needed.
21//! - **Tier B**: pattern includes a tree-sitter heuristic guard.
22//!   `java.sqli.execute_concat` fires only when `executeQuery` receives a
23//!   `binary_expression` (concatenation), not a literal or parameterized call.
24//!
25//! # Categories
26//!
27//! | Category | Examples |
28//! |----------|---------|
29//! | `CommandExec` | `system`, `os.system`, `Runtime.exec`, backticks |
30//! | `CodeExec` | `eval`, `Function`, PHP `assert("string")`, `class_eval` |
31//! | `Deserialization` | `pickle.loads`, `yaml.load`, `Marshal.load`, `readObject` |
32//! | `SqlInjection` | `executeQuery` with concatenated argument (Tier B) |
33//! | `PathTraversal` | PHP `include $var` |
34//! | `Xss` | `innerHTML`, `document.write`, `insertAdjacentHTML` |
35//! | `Crypto` | `md5`, `sha1`, `Math.random` for security use |
36//! | `Secrets` | Hardcoded API keys (Go, JS, TS) |
37//! | `InsecureTransport` | `InsecureSkipVerify`, `fetch("http://...")` |
38//! | `Reflection` | `Class.forName`, `Method.invoke`, `constantize` |
39//! | `MemorySafety` | `transmute`, `unsafe`, `gets`, `strcpy`, `sprintf` |
40//! | `Prototype` | `__proto__` assignment, `Object.prototype.*` |
//! | `InsecureConfig` | CORS dynamic origin, `rejectUnauthorized: false` |
42//! | `CodeQuality` | `unwrap`, `panic!`, `as any` |
43//!
44//! # Pattern loading
45//!
//! Each language submodule exports a `PATTERNS` item of type
//! `&'static [Pattern]`. [`load`] dispatches to the correct submodule by
//! language slug. [`Pattern`] carries the rule ID, description, severity,
//! tier, confidence, category, and the tree-sitter query string.
50
51pub mod c;
52pub mod cpp;
53pub mod ejs;
54mod go;
55mod java;
56pub mod javascript;
57mod php;
58mod python;
59mod ruby;
60pub mod rust;
61pub mod typescript;
62
63use crate::evidence::Confidence;
64use console::style;
65use once_cell::sync::Lazy;
66use serde::{Deserialize, Serialize};
67use std::collections::HashMap;
68use std::fmt;
69use std::str::FromStr;
70
71#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
72pub enum Severity {
73    High,
74    Medium,
75    Low,
76}
77
78impl Severity {
79    /// Bracketed, colored, fixed-width tag for aligned console output.
80    ///
81    /// Returns e.g. `"[HIGH]  "` or `"[MEDIUM]"`, always 8 visible characters
82    /// so the column after the tag lines up regardless of severity.
83    #[allow(dead_code)] // public API for lib consumers
84    pub fn colored_tag(self) -> String {
85        // Visible widths: "[HIGH]" = 6, "[MEDIUM]" = 8, "[LOW]" = 5.
86        // Pad the *whole* tag to 8 visible chars (the longest, "[MEDIUM]").
87        let (label, styled_fn): (&str, fn(&str) -> String) = match self {
88            Severity::High => ("HIGH", |s| style(s).red().bold().to_string()),
89            Severity::Medium => ("MEDIUM", |s| style(s).color256(208).bold().to_string()),
90            Severity::Low => ("LOW", |s| style(s).color256(67).to_string()),
91        };
92        let bracket_len = label.len() + 2; // "[" + label + "]"
93        let pad = 8usize.saturating_sub(bracket_len);
94        format!("[{}]{:pad$}", styled_fn(label), "", pad = pad)
95    }
96}
97
98impl fmt::Display for Severity {
99    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
100        let styled = match *self {
101            Severity::High => style("HIGH").red().bold().to_string(),
102            Severity::Medium => style("MEDIUM").color256(208).bold().to_string(),
103            Severity::Low => style("LOW").color256(67).to_string(),
104        };
105        f.write_str(&styled)
106    }
107}
108
109impl Severity {
110    /// Textual value stored in SQLite.
111    pub fn as_db_str(self) -> &'static str {
112        match self {
113            Severity::High => "HIGH",
114            Severity::Medium => "MEDIUM",
115            Severity::Low => "LOW",
116        }
117    }
118}
119
120impl FromStr for Severity {
121    type Err = String;
122
123    fn from_str(input: &str) -> Result<Self, Self::Err> {
124        match input.trim().to_ascii_uppercase().as_str() {
125            "HIGH" => Ok(Severity::High),
126            "MEDIUM" | "MED" => Ok(Severity::Medium),
127            "LOW" => Ok(Severity::Low),
128            other => Err(format!("unknown severity: '{other}'")),
129        }
130    }
131}
132
133/// A parsed severity filter expression.
134///
135/// Supports three forms:
136///   - Single level: `"HIGH"`, matches only that level
137///   - Comma list: `"HIGH,MEDIUM"`, matches any listed level
138///   - Threshold: `">=MEDIUM"`, matches that level and above
139///
140/// Parsing is case-insensitive and tolerates whitespace around tokens.
141#[derive(Debug, Clone, PartialEq, Eq)]
142pub enum SeverityFilter {
143    /// Match findings at or above this level (High >= Medium >= Low).
144    AtLeast(Severity),
145    /// Match findings whose severity is in this exact set.
146    AnyOf(Vec<Severity>),
147}
148
149impl SeverityFilter {
150    /// Parse a severity filter expression.
151    ///
152    /// Examples: `"HIGH"`, `"high,medium"`, `">=MEDIUM"`, `">= low"`.
153    pub fn parse(expr: &str) -> Result<Self, String> {
154        let trimmed = expr.trim();
155        if trimmed.is_empty() {
156            return Err("empty severity expression".into());
157        }
158
159        // Threshold form: >=LEVEL
160        if let Some(rest) = trimmed.strip_prefix(">=") {
161            let level: Severity = rest.parse()?;
162            return Ok(SeverityFilter::AtLeast(level));
163        }
164
165        // Comma-separated list (also handles single value)
166        let levels: Result<Vec<Severity>, String> = trimmed
167            .split(',')
168            .map(|tok| tok.trim().parse::<Severity>())
169            .collect();
170        let levels = levels?;
171        if levels.is_empty() {
172            return Err("empty severity expression".into());
173        }
174        // Optimise single-value list
175        if levels.len() == 1 {
176            return Ok(SeverityFilter::AnyOf(levels));
177        }
178        Ok(SeverityFilter::AnyOf(levels))
179    }
180
181    /// Returns `true` if the given severity passes this filter.
182    pub fn matches(&self, sev: Severity) -> bool {
183        match self {
184            SeverityFilter::AtLeast(threshold) => {
185                // Severity ordering: High < Medium < Low (derived Ord).
186                // "at least Medium" means sev <= Medium in Ord terms.
187                sev <= *threshold
188            }
189            SeverityFilter::AnyOf(set) => set.contains(&sev),
190        }
191    }
192}
193
194/// Pattern confidence tier.
195///
196/// * **A** – Structural presence alone is high-signal (e.g. `gets()`, `eval()`).
197/// * **B** – Requires a simple heuristic guard in the query (e.g. SQL with
198///   concatenated arg, file-open with non-literal path).
199#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
200pub enum PatternTier {
201    A,
202    B,
203}
204
205/// High-level finding category for noise reduction and prioritization.
206#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
207pub enum FindingCategory {
208    Security,
209    Reliability,
210    Quality,
211}
212
213impl std::fmt::Display for FindingCategory {
214    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
215        match self {
216            FindingCategory::Security => write!(f, "Security"),
217            FindingCategory::Reliability => write!(f, "Reliability"),
218            FindingCategory::Quality => write!(f, "Quality"),
219        }
220    }
221}
222
223/// Vulnerability class that a pattern detects.
224#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
225pub enum PatternCategory {
226    CommandExec,
227    CodeExec,
228    Deserialization,
229    SqlInjection,
230    PathTraversal,
231    Xss,
232    Crypto,
233    Secrets,
234    InsecureTransport,
235    Reflection,
236    MemorySafety,
237    Prototype,
238    InsecureConfig,
239    CodeQuality,
240}
241
242impl PatternCategory {
243    /// Map this vulnerability class to a high-level finding category.
244    pub fn finding_category(self) -> FindingCategory {
245        match self {
246            PatternCategory::CodeQuality => FindingCategory::Quality,
247            _ => FindingCategory::Security,
248        }
249    }
250}
251
252/// One AST pattern with a tree-sitter query and meta-data.
253#[derive(Debug, Clone, Serialize, PartialEq)]
254pub struct Pattern {
255    /// Unique identifier, `<lang>.<category>.<specific>` preferred.
256    pub id: &'static str,
257    /// Human-readable explanation.
258    pub description: &'static str,
259    /// tree-sitter query string.
260    pub query: &'static str,
261    /// Rough severity bucket.
262    pub severity: Severity,
263    /// Confidence tier (A = structural, B = heuristic-guarded).
264    pub tier: PatternTier,
265    /// Vulnerability class.
266    pub category: PatternCategory,
267    /// Confidence level for findings produced by this pattern.
268    pub confidence: Confidence,
269}
270
271/// Global, lazily-initialised registry: lang-name → pattern slice
272static REGISTRY: Lazy<HashMap<&'static str, &'static [Pattern]>> = Lazy::new(|| {
273    let mut m = HashMap::new();
274
275    // ---- Rust ----
276    m.insert("rust", rust::PATTERNS);
277
278    // ---- TypeScript ----
279    m.insert("typescript", typescript::PATTERNS);
280    m.insert("ts", typescript::PATTERNS);
281    m.insert("tsx", typescript::PATTERNS);
282
283    // ---- JavaScript ----
284    m.insert("javascript", javascript::PATTERNS);
285    m.insert("js", javascript::PATTERNS);
286
287    // ---- C & C++ ----
288    m.insert("c", c::PATTERNS);
289    m.insert("cpp", cpp::PATTERNS);
290    m.insert("c++", cpp::PATTERNS);
291
292    // ---- Other patterns in the folder ----
293    m.insert("java", java::PATTERNS);
294    m.insert("go", go::PATTERNS);
295    m.insert("php", php::PATTERNS);
296    m.insert("python", python::PATTERNS);
297    m.insert("py", python::PATTERNS);
298    m.insert("ruby", ruby::PATTERNS);
299    m.insert("rb", ruby::PATTERNS);
300
301    tracing::debug!("AST-pattern registry initialised ({} patterns)", m.len());
302
303    m
304});
305
306/// Return all patterns for the requested language (case-insensitive).
307///
308/// Unknown languages yield an **empty** `Vec`. This function cannot
309/// fail: the registry is pure static data (no tree-sitter queries are
310/// compiled here). Compilation is deferred to `crate::utils::query_cache`,
311/// which drops malformed queries via `filter_map` + warn-log rather
312/// than panicking.
313pub fn load(lang: &str) -> Vec<Pattern> {
314    let key = lang.to_ascii_lowercase();
315    REGISTRY.get(key.as_str()).copied().unwrap_or(&[]).to_vec()
316}
317
/// The database string of every level is one of the three known names and
/// parses back to the same level in upper and lower case.
#[test]
fn severity_as_db_str_roundtrip() {
    for level in [Severity::High, Severity::Medium, Severity::Low] {
        let stored = level.as_db_str();
        assert!(["HIGH", "MEDIUM", "LOW"].contains(&stored));

        assert_eq!(stored.parse::<Severity>().unwrap(), level);
        assert_eq!(stored.to_lowercase().parse::<Severity>().unwrap(), level);
    }
}
328
/// The `Display` output may carry styling, but it must contain the
/// upper-case level name.
#[test]
fn severity_display_contains_uppercase_name() {
    for (level, name) in [
        (Severity::High, "HIGH"),
        (Severity::Medium, "MEDIUM"),
        (Severity::Low, "LOW"),
    ] {
        let shown = level.to_string();
        assert!(shown.contains(name));
    }
}
335
/// `load` finds registered languages, resolves aliases, ignores case, and
/// returns nothing for unknown languages.
#[test]
fn load_returns_correct_pattern_slices() {
    let rust_patterns = load("rust");
    assert!(!rust_patterns.is_empty(), "Rust patterns should be loaded");

    // Alias and canonical slug resolve to the same patterns.
    assert_eq!(
        load("typescript"),
        load("tsx"),
        "alias ‘tsx’ must map to TypeScript patterns"
    );

    // Lookup ignores case.
    assert_eq!(load("RUST"), rust_patterns);

    // Unregistered language.
    assert!(load("brainfuck").is_empty());
}
349
/// A string that names no level is a parse error.
#[test]
fn severity_from_str_rejects_unknown() {
    let parsed: Result<Severity, _> = "garbage".parse();
    assert!(parsed.is_err());
}
354
/// A single bare level matches that level only.
#[test]
fn severity_filter_single() {
    let only_high = SeverityFilter::parse("HIGH").unwrap();
    for (sev, expected) in [
        (Severity::High, true),
        (Severity::Medium, false),
        (Severity::Low, false),
    ] {
        assert_eq!(only_high.matches(sev), expected, "{sev:?}");
    }
}
362
/// A comma list matches exactly the listed levels.
#[test]
fn severity_filter_comma_list() {
    let high_or_medium = SeverityFilter::parse("HIGH,MEDIUM").unwrap();
    for (sev, expected) in [
        (Severity::High, true),
        (Severity::Medium, true),
        (Severity::Low, false),
    ] {
        assert_eq!(high_or_medium.matches(sev), expected, "{sev:?}");
    }
}
370
/// A `>=LEVEL` threshold matches that level and every more severe one.
#[test]
fn severity_filter_threshold() {
    // (expression, severity, expected result of `matches`)
    let cases = [
        (">=MEDIUM", Severity::High, true),
        (">=MEDIUM", Severity::Medium, true),
        (">=MEDIUM", Severity::Low, false),
        (">=LOW", Severity::High, true),
        (">=LOW", Severity::Medium, true),
        (">=LOW", Severity::Low, true),
        (">=HIGH", Severity::High, true),
        (">=HIGH", Severity::Medium, false),
    ];
    for (expr, sev, expected) in cases {
        let filter = SeverityFilter::parse(expr).unwrap();
        assert_eq!(filter.matches(sev), expected, "{expr} vs {sev:?}");
    }
}
387
/// Lower-case names and stray whitespace are accepted in both the list and
/// the threshold form.
#[test]
fn severity_filter_case_insensitive_and_whitespace() {
    let list = SeverityFilter::parse("  high , medium  ").unwrap();
    for (sev, expected) in [
        (Severity::High, true),
        (Severity::Medium, true),
        (Severity::Low, false),
    ] {
        assert_eq!(list.matches(sev), expected, "{sev:?}");
    }

    let threshold = SeverityFilter::parse(">= medium").unwrap();
    for sev in [Severity::High, Severity::Medium] {
        assert!(threshold.matches(sev), "{sev:?}");
    }
}
399
/// Empty and whitespace-only expressions are rejected.
#[test]
fn severity_filter_rejects_empty() {
    for expr in ["", "  "] {
        assert!(SeverityFilter::parse(expr).is_err(), "{expr:?}");
    }
}
405
/// An unknown level is rejected in every form: bare, inside a list, and as
/// a threshold.
#[test]
fn severity_filter_rejects_invalid_level() {
    for expr in ["CRITICAL", "HIGH,CRITICAL", ">=BOGUS"] {
        assert!(SeverityFilter::parse(expr).is_err(), "{expr:?}");
    }
}