Skip to main content

batuta/bug_hunter/
languages.rs

1//! Multi-Language Pattern Support
2//!
3//! Provides language-specific bug patterns for Python, TypeScript, Go, and Rust.
4
5use super::types::{DefectCategory, FindingSeverity};
6
/// Supported programming languages for bug hunting.
///
/// JavaScript sources (`js` / `jsx`) are grouped under [`Language::TypeScript`];
/// there is no separate JavaScript variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Language {
    /// Rust (`rs`).
    Rust,
    /// Python (`py`).
    Python,
    /// TypeScript and JavaScript (`ts`, `tsx`, `js`, `jsx`).
    TypeScript,
    /// Go (`go`).
    Go,
}

impl Language {
    /// Every variant, in declaration order.
    ///
    /// Must be extended by hand when a variant is added to the enum.
    const ALL: [Language; 4] = [
        Language::Rust,
        Language::Python,
        Language::TypeScript,
        Language::Go,
    ];

    /// Detect language from file extension.
    ///
    /// `ext` is the bare extension without a leading dot (`"rs"`, not `".rs"`).
    /// The comparison is ASCII case-insensitive, so `"RS"` and `"Tsx"` are
    /// recognised. Returns `None` for anything not listed by
    /// [`Language::extensions`], including the empty string.
    ///
    /// The lookup goes through [`Language::extensions`] so the two can never
    /// disagree, and it compares in place instead of allocating a lowercased
    /// copy of `ext`.
    pub fn from_extension(ext: &str) -> Option<Self> {
        Self::ALL.iter().copied().find(|lang| {
            lang.extensions()
                .iter()
                .any(|known| known.eq_ignore_ascii_case(ext))
        })
    }

    /// Get file extensions for this language.
    ///
    /// Extensions are lowercase and carry no leading dot.
    pub fn extensions(&self) -> &'static [&'static str] {
        match self {
            Language::Rust => &["rs"],
            Language::Python => &["py"],
            Language::TypeScript => &["ts", "tsx", "js", "jsx"],
            Language::Go => &["go"],
        }
    }

    /// Get glob patterns for this language.
    ///
    /// One recursive `**/*.<ext>` pattern per entry of
    /// [`Language::extensions`], in the same order.
    pub fn glob_patterns(&self) -> Vec<&'static str> {
        match self {
            Language::Rust => vec!["**/*.rs"],
            Language::Python => vec!["**/*.py"],
            Language::TypeScript => vec!["**/*.ts", "**/*.tsx", "**/*.js", "**/*.jsx"],
            Language::Go => vec!["**/*.go"],
        }
    }
}
48
/// A language-specific pattern.
///
/// Bundles a pattern with its classification and the language it targets.
/// NOTE(review): the pattern tables in the visible part of this file return
/// plain tuples rather than this struct -- confirm where it is constructed.
pub struct LangPattern {
    /// Pattern text to look for. How it is matched against source code is up
    /// to the consumer and is not shown in this file.
    pub pattern: &'static str,
    /// Defect category assigned to a match.
    pub category: DefectCategory,
    /// Severity assigned to a match.
    pub severity: FindingSeverity,
    /// Suspiciousness score for a match.
    /// Presumably in `0.0..=1.0`, like the built-in tables -- TODO confirm.
    pub suspiciousness: f64,
    /// Language the pattern is restricted to.
    pub language: Option<Language>, // None = applies to all languages
}
57
58/// Get patterns applicable to a specific language.
59pub fn patterns_for_language(
60    lang: Language,
61) -> Vec<(&'static str, DefectCategory, FindingSeverity, f64)> {
62    let mut patterns = vec![];
63
64    // Universal patterns (all languages)
65    patterns.extend(universal_patterns());
66
67    // Language-specific patterns
68    match lang {
69        Language::Rust => patterns.extend(rust_patterns()),
70        Language::Python => patterns.extend(python_patterns()),
71        Language::TypeScript => patterns.extend(typescript_patterns()),
72        Language::Go => patterns.extend(go_patterns()),
73    }
74
75    patterns
76}
77
78/// Patterns that apply to all languages.
79fn universal_patterns() -> Vec<(&'static str, DefectCategory, FindingSeverity, f64)> {
80    vec![
81        // Universal debt markers
82        ("TODO", DefectCategory::LogicErrors, FindingSeverity::Low, 0.3),
83        ("FIXME", DefectCategory::LogicErrors, FindingSeverity::Medium, 0.5),
84        ("HACK", DefectCategory::LogicErrors, FindingSeverity::Medium, 0.5),
85        ("XXX", DefectCategory::LogicErrors, FindingSeverity::Medium, 0.5),
86        ("BUG", DefectCategory::LogicErrors, FindingSeverity::High, 0.7),
87        // Hidden debt euphemisms
88        ("placeholder", DefectCategory::HiddenDebt, FindingSeverity::High, 0.75),
89        ("stub", DefectCategory::HiddenDebt, FindingSeverity::High, 0.7),
90        ("dummy", DefectCategory::HiddenDebt, FindingSeverity::High, 0.7),
91        ("temporary", DefectCategory::HiddenDebt, FindingSeverity::Medium, 0.6),
92        ("hardcoded", DefectCategory::HiddenDebt, FindingSeverity::Medium, 0.5),
93        ("workaround", DefectCategory::HiddenDebt, FindingSeverity::Medium, 0.6),
94        ("tech debt", DefectCategory::HiddenDebt, FindingSeverity::High, 0.8),
95    ]
96}
97
98/// Rust-specific patterns.
99fn rust_patterns() -> Vec<(&'static str, DefectCategory, FindingSeverity, f64)> {
100    vec![
101        ("unwrap()", DefectCategory::LogicErrors, FindingSeverity::Medium, 0.4),
102        ("expect(", DefectCategory::LogicErrors, FindingSeverity::Low, 0.3),
103        // SAFETY: no actual unsafe code -- string pattern for defect detection
104        ("unsafe {", DefectCategory::MemorySafety, FindingSeverity::High, 0.7),
105        ("transmute", DefectCategory::MemorySafety, FindingSeverity::High, 0.8),
106        ("panic!", DefectCategory::LogicErrors, FindingSeverity::Medium, 0.5),
107        ("unreachable!", DefectCategory::LogicErrors, FindingSeverity::Low, 0.3),
108        ("unimplemented!", DefectCategory::HiddenDebt, FindingSeverity::Critical, 0.9),
109        ("todo!", DefectCategory::HiddenDebt, FindingSeverity::High, 0.7),
110        ("#[ignore]", DefectCategory::TestDebt, FindingSeverity::High, 0.7),
111        (".unwrap_or_else(|_|", DefectCategory::SilentDegradation, FindingSeverity::High, 0.7),
112        ("Err(_) => {}", DefectCategory::SilentDegradation, FindingSeverity::High, 0.75),
113    ]
114}
115
116/// Python-specific patterns.
117fn python_patterns() -> Vec<(&'static str, DefectCategory, FindingSeverity, f64)> {
118    vec![
119        // Exception handling
120        ("except:", DefectCategory::SilentDegradation, FindingSeverity::High, 0.8),
121        ("except Exception:", DefectCategory::SilentDegradation, FindingSeverity::Medium, 0.6),
122        ("except BaseException:", DefectCategory::SilentDegradation, FindingSeverity::High, 0.8),
123        ("pass  # TODO", DefectCategory::HiddenDebt, FindingSeverity::High, 0.7),
124        // Security
125        ("eval(", DefectCategory::SecurityVulnerabilities, FindingSeverity::Critical, 0.95),
126        ("exec(", DefectCategory::SecurityVulnerabilities, FindingSeverity::Critical, 0.95),
127        ("pickle.loads", DefectCategory::SecurityVulnerabilities, FindingSeverity::High, 0.8),
128        ("shell=True", DefectCategory::SecurityVulnerabilities, FindingSeverity::High, 0.85),
129        ("__import__", DefectCategory::SecurityVulnerabilities, FindingSeverity::Medium, 0.6),
130        // Anti-patterns
131        ("global ", DefectCategory::LogicErrors, FindingSeverity::Medium, 0.5),
132        ("import *", DefectCategory::LogicErrors, FindingSeverity::Medium, 0.5),
133        ("assert ", DefectCategory::TestDebt, FindingSeverity::Low, 0.3), // in production code
134        ("# type: ignore", DefectCategory::TypeErrors, FindingSeverity::Medium, 0.5),
135        // Test debt
136        ("@pytest.mark.skip", DefectCategory::TestDebt, FindingSeverity::High, 0.7),
137        ("@unittest.skip", DefectCategory::TestDebt, FindingSeverity::High, 0.7),
138        ("raise NotImplementedError", DefectCategory::HiddenDebt, FindingSeverity::High, 0.8),
139        // Threading issues
140        ("threading.Thread(", DefectCategory::ConcurrencyBugs, FindingSeverity::Medium, 0.5),
141    ]
142}
143
144/// TypeScript/JavaScript-specific patterns.
145fn typescript_patterns() -> Vec<(&'static str, DefectCategory, FindingSeverity, f64)> {
146    vec![
147        // Type safety
148        ("any", DefectCategory::TypeErrors, FindingSeverity::Medium, 0.5),
149        ("as any", DefectCategory::TypeErrors, FindingSeverity::High, 0.7),
150        ("// @ts-ignore", DefectCategory::TypeErrors, FindingSeverity::High, 0.75),
151        ("// @ts-nocheck", DefectCategory::TypeErrors, FindingSeverity::Critical, 0.9),
152        ("@ts-expect-error", DefectCategory::TypeErrors, FindingSeverity::Medium, 0.5),
153        // Security
154        ("eval(", DefectCategory::SecurityVulnerabilities, FindingSeverity::Critical, 0.95),
155        ("innerHTML", DefectCategory::SecurityVulnerabilities, FindingSeverity::High, 0.8),
156        (
157            "dangerouslySetInnerHTML",
158            DefectCategory::SecurityVulnerabilities,
159            FindingSeverity::High,
160            0.8,
161        ),
162        ("document.write", DefectCategory::SecurityVulnerabilities, FindingSeverity::High, 0.8),
163        // Anti-patterns
164        ("console.log", DefectCategory::LogicErrors, FindingSeverity::Low, 0.3),
165        ("debugger", DefectCategory::LogicErrors, FindingSeverity::High, 0.7),
166        ("== null", DefectCategory::LogicErrors, FindingSeverity::Medium, 0.5),
167        ("!= null", DefectCategory::LogicErrors, FindingSeverity::Medium, 0.5),
168        // Test debt
169        ("it.skip", DefectCategory::TestDebt, FindingSeverity::High, 0.7),
170        ("describe.skip", DefectCategory::TestDebt, FindingSeverity::High, 0.7),
171        ("test.skip", DefectCategory::TestDebt, FindingSeverity::High, 0.7),
172        (".only(", DefectCategory::TestDebt, FindingSeverity::High, 0.8),
173        // Promise anti-patterns
174        (".catch(() => {", DefectCategory::SilentDegradation, FindingSeverity::High, 0.75),
175        (".catch(e => {})", DefectCategory::SilentDegradation, FindingSeverity::High, 0.8),
176    ]
177}
178
179/// Go-specific patterns.
180fn go_patterns() -> Vec<(&'static str, DefectCategory, FindingSeverity, f64)> {
181    vec![
182        // Error handling
183        ("_ = err", DefectCategory::SilentDegradation, FindingSeverity::Critical, 0.9),
184        ("err != nil { return", DefectCategory::LogicErrors, FindingSeverity::Low, 0.3), // OK pattern, low priority
185        ("panic(", DefectCategory::LogicErrors, FindingSeverity::High, 0.7),
186        ("log.Fatal", DefectCategory::LogicErrors, FindingSeverity::Medium, 0.5),
187        // Concurrency
188        ("go func()", DefectCategory::ConcurrencyBugs, FindingSeverity::Medium, 0.5),
189        ("sync.Mutex", DefectCategory::ConcurrencyBugs, FindingSeverity::Low, 0.3),
190        ("data race", DefectCategory::ConcurrencyBugs, FindingSeverity::Critical, 0.95),
191        // Security
192        ("sql.Query(", DefectCategory::SecurityVulnerabilities, FindingSeverity::High, 0.7), // potential SQL injection
193        ("http.Get(", DefectCategory::SecurityVulnerabilities, FindingSeverity::Medium, 0.5), // SSRF potential
194        ("exec.Command(", DefectCategory::SecurityVulnerabilities, FindingSeverity::High, 0.8),
195        // Anti-patterns
196        ("interface{}", DefectCategory::TypeErrors, FindingSeverity::Medium, 0.4),
197        ("//nolint", DefectCategory::LogicErrors, FindingSeverity::Medium, 0.5),
198        // Test debt
199        ("t.Skip", DefectCategory::TestDebt, FindingSeverity::High, 0.7),
200        ("testing.Short()", DefectCategory::TestDebt, FindingSeverity::Low, 0.3),
201    ]
202}
203
/// Recursive glob patterns covering every supported file extension.
///
/// Returns owned strings in a fixed order: Rust, Python, the four
/// TypeScript/JavaScript extensions, then Go.
pub fn all_language_globs() -> Vec<String> {
    const GLOBS: [&str; 7] = [
        "**/*.rs",
        "**/*.py",
        "**/*.ts",
        "**/*.tsx",
        "**/*.js",
        "**/*.jsx",
        "**/*.go",
    ];

    GLOBS.iter().map(|glob| String::from(*glob)).collect()
}
216
#[cfg(test)]
mod tests {
    use super::*;

    /// Languages swept by the tests that iterate over every variant.
    const LANGUAGES: [Language; 4] = [
        Language::Rust,
        Language::Python,
        Language::TypeScript,
        Language::Go,
    ];

    /// Asserts that each string in `expected` is the pattern text of some
    /// entry in `table`.
    fn assert_has_patterns<C, S>(table: &[(&'static str, C, S, f64)], expected: &[&str]) {
        for wanted in expected {
            assert!(
                table.iter().any(|(text, _, _, _)| text == wanted),
                "missing pattern {:?}",
                wanted
            );
        }
    }

    /// Asserts that each glob in `expected` is present in `globs`.
    fn assert_has_globs(globs: &[String], expected: &[&str]) {
        for wanted in expected {
            assert!(
                globs.iter().any(|glob| glob.as_str() == *wanted),
                "missing glob {:?}",
                wanted
            );
        }
    }

    #[test]
    fn test_language_from_extension() {
        let cases = [
            ("rs", Some(Language::Rust)),
            ("py", Some(Language::Python)),
            ("ts", Some(Language::TypeScript)),
            ("go", Some(Language::Go)),
            ("txt", None),
        ];
        for (ext, expected) in cases {
            assert_eq!(Language::from_extension(ext), expected);
        }
    }

    #[test]
    fn test_patterns_for_rust() {
        // "TODO" comes from the universal table.
        assert_has_patterns(&patterns_for_language(Language::Rust), &["unwrap()", "TODO"]);
    }

    #[test]
    fn test_patterns_for_python() {
        // "TODO" comes from the universal table.
        assert_has_patterns(&patterns_for_language(Language::Python), &["eval(", "TODO"]);
    }

    #[test]
    fn test_patterns_for_typescript() {
        // "TODO" comes from the universal table.
        assert_has_patterns(&patterns_for_language(Language::TypeScript), &["as any", "TODO"]);
    }

    #[test]
    fn test_patterns_for_go() {
        // "TODO" comes from the universal table.
        assert_has_patterns(&patterns_for_language(Language::Go), &["panic(", "TODO"]);
    }

    #[test]
    fn test_all_language_globs() {
        assert_has_globs(&all_language_globs(), &["**/*.rs", "**/*.py", "**/*.go"]);
    }

    // -------------------------------------------------------------------------
    // Language::from_extension edge cases
    // -------------------------------------------------------------------------

    #[test]
    fn test_language_from_extension_tsx() {
        assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
    }

    #[test]
    fn test_language_from_extension_js() {
        assert_eq!(Language::from_extension("js"), Some(Language::TypeScript));
    }

    #[test]
    fn test_language_from_extension_jsx() {
        assert_eq!(Language::from_extension("jsx"), Some(Language::TypeScript));
    }

    #[test]
    fn test_language_from_extension_case_insensitive() {
        let cases = [
            ("RS", Language::Rust),
            ("PY", Language::Python),
            ("TS", Language::TypeScript),
            ("GO", Language::Go),
        ];
        for (ext, expected) in cases {
            assert_eq!(Language::from_extension(ext), Some(expected));
        }
    }

    #[test]
    fn test_language_from_extension_unknown() {
        for ext in ["c", "java", "rb", ""] {
            assert_eq!(Language::from_extension(ext), None);
        }
    }

    // -------------------------------------------------------------------------
    // Language::extensions()
    // -------------------------------------------------------------------------

    #[test]
    fn test_language_extensions_rust() {
        assert_eq!(Language::Rust.extensions(), &["rs"]);
    }

    #[test]
    fn test_language_extensions_python() {
        assert_eq!(Language::Python.extensions(), &["py"]);
    }

    #[test]
    fn test_language_extensions_typescript() {
        assert_eq!(Language::TypeScript.extensions(), &["ts", "tsx", "js", "jsx"]);
    }

    #[test]
    fn test_language_extensions_go() {
        assert_eq!(Language::Go.extensions(), &["go"]);
    }

    // -------------------------------------------------------------------------
    // Language::glob_patterns()
    // -------------------------------------------------------------------------

    #[test]
    fn test_language_glob_patterns_rust() {
        assert_eq!(Language::Rust.glob_patterns(), vec!["**/*.rs"]);
    }

    #[test]
    fn test_language_glob_patterns_python() {
        assert_eq!(Language::Python.glob_patterns(), vec!["**/*.py"]);
    }

    #[test]
    fn test_language_glob_patterns_typescript() {
        let globs = Language::TypeScript.glob_patterns();
        assert_eq!(globs.len(), 4);
        for wanted in ["**/*.ts", "**/*.tsx", "**/*.js", "**/*.jsx"] {
            assert!(globs.contains(&wanted));
        }
    }

    #[test]
    fn test_language_glob_patterns_go() {
        assert_eq!(Language::Go.glob_patterns(), vec!["**/*.go"]);
    }

    // -------------------------------------------------------------------------
    // all_language_globs completeness
    // -------------------------------------------------------------------------

    #[test]
    fn test_all_language_globs_complete() {
        let globs = all_language_globs();
        assert_eq!(globs.len(), 7);
        assert_has_globs(&globs, &["**/*.ts", "**/*.tsx", "**/*.js", "**/*.jsx"]);
    }

    // -------------------------------------------------------------------------
    // Pattern content verification
    // -------------------------------------------------------------------------

    #[test]
    fn test_universal_patterns_content() {
        // Every expected universal pattern must be present.
        assert_has_patterns(
            &universal_patterns(),
            &[
                "TODO",
                "FIXME",
                "HACK",
                "XXX",
                "BUG",
                "placeholder",
                "stub",
                "dummy",
                "temporary",
                "hardcoded",
                "workaround",
                "tech debt",
            ],
        );
    }

    #[test]
    fn test_rust_patterns_content() {
        assert_has_patterns(
            &rust_patterns(),
            &[
                // SAFETY: no actual unsafe code -- asserting pattern detection includes unsafe keyword
                "unsafe {",
                "transmute",
                "panic!",
                "unreachable!",
                "unimplemented!",
                "todo!",
                "#[ignore]",
                ".unwrap_or_else(|_|",
                "Err(_) => {}",
            ],
        );
    }

    #[test]
    fn test_python_patterns_content() {
        assert_has_patterns(
            &python_patterns(),
            &[
                "except:",
                "except Exception:",
                "except BaseException:",
                "pickle.loads",
                "shell=True",
                "__import__",
                "global ",
                "import *",
                "# type: ignore",
                "@pytest.mark.skip",
                "@unittest.skip",
                "raise NotImplementedError",
                "threading.Thread(",
            ],
        );
    }

    #[test]
    fn test_typescript_patterns_content() {
        assert_has_patterns(
            &typescript_patterns(),
            &[
                "// @ts-ignore",
                "// @ts-nocheck",
                "@ts-expect-error",
                "innerHTML",
                "dangerouslySetInnerHTML",
                "document.write",
                "console.log",
                "debugger",
                "== null",
                "!= null",
                "it.skip",
                "describe.skip",
                "test.skip",
                ".only(",
                ".catch(() => {",
                ".catch(e => {})",
            ],
        );
    }

    #[test]
    fn test_go_patterns_content() {
        assert_has_patterns(
            &go_patterns(),
            &[
                "_ = err",
                "panic(",
                "log.Fatal",
                "go func()",
                "sync.Mutex",
                "data race",
                "sql.Query(",
                "http.Get(",
                "exec.Command(",
                "interface{}",
                "//nolint",
                "t.Skip",
                "testing.Short()",
            ],
        );
    }

    // -------------------------------------------------------------------------
    // Pattern severity/category verification
    // -------------------------------------------------------------------------

    #[test]
    fn test_patterns_have_valid_suspiciousness() {
        for lang in LANGUAGES {
            for (name, _cat, _sev, sus) in patterns_for_language(lang) {
                assert!(
                    (0.0..=1.0).contains(&sus),
                    "Pattern '{}' has invalid suspiciousness: {}",
                    name,
                    sus
                );
            }
        }
    }

    #[test]
    fn test_language_equality() {
        assert_eq!(Language::Rust, Language::Rust);

        let distinct_pairs = [
            (Language::Rust, Language::Python),
            (Language::Python, Language::TypeScript),
            (Language::TypeScript, Language::Go),
        ];
        for (left, right) in distinct_pairs {
            assert_ne!(left, right);
        }
    }
}