1use std::collections::HashMap;
18use std::fs;
19use std::path::{Path, PathBuf};
20
21use anyhow::Result;
22use clap::Args;
23use regex::Regex;
24use tldr_core::walker::walk_project;
25use tldr_core::Language;
26
27use super::error::RemainingError;
28use super::types::{
29 APICheckReport, APICheckSummary, APIRule, MisuseCategory, MisuseFinding, MisuseSeverity,
30};
31
32use crate::output::OutputWriter;
33
/// Directory file-count limit: 1,000.
///
/// NOTE(review): the name suggests this caps how many files a single directory scan may
/// visit; the code that enforces it is outside this excerpt — confirm at the use site.
const MAX_DIRECTORY_FILES: u32 = 1_000;
40
/// File-size limit in bytes: 10 MiB (10_485_760).
///
/// NOTE(review): presumably files larger than this are not read; the enforcing code is
/// outside this excerpt — confirm at the use site.
const MAX_FILE_SIZE: u64 = 10 * (1 << 20); // 1 << 20 bytes == 1 MiB
43
/// Source languages that have a dedicated set of API misuse rules.
///
/// The value is a plain, copyable tag; it carries no data of its own.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ApiLanguage {
    /// Python sources.
    Python,
    /// Rust sources.
    Rust,
    /// Go sources.
    Go,
    /// Java sources.
    Java,
    /// JavaScript sources.
    JavaScript,
    /// TypeScript sources.
    TypeScript,
    /// C sources.
    C,
    /// C++ sources.
    Cpp,
    /// Ruby sources.
    Ruby,
    /// PHP sources.
    Php,
    /// Kotlin sources.
    Kotlin,
    /// Swift sources.
    Swift,
    /// C# sources.
    CSharp,
    /// Scala sources.
    Scala,
    /// Elixir sources.
    Elixir,
    /// Lua sources.
    Lua,
    /// Luau sources.
    Luau,
    /// OCaml sources.
    Ocaml,
}
65
66#[derive(Clone, Copy)]
67struct RegexRuleSpec {
68 id: &'static str,
69 name: &'static str,
70 category: MisuseCategory,
71 severity: MisuseSeverity,
72 description: &'static str,
73 correct_usage: &'static str,
74 pattern: &'static str,
75 api_call: &'static str,
76 message: &'static str,
77 fix_suggestion: &'static str,
78}
79
80impl RegexRuleSpec {
81 fn rule(self) -> APIRule {
82 APIRule {
83 id: self.id.to_string(),
84 name: self.name.to_string(),
85 category: self.category,
86 severity: self.severity,
87 description: self.description.to_string(),
88 correct_usage: self.correct_usage.to_string(),
89 }
90 }
91}
92
93fn rule_applies_to_language(rule_id: &str, language: ApiLanguage) -> bool {
102 let prefix_lang: &[&str] = match language {
108 ApiLanguage::Python => &["PY"],
109 ApiLanguage::Rust => &["RS"],
110 ApiLanguage::Go => &["GO"],
111 ApiLanguage::Java => &["JV"],
112 ApiLanguage::JavaScript => &["JS"],
113 ApiLanguage::TypeScript => &["TS"],
114 ApiLanguage::C => &["C"],
115 ApiLanguage::Cpp => &["CPP"],
116 ApiLanguage::Ruby => &["RB"],
117 ApiLanguage::Php => &["PH"],
118 ApiLanguage::Kotlin => &["KT"],
119 ApiLanguage::Swift => &["SW"],
120 ApiLanguage::CSharp => &["CS"],
121 ApiLanguage::Scala => &["SC"],
122 ApiLanguage::Elixir => &["EX"],
123 ApiLanguage::Lua | ApiLanguage::Luau => &["LU"],
124 ApiLanguage::Ocaml => &["OC"],
125 };
126 for prefix in prefix_lang {
127 if let Some(rest) = rule_id.strip_prefix(prefix) {
128 if rest.chars().next().is_some_and(|c| c.is_ascii_digit()) {
131 return true;
132 }
133 }
134 }
135 false
136}
137
138const GO_RULE_SPECS: &[RegexRuleSpec] = &[
139 RegexRuleSpec {
140 id: "GO001",
141 name: "deprecated-ioutil-readfile",
142 category: MisuseCategory::Resources,
143 severity: MisuseSeverity::Low,
144 description: "ioutil.ReadFile is deprecated and encourages unbounded whole-file reads",
145 correct_usage: "Use os.ReadFile or stream with bufio.Scanner/Reader",
146 pattern: r"\bioutil\.ReadFile\s*\(",
147 api_call: "ioutil.ReadFile",
148 message: "ioutil.ReadFile is deprecated and can load unbounded content into memory",
149 fix_suggestion: "Use os.ReadFile for simple reads or bufio.Reader for bounded streaming",
150 },
151 RegexRuleSpec {
152 id: "GO002",
153 name: "http-get-without-timeout",
154 category: MisuseCategory::Parameters,
155 severity: MisuseSeverity::Medium,
156 description: "http.Get uses the default client and provides no call-specific timeout",
157 correct_usage: "Use an http.Client with Timeout or context-aware requests",
158 pattern: r"\bhttp\.Get\s*\(",
159 api_call: "http.Get",
160 message: "http.Get without an explicit timeout can hang indefinitely",
161 fix_suggestion: "Use an http.Client{Timeout: ...} or NewRequestWithContext",
162 },
163 RegexRuleSpec {
164 id: "GO003",
165 name: "exec-command",
166 category: MisuseCategory::Security,
167 severity: MisuseSeverity::High,
168 description: "exec.Command is risky when arguments or executable names come from input",
169 correct_usage: "Prefer direct library APIs or strictly validate allowed commands",
170 pattern: r"\bexec\.Command\s*\(",
171 api_call: "exec.Command",
172 message: "exec.Command can enable command injection when fed user-controlled values",
173 fix_suggestion: "Validate commands against an allowlist and avoid shell-like execution",
174 },
175 RegexRuleSpec {
176 id: "GO004",
177 name: "template-html-cast",
178 category: MisuseCategory::Security,
179 severity: MisuseSeverity::High,
180 description: "template.HTML bypasses html/template escaping guarantees",
181 correct_usage: "Pass plain strings to templates and let html/template escape them",
182 pattern: r"\btemplate\.HTML\s*\(",
183 api_call: "template.HTML",
184 message: "template.HTML disables escaping and can introduce XSS",
185 fix_suggestion: "Remove the cast and rely on html/template auto-escaping",
186 },
187 RegexRuleSpec {
188 id: "GO005",
189 name: "sql-query-without-context",
190 category: MisuseCategory::CallOrder,
191 severity: MisuseSeverity::Medium,
192 description:
193 "sql.DB.Query lacks cancellation and timeout propagation compared with QueryContext",
194 correct_usage: "Use db.QueryContext(ctx, query, args...)",
195 pattern: r"\bsql\.Query\s*\(",
196 api_call: "sql.Query",
197 message: "sql.Query omits context-driven cancellation and timeout handling",
198 fix_suggestion: "Use QueryContext/ExecContext with a bounded context",
199 },
200];
201
202const JAVA_RULE_SPECS: &[RegexRuleSpec] = &[
203 RegexRuleSpec {
204 id: "JV001",
205 name: "string-comparison-with-double-equals",
206 category: MisuseCategory::CallOrder,
207 severity: MisuseSeverity::Medium,
208 description: "Using == on strings compares references instead of values",
209 correct_usage: "Use value.equals(other) or Objects.equals(a, b)",
210 pattern: r#"(?:".*"|\b\w+\b)\s*==\s*(?:".*"|\b\w+\b)"#,
211 api_call: "==",
212 message: "String comparison with == checks reference identity, not value equality",
213 fix_suggestion: "Use .equals(...) or Objects.equals(...) for string values",
214 },
215 RegexRuleSpec {
216 id: "JV002",
217 name: "runtime-exec",
218 category: MisuseCategory::Security,
219 severity: MisuseSeverity::High,
220 description: "Runtime.exec is dangerous with dynamic input and hard to sandbox correctly",
221 correct_usage: "Use structured APIs or a ProcessBuilder with validated arguments",
222 pattern: r"\bRuntime\.getRuntime\(\)\.exec\s*\(",
223 api_call: "Runtime.exec",
224 message: "Runtime.exec is a common command injection footgun",
225 fix_suggestion: "Prefer library APIs or tightly validated ProcessBuilder arguments",
226 },
227 RegexRuleSpec {
228 id: "JV003",
229 name: "objectinputstream-deserialization",
230 category: MisuseCategory::Security,
231 severity: MisuseSeverity::High,
232 description:
233 "ObjectInputStream on untrusted data can trigger unsafe deserialization gadgets",
234 correct_usage: "Use safer formats like JSON with explicit schemas",
235 pattern: r"\bnew\s+ObjectInputStream\s*\(",
236 api_call: "ObjectInputStream",
237 message: "ObjectInputStream enables unsafe native Java deserialization",
238 fix_suggestion: "Replace native object deserialization with a schema-driven format",
239 },
240 RegexRuleSpec {
241 id: "JV004",
242 name: "create-statement",
243 category: MisuseCategory::Security,
244 severity: MisuseSeverity::Medium,
245 description:
246 "createStatement often leads to string-built SQL instead of prepared statements",
247 correct_usage: "Use prepareStatement with placeholders",
248 pattern: r"\bcreateStatement\s*\(",
249 api_call: "createStatement",
250 message: "createStatement encourages dynamic SQL and weak parameter handling",
251 fix_suggestion: "Use prepareStatement with bound parameters",
252 },
253 RegexRuleSpec {
254 id: "JV005",
255 name: "system-gc-call",
256 category: MisuseCategory::Resources,
257 severity: MisuseSeverity::Low,
258 description: "System.gc() is usually a performance smell and not a reliable memory fix",
259 correct_usage: "Remove manual GC triggers and profile allocations instead",
260 pattern: r"\bSystem\.gc\s*\(",
261 api_call: "System.gc",
262 message: "System.gc() is an unreliable manual GC hint and often harms latency",
263 fix_suggestion: "Remove the call and fix the underlying allocation or lifetime issue",
264 },
265];
266
267const JAVASCRIPT_RULE_SPECS: &[RegexRuleSpec] = &[
268 RegexRuleSpec {
269 id: "JS001",
270 name: "loose-equality",
271 category: MisuseCategory::CallOrder,
272 severity: MisuseSeverity::Medium,
273 description: "Loose equality allows coercions that frequently hide correctness bugs",
274 correct_usage: "Use === / !== except in deliberately reviewed coercion cases",
275 pattern: r"\s==\s|\s!=\s",
276 api_call: "==",
277 message: "Loose equality can coerce values unexpectedly",
278 fix_suggestion: "Use === or !== and handle explicit type conversion",
279 },
280 RegexRuleSpec {
281 id: "JS002",
282 name: "parseint-without-radix",
283 category: MisuseCategory::Parameters,
284 severity: MisuseSeverity::Low,
285 description: "parseInt without a radix is ambiguous and less explicit than required",
286 correct_usage: "Use parseInt(value, 10)",
287 pattern: r"\bparseInt\s*\(\s*[^,\)]+\)",
288 api_call: "parseInt",
289 message: "parseInt called without an explicit radix",
290 fix_suggestion: "Pass a radix explicitly, usually parseInt(value, 10)",
291 },
292 RegexRuleSpec {
293 id: "JS003",
294 name: "json-parse-without-guard",
295 category: MisuseCategory::ErrorHandling,
296 severity: MisuseSeverity::Low,
297 description: "JSON.parse throws on malformed input and should usually be guarded",
298 correct_usage: "Wrap JSON.parse in try/catch when input is not fully trusted",
299 pattern: r"\bJSON\.parse\s*\(",
300 api_call: "JSON.parse",
301 message: "JSON.parse can throw and should be guarded for untrusted input",
302 fix_suggestion: "Use try/catch or validated parsing for untrusted payloads",
303 },
304 RegexRuleSpec {
305 id: "JS004",
306 name: "document-write",
307 category: MisuseCategory::Security,
308 severity: MisuseSeverity::High,
309 description: "document.write is legacy, brittle, and can inject unsanitized HTML",
310 correct_usage: "Use DOM APIs like textContent/appendChild instead",
311 pattern: r"\bdocument\.write(?:ln)?\s*\(",
312 api_call: "document.write",
313 message: "document.write is unsafe and can enable XSS",
314 fix_suggestion: "Use safe DOM APIs instead of writing raw HTML strings",
315 },
316 RegexRuleSpec {
317 id: "JS005",
318 name: "eval-call",
319 category: MisuseCategory::Security,
320 severity: MisuseSeverity::High,
321 description: "eval executes dynamic code and should be avoided",
322 correct_usage: "Use structured data parsing or explicit dispatch tables",
323 pattern: r"\beval\s*\(",
324 api_call: "eval",
325 message: "eval executes dynamic code and creates major security risk",
326 fix_suggestion: "Replace eval with data parsing or explicit function dispatch",
327 },
328];
329
330const TYPESCRIPT_RULE_SPECS: &[RegexRuleSpec] = &[
331 RegexRuleSpec {
332 id: "TS001",
333 name: "loose-equality",
334 category: MisuseCategory::CallOrder,
335 severity: MisuseSeverity::Medium,
336 description: "Loose equality allows coercions that frequently hide correctness bugs",
337 correct_usage: "Use === / !== except in deliberately reviewed coercion cases",
338 pattern: r"\s==\s|\s!=\s",
339 api_call: "==",
340 message: "Loose equality can coerce values unexpectedly",
341 fix_suggestion: "Use === or !== and handle explicit type conversion",
342 },
343 RegexRuleSpec {
344 id: "TS002",
345 name: "parseint-without-radix",
346 category: MisuseCategory::Parameters,
347 severity: MisuseSeverity::Low,
348 description: "parseInt without a radix is ambiguous and less explicit than required",
349 correct_usage: "Use parseInt(value, 10)",
350 pattern: r"\bparseInt\s*\(\s*[^,\)]+\)",
351 api_call: "parseInt",
352 message: "parseInt called without an explicit radix",
353 fix_suggestion: "Pass a radix explicitly, usually parseInt(value, 10)",
354 },
355 RegexRuleSpec {
356 id: "TS003",
357 name: "json-parse-without-guard",
358 category: MisuseCategory::ErrorHandling,
359 severity: MisuseSeverity::Low,
360 description: "JSON.parse throws on malformed input and should usually be guarded",
361 correct_usage: "Wrap JSON.parse in try/catch when input is not fully trusted",
362 pattern: r"\bJSON\.parse\s*\(",
363 api_call: "JSON.parse",
364 message: "JSON.parse can throw and should be guarded for untrusted input",
365 fix_suggestion: "Use try/catch or validated parsing for untrusted payloads",
366 },
367 RegexRuleSpec {
368 id: "TS004",
369 name: "document-write",
370 category: MisuseCategory::Security,
371 severity: MisuseSeverity::High,
372 description: "document.write is legacy, brittle, and can inject unsanitized HTML",
373 correct_usage: "Use DOM APIs like textContent/appendChild instead",
374 pattern: r"\bdocument\.write(?:ln)?\s*\(",
375 api_call: "document.write",
376 message: "document.write is unsafe and can enable XSS",
377 fix_suggestion: "Use safe DOM APIs instead of writing raw HTML strings",
378 },
379 RegexRuleSpec {
380 id: "TS005",
381 name: "eval-call",
382 category: MisuseCategory::Security,
383 severity: MisuseSeverity::High,
384 description: "eval executes dynamic code and should be avoided",
385 correct_usage: "Use structured data parsing or explicit dispatch tables",
386 pattern: r"\beval\s*\(",
387 api_call: "eval",
388 message: "eval executes dynamic code and creates major security risk",
389 fix_suggestion: "Replace eval with data parsing or explicit function dispatch",
390 },
391];
392
393const C_RULE_SPECS: &[RegexRuleSpec] = &[
394 RegexRuleSpec {
395 id: "C001",
396 name: "gets-call",
397 category: MisuseCategory::Security,
398 severity: MisuseSeverity::High,
399 description: "gets cannot bound input and has been removed from the standard library",
400 correct_usage: "Use fgets with an explicit buffer length",
401 pattern: r"\bgets\s*\(",
402 api_call: "gets",
403 message: "gets is inherently unsafe and enables buffer overflows",
404 fix_suggestion: "Use fgets(buffer, size, stdin) or another bounded API",
405 },
406 RegexRuleSpec {
407 id: "C002",
408 name: "strcpy-call",
409 category: MisuseCategory::Security,
410 severity: MisuseSeverity::High,
411 description: "strcpy performs unbounded copies and easily overflows buffers",
412 correct_usage: "Use snprintf, strlcpy, or explicit bounds checks",
413 pattern: r"\bstrcpy\s*\(",
414 api_call: "strcpy",
415 message: "strcpy performs an unbounded copy",
416 fix_suggestion: "Replace strcpy with a bounded copy strategy",
417 },
418 RegexRuleSpec {
419 id: "C003",
420 name: "sprintf-call",
421 category: MisuseCategory::Security,
422 severity: MisuseSeverity::High,
423 description: "sprintf writes formatted data without a size bound",
424 correct_usage: "Use snprintf with the destination buffer size",
425 pattern: r"\bsprintf\s*\(",
426 api_call: "sprintf",
427 message: "sprintf can overflow fixed-size buffers",
428 fix_suggestion: "Use snprintf(buffer, size, ...) instead",
429 },
430 RegexRuleSpec {
431 id: "C004",
432 name: "scanf-string-without-width",
433 category: MisuseCategory::Security,
434 severity: MisuseSeverity::High,
435 description: "scanf with %s and no width limit can overflow the destination buffer",
436 correct_usage: "Provide a width specifier or use fgets",
437 pattern: r#"\bscanf\s*\(\s*"%s"#,
438 api_call: "scanf",
439 message: "scanf(\"%s\") reads unbounded input into a buffer",
440 fix_suggestion: "Add a width limit or use fgets plus parsing",
441 },
442 RegexRuleSpec {
443 id: "C005",
444 name: "system-call",
445 category: MisuseCategory::Security,
446 severity: MisuseSeverity::High,
447 description: "system executes a shell command and is dangerous with dynamic input",
448 correct_usage: "Use execve-family APIs with validated arguments where possible",
449 pattern: r"\bsystem\s*\(",
450 api_call: "system",
451 message: "system executes a shell and is a common command injection vector",
452 fix_suggestion: "Avoid shell execution or tightly validate the command source",
453 },
454];
455
456const CPP_RULE_SPECS: &[RegexRuleSpec] = &[
457 RegexRuleSpec {
458 id: "CPP001",
459 name: "strcpy-call",
460 category: MisuseCategory::Security,
461 severity: MisuseSeverity::High,
462 description: "strcpy performs unbounded copies and easily overflows buffers",
463 correct_usage: "Use std::string, snprintf, or another bounded copy strategy",
464 pattern: r"\bstrcpy\s*\(",
465 api_call: "strcpy",
466 message: "strcpy performs an unbounded copy",
467 fix_suggestion: "Use std::string or a bounded copy API instead",
468 },
469 RegexRuleSpec {
470 id: "CPP002",
471 name: "sprintf-call",
472 category: MisuseCategory::Security,
473 severity: MisuseSeverity::High,
474 description: "sprintf writes formatted data without a size bound",
475 correct_usage: "Use snprintf or std::format into a bounded container",
476 pattern: r"\bsprintf\s*\(",
477 api_call: "sprintf",
478 message: "sprintf can overflow fixed-size buffers",
479 fix_suggestion: "Use snprintf or a safer formatting abstraction",
480 },
481 RegexRuleSpec {
482 id: "CPP003",
483 name: "auto-ptr",
484 category: MisuseCategory::Resources,
485 severity: MisuseSeverity::Medium,
486 description: "std::auto_ptr is obsolete and has broken transfer semantics",
487 correct_usage: "Use std::unique_ptr or std::shared_ptr",
488 pattern: r"\bstd::auto_ptr\s*<",
489 api_call: "std::auto_ptr",
490 message: "std::auto_ptr is obsolete and unsafe by modern ownership standards",
491 fix_suggestion: "Replace std::auto_ptr with std::unique_ptr or std::shared_ptr",
492 },
493 RegexRuleSpec {
494 id: "CPP004",
495 name: "raw-new",
496 category: MisuseCategory::Resources,
497 severity: MisuseSeverity::Medium,
498 description: "Raw new often leads to leaks and exception-safety issues",
499 correct_usage: "Use std::make_unique or stack allocation where possible",
500 pattern: r"\bnew\s+\w",
501 api_call: "new",
502 message: "Raw new makes ownership and exception safety harder to reason about",
503 fix_suggestion: "Use std::make_unique, containers, or stack allocation",
504 },
505 RegexRuleSpec {
506 id: "CPP005",
507 name: "system-call",
508 category: MisuseCategory::Security,
509 severity: MisuseSeverity::High,
510 description: "system executes a shell command and is dangerous with dynamic input",
511 correct_usage: "Use direct process APIs with validated arguments when possible",
512 pattern: r"(?:\bstd::)?system\s*\(",
513 api_call: "system",
514 message: "system executes a shell and is a common command injection vector",
515 fix_suggestion: "Avoid shell execution or tightly validate all command components",
516 },
517];
518
519const RUBY_RULE_SPECS: &[RegexRuleSpec] = &[
520 RegexRuleSpec {
521 id: "RB001",
522 name: "eval-call",
523 category: MisuseCategory::Security,
524 severity: MisuseSeverity::High,
525 description: "eval executes dynamic Ruby code and should be avoided",
526 correct_usage: "Use explicit dispatch or data parsing instead of dynamic code execution",
527 pattern: r"\beval\s*\(",
528 api_call: "eval",
529 message: "eval executes dynamic code and creates major security risk",
530 fix_suggestion: "Replace eval with explicit dispatch or structured parsing",
531 },
532 RegexRuleSpec {
533 id: "RB002",
534 name: "dynamic-send",
535 category: MisuseCategory::Security,
536 severity: MisuseSeverity::Medium,
537 description: "send can invoke arbitrary methods when fed untrusted method names",
538 correct_usage: "Use public_send on a strict allowlist of method names",
539 pattern: r"\.send\s*\(",
540 api_call: "send",
541 message: "send can dispatch to unsafe or unexpected methods",
542 fix_suggestion: "Use public_send with a reviewed allowlist",
543 },
544 RegexRuleSpec {
545 id: "RB003",
546 name: "system-call",
547 category: MisuseCategory::Security,
548 severity: MisuseSeverity::High,
549 description: "system executes a shell command and is dangerous with interpolated input",
550 correct_usage: "Use array-form process APIs with validated arguments",
551 pattern: r"\bsystem\s*\(",
552 api_call: "system",
553 message: "system is a common command injection footgun",
554 fix_suggestion: "Avoid shell execution or pass validated argv-style arguments",
555 },
556 RegexRuleSpec {
557 id: "RB004",
558 name: "yaml-load",
559 category: MisuseCategory::Security,
560 severity: MisuseSeverity::High,
561 description: "YAML.load can instantiate arbitrary objects from untrusted input",
562 correct_usage: "Use YAML.safe_load with permitted classes",
563 pattern: r"\bYAML\.load\s*\(",
564 api_call: "YAML.load",
565 message: "YAML.load can deserialize unsafe objects",
566 fix_suggestion: "Use YAML.safe_load and restrict allowed classes",
567 },
568 RegexRuleSpec {
569 id: "RB005",
570 name: "marshal-load",
571 category: MisuseCategory::Security,
572 severity: MisuseSeverity::High,
573 description: "Marshal.load on untrusted data is unsafe deserialization",
574 correct_usage: "Use JSON or another safe, schema-checked format",
575 pattern: r"\bMarshal\.load\s*\(",
576 api_call: "Marshal.load",
577 message: "Marshal.load performs unsafe native deserialization",
578 fix_suggestion: "Replace Marshal.load with a safer serialization format",
579 },
580];
581
582const PHP_RULE_SPECS: &[RegexRuleSpec] = &[
583 RegexRuleSpec {
584 id: "PH001",
585 name: "deprecated-mysql-functions",
586 category: MisuseCategory::Security,
587 severity: MisuseSeverity::High,
588 description: "mysql_* APIs are removed and encourage unsafe query construction",
589 correct_usage: "Use PDO or mysqli with prepared statements",
590 pattern: r"\bmysql_[a-z_]+\s*\(",
591 api_call: "mysql_*",
592 message: "mysql_* functions are removed and unsafe by modern standards",
593 fix_suggestion: "Migrate to PDO or mysqli prepared statements",
594 },
595 RegexRuleSpec {
596 id: "PH002",
597 name: "extract-call",
598 category: MisuseCategory::Security,
599 severity: MisuseSeverity::Medium,
600 description: "extract pollutes local scope and can overwrite important variables",
601 correct_usage: "Read array keys explicitly instead of splatting them into scope",
602 pattern: r"\bextract\s*\(",
603 api_call: "extract",
604 message: "extract can overwrite local variables and hide data flow",
605 fix_suggestion: "Assign required keys explicitly instead of using extract",
606 },
607 RegexRuleSpec {
608 id: "PH003",
609 name: "eval-call",
610 category: MisuseCategory::Security,
611 severity: MisuseSeverity::High,
612 description: "eval executes dynamic PHP code and should be avoided",
613 correct_usage: "Use explicit dispatch or data parsing instead of dynamic code execution",
614 pattern: r"\beval\s*\(",
615 api_call: "eval",
616 message: "eval executes dynamic code and creates major security risk",
617 fix_suggestion: "Replace eval with explicit dispatch or structured parsing",
618 },
619 RegexRuleSpec {
620 id: "PH004",
621 name: "variable-variables",
622 category: MisuseCategory::Security,
623 severity: MisuseSeverity::Medium,
624 description: "Variable variables make scope mutation hard to reason about",
625 correct_usage: "Use associative arrays or explicit variables instead",
626 pattern: r"\$\$[A-Za-z_]",
627 api_call: "$$",
628 message: "Variable variables obscure data flow and can enable unsafe access patterns",
629 fix_suggestion: "Use an array/map or explicit variable names instead",
630 },
631 RegexRuleSpec {
632 id: "PH005",
633 name: "unserialize-call",
634 category: MisuseCategory::Security,
635 severity: MisuseSeverity::High,
636 description: "unserialize on untrusted data can trigger object injection chains",
637 correct_usage: "Use json_decode or a safer schema-checked format",
638 pattern: r"\bunserialize\s*\(",
639 api_call: "unserialize",
640 message: "unserialize enables unsafe object deserialization",
641 fix_suggestion: "Replace unserialize with json_decode or a safe serializer",
642 },
643];
644
645const KOTLIN_RULE_SPECS: &[RegexRuleSpec] = &[
646 RegexRuleSpec {
647 id: "KT001",
648 name: "force-unwrapped-null",
649 category: MisuseCategory::ErrorHandling,
650 severity: MisuseSeverity::Medium,
651 description: "!! converts nullable values into runtime crashes",
652 correct_usage: "Use safe calls, let, requireNotNull, or explicit branching",
653 pattern: r"!!",
654 api_call: "!!",
655 message: "!! will throw NullPointerException on null values",
656 fix_suggestion: "Use safe calls or explicit null handling instead of !!",
657 },
658 RegexRuleSpec {
659 id: "KT002",
660 name: "lateinit-var",
661 category: MisuseCategory::ErrorHandling,
662 severity: MisuseSeverity::Low,
663 description: "lateinit shifts initialization failures to runtime",
664 correct_usage: "Prefer constructor injection or nullable/state wrappers",
665 pattern: r"\blateinit\s+var\b",
666 api_call: "lateinit",
667 message: "lateinit can fail at runtime if the property is read before initialization",
668 fix_suggestion: "Prefer constructor injection or explicit nullable state",
669 },
670 RegexRuleSpec {
671 id: "KT003",
672 name: "globalscope-launch",
673 category: MisuseCategory::Concurrency,
674 severity: MisuseSeverity::Medium,
675 description: "GlobalScope.launch escapes structured concurrency and leaks work",
676 correct_usage: "Launch from a lifecycle-bound CoroutineScope",
677 pattern: r"\bGlobalScope\.launch\s*\(",
678 api_call: "GlobalScope.launch",
679 message: "GlobalScope.launch detaches work from structured concurrency",
680 fix_suggestion: "Use a lifecycle-bound CoroutineScope instead",
681 },
682 RegexRuleSpec {
683 id: "KT004",
684 name: "runtime-exec",
685 category: MisuseCategory::Security,
686 severity: MisuseSeverity::High,
687 description: "Runtime.exec is dangerous with dynamic input and hard to sandbox correctly",
688 correct_usage: "Use structured APIs or strictly validated ProcessBuilder arguments",
689 pattern: r"\bRuntime\.getRuntime\(\)\.exec\s*\(",
690 api_call: "Runtime.exec",
691 message: "Runtime.exec is a common command injection footgun",
692 fix_suggestion: "Prefer library APIs or tightly validated ProcessBuilder arguments",
693 },
694 RegexRuleSpec {
695 id: "KT005",
696 name: "thread-sleep",
697 category: MisuseCategory::Concurrency,
698 severity: MisuseSeverity::Low,
699 description:
700 "Thread.sleep blocks threads directly and is usually wrong in coroutine-based code",
701 correct_usage: "Use delay(...) in coroutines or higher-level scheduling",
702 pattern: r"\bThread\.sleep\s*\(",
703 api_call: "Thread.sleep",
704 message: "Thread.sleep blocks the current thread directly",
705 fix_suggestion: "Use delay(...) or a proper scheduler instead",
706 },
707];
708
709const SWIFT_RULE_SPECS: &[RegexRuleSpec] = &[
710 RegexRuleSpec {
711 id: "SW001",
712 name: "forced-cast",
713 category: MisuseCategory::ErrorHandling,
714 severity: MisuseSeverity::Medium,
715 description: "as! crashes at runtime when the cast fails",
716 correct_usage: "Use as? with conditional handling",
717 pattern: r"\bas!\b",
718 api_call: "as!",
719 message: "Forced casts crash when the runtime type is different",
720 fix_suggestion: "Use as? and handle the nil case explicitly",
721 },
722 RegexRuleSpec {
723 id: "SW002",
724 name: "forced-try",
725 category: MisuseCategory::ErrorHandling,
726 severity: MisuseSeverity::Medium,
727 description: "try! crashes when the call throws",
728 correct_usage: "Use do/catch or try? with explicit fallback",
729 pattern: r"\btry!\b",
730 api_call: "try!",
731 message: "try! crashes the process on thrown errors",
732 fix_suggestion: "Use do/catch or try? and handle failure explicitly",
733 },
734 RegexRuleSpec {
735 id: "SW003",
736 name: "force-unwrap",
737 category: MisuseCategory::ErrorHandling,
738 severity: MisuseSeverity::Medium,
739 description: "Force unwrapping optionals crashes at runtime on nil",
740 correct_usage: "Use if let, guard let, or nil-coalescing",
741 pattern: r"\b[A-Za-z_][A-Za-z0-9_]*!",
742 api_call: "!",
743 message: "Force unwraps crash when the optional is nil",
744 fix_suggestion: "Use optional binding or nil-coalescing instead of force unwraps",
745 },
746 RegexRuleSpec {
747 id: "SW004",
748 name: "nskeyedunarchiver",
749 category: MisuseCategory::Security,
750 severity: MisuseSeverity::High,
751 description: "Legacy NSKeyedUnarchiver APIs on untrusted data are unsafe",
752 correct_usage: "Use secure decoding APIs with requiresSecureCoding",
753 pattern: r"\bNSKeyedUnarchiver\.unarchiveObject",
754 api_call: "NSKeyedUnarchiver",
755 message: "Legacy unarchiving can deserialize unexpected object graphs",
756 fix_suggestion: "Use secure coding APIs and schema-checked decoding",
757 },
758 RegexRuleSpec {
759 id: "SW005",
760 name: "fatalerror-call",
761 category: MisuseCategory::ErrorHandling,
762 severity: MisuseSeverity::Low,
763 description:
764 "fatalError terminates the process and is risky outside clearly impossible states",
765 correct_usage: "Return/throw recoverable errors where possible",
766 pattern: r"\bfatalError\s*\(",
767 api_call: "fatalError",
768 message: "fatalError terminates the process immediately",
769 fix_suggestion: "Use recoverable error handling unless the state is truly unreachable",
770 },
771];
772
773const CSHARP_RULE_SPECS: &[RegexRuleSpec] = &[
774 RegexRuleSpec {
775 id: "CS001",
776 name: "binaryformatter",
777 category: MisuseCategory::Security,
778 severity: MisuseSeverity::High,
779 description: "BinaryFormatter is insecure and obsolete for untrusted data",
780 correct_usage: "Use System.Text.Json or another safe serializer",
781 pattern: r"\bBinaryFormatter\b",
782 api_call: "BinaryFormatter",
783 message: "BinaryFormatter is insecure and should not be used",
784 fix_suggestion: "Use System.Text.Json or another safe serializer",
785 },
786 RegexRuleSpec {
787 id: "CS002",
788 name: "gc-collect",
789 category: MisuseCategory::Resources,
790 severity: MisuseSeverity::Low,
791 description: "GC.Collect is rarely the right fix and often harms latency",
792 correct_usage: "Remove manual GC triggers and profile the real allocation issue",
793 pattern: r"\bGC\.Collect\s*\(",
794 api_call: "GC.Collect",
795 message: "GC.Collect is an unreliable manual GC hint and often harms performance",
796 fix_suggestion: "Remove the call and fix the underlying allocation issue",
797 },
798 RegexRuleSpec {
799 id: "CS003",
800 name: "task-result",
801 category: MisuseCategory::Concurrency,
802 severity: MisuseSeverity::Medium,
803 description: "Task.Result blocks synchronously and can deadlock async flows",
804 correct_usage: "Use await instead of blocking on Task.Result",
805 pattern: r"\.Result\b",
806 api_call: "Task.Result",
807 message: "Task.Result blocks synchronously and can deadlock async contexts",
808 fix_suggestion: "Use await and keep the async chain asynchronous",
809 },
810 RegexRuleSpec {
811 id: "CS004",
812 name: "task-wait",
813 category: MisuseCategory::Concurrency,
814 severity: MisuseSeverity::Medium,
815 description: "Task.Wait blocks synchronously and can deadlock async flows",
816 correct_usage: "Use await or WhenAll/WhenAny instead of blocking waits",
817 pattern: r"\.Wait\s*\(",
818 api_call: "Task.Wait",
819 message: "Task.Wait blocks synchronously and can deadlock async contexts",
820 fix_suggestion: "Use await or asynchronous coordination primitives instead",
821 },
822 RegexRuleSpec {
823 id: "CS005",
824 name: "process-start",
825 category: MisuseCategory::Security,
826 severity: MisuseSeverity::High,
827 description: "Process.Start is dangerous with untrusted paths or arguments",
828 correct_usage: "Use strict allowlists and avoid shell execution semantics",
829 pattern: r"\bProcess\.Start\s*\(",
830 api_call: "Process.Start",
831 message: "Process.Start can enable command injection with untrusted inputs",
832 fix_suggestion: "Validate executable and arguments against a strict allowlist",
833 },
834];
835
/// Regex-driven misuse rules for Scala sources (rule ids `SC001` through `SC005`).
///
/// Every entry bundles the rule metadata (id, name, category, severity,
/// description, correct usage) with the regex `pattern` used to detect the
/// misuse and the text reported for a hit (`api_call`, `message`,
/// `fix_suggestion`).
const SCALA_RULE_SPECS: &[RegexRuleSpec] = &[
    // SC001: any standalone `null` token.
    RegexRuleSpec {
        id: "SC001",
        name: "null-usage",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Low,
        description: "null bypasses Scala's stronger option-based absence modeling",
        correct_usage: "Use Option instead of null",
        pattern: r"\bnull\b",
        api_call: "null",
        message: "null reintroduces runtime absence bugs into Scala code",
        fix_suggestion: "Use Option and explicit pattern matching instead",
    },
    // SC002: `asInstanceOf[` casts.
    RegexRuleSpec {
        id: "SC002",
        name: "asinstanceof-cast",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Medium,
        description: "asInstanceOf crashes at runtime when the type assumption is wrong",
        correct_usage: "Use pattern matching or TypeTag/ClassTag-aware APIs",
        pattern: r"\basInstanceOf\[",
        api_call: "asInstanceOf",
        message: "asInstanceOf creates unchecked runtime casts",
        fix_suggestion: "Use pattern matching or safer typed abstractions",
    },
    // SC003: calls to `Await.result(`.
    RegexRuleSpec {
        id: "SC003",
        name: "await-result",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Await.result blocks threads and can collapse asynchronous throughput",
        correct_usage: "Compose futures asynchronously instead of blocking",
        pattern: r"\bAwait\.result\s*\(",
        api_call: "Await.result",
        message: "Await.result blocks threads and can create deadlocks or latency spikes",
        fix_suggestion: "Use map/flatMap/for-comprehensions instead of blocking",
    },
    // SC004: fully-qualified references into `scala.collection.mutable.`.
    RegexRuleSpec {
        id: "SC004",
        name: "mutable-collection",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Low,
        description: "scala.collection.mutable structures are harder to reason about under concurrency",
        correct_usage: "Prefer immutable collections unless mutation is intentionally scoped",
        pattern: r"\bscala\.collection\.mutable\.",
        api_call: "scala.collection.mutable",
        message: "Mutable collections can hide shared-state bugs",
        fix_suggestion: "Prefer immutable collections or encapsulate mutation carefully",
    },
    // SC005: calls to `sys.process.Process(`.
    RegexRuleSpec {
        id: "SC005",
        name: "sys-process",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "sys.process.Process executes external commands and is dangerous with input-derived values",
        correct_usage: "Use library APIs or validate commands and arguments against an allowlist",
        pattern: r"\bsys\.process\.Process\s*\(",
        api_call: "sys.process.Process",
        message: "sys.process.Process can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell-style execution or strictly validate all command parts",
    },
];
898
/// Regex-driven misuse rules for Elixir sources (rule ids `EX001` through `EX005`).
///
/// Every entry bundles the rule metadata with the regex `pattern` used to
/// detect the misuse and the text reported for a hit.
const ELIXIR_RULE_SPECS: &[RegexRuleSpec] = &[
    // EX001: calls to `String.to_atom(`.
    RegexRuleSpec {
        id: "EX001",
        name: "string-to-atom",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "String.to_atom on untrusted input can exhaust the VM atom table",
        correct_usage: "Use String.to_existing_atom only for reviewed values or keep strings",
        pattern: r"\bString\.to_atom\s*\(",
        api_call: "String.to_atom",
        message: "String.to_atom can permanently grow the atom table from user input",
        fix_suggestion: "Keep values as strings or use a reviewed to_existing_atom path",
    },
    // EX002: calls to `Code.eval_string(`.
    RegexRuleSpec {
        id: "EX002",
        name: "code-eval-string",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Code.eval_string executes dynamic Elixir code and should be avoided",
        correct_usage: "Use explicit dispatch or data parsing instead of dynamic evaluation",
        pattern: r"\bCode\.eval_string\s*\(",
        api_call: "Code.eval_string",
        message: "Code.eval_string executes dynamic code and is a major security risk",
        fix_suggestion: "Replace dynamic evaluation with explicit dispatch or parsing",
    },
    // EX003: calls to `:erlang.binary_to_term(` (no leading `\b`: the pattern starts at `:`).
    RegexRuleSpec {
        id: "EX003",
        name: "binary-to-term",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: ":erlang.binary_to_term on untrusted data is unsafe deserialization",
        correct_usage: "Use safe formats like JSON or term_to_binary only for trusted data",
        pattern: r":erlang\.binary_to_term\s*\(",
        api_call: ":erlang.binary_to_term",
        message: ":erlang.binary_to_term can deserialize unsafe terms from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // EX004: calls to the raising variant `File.read!(`.
    RegexRuleSpec {
        id: "EX004",
        name: "file-read-bang",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Low,
        description: "Bang file APIs raise instead of returning tagged tuples",
        correct_usage: "Prefer File.read/1 with explicit {:ok, data} / {:error, reason} handling",
        pattern: r"\bFile\.read!\s*\(",
        api_call: "File.read!",
        message: "File.read! raises on failure instead of returning a recoverable error",
        fix_suggestion: "Use File.read/1 and handle the returned tuple explicitly",
    },
    // EX005: `Task.await(<first arg>, :infinity)`; the first argument may not contain a comma.
    RegexRuleSpec {
        id: "EX005",
        name: "task-await-infinity",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Task.await with :infinity can stall callers indefinitely",
        correct_usage: "Use bounded timeouts and supervised retry/cancellation behavior",
        pattern: r"\bTask\.await\s*\([^,]+,\s*:infinity\s*\)",
        api_call: "Task.await",
        message: "Task.await(..., :infinity) can block forever",
        fix_suggestion: "Use a bounded timeout and explicit failure handling",
    },
];
961
/// Regex-driven misuse rules with `LU001` through `LU005` ids.
///
/// `regex_rule_specs_for_language` returns this table for both
/// `ApiLanguage::Lua` and `ApiLanguage::Luau`.
const LUA_RULE_SPECS: &[RegexRuleSpec] = &[
    // LU001: the pattern is anchored with `^`, so it only matches an identifier
    // followed by `=` at the very start of the text it is applied to.
    RegexRuleSpec {
        id: "LU001",
        name: "implicit-global",
        category: MisuseCategory::CallOrder,
        severity: MisuseSeverity::Low,
        description: "Assigning without local leaks mutable globals and creates hidden coupling",
        correct_usage: "Declare locals explicitly with local name = ...",
        pattern: r"^[A-Za-z_][A-Za-z0-9_]*\s*=",
        api_call: "global assignment",
        message: "Implicit global assignment leaks state outside local scope",
        fix_suggestion: "Prefix the binding with local to keep scope explicit",
    },
    // LU002: calls to `loadstring(` or `load(`.
    RegexRuleSpec {
        id: "LU002",
        name: "dynamic-load",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "load/loadstring execute dynamic Lua code and should be avoided",
        correct_usage: "Use structured parsing or explicit dispatch instead of dynamic evaluation",
        pattern: r"\b(?:loadstring|load)\s*\(",
        api_call: "load",
        message: "Dynamic code loading executes attacker-controlled Lua if fed untrusted input",
        fix_suggestion: "Replace dynamic evaluation with explicit dispatch or parsing",
    },
    // LU003: calls to `os.execute(`.
    RegexRuleSpec {
        id: "LU003",
        name: "os-execute",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "os.execute shells out and is dangerous with dynamic input",
        correct_usage: "Avoid shell execution or validate every command component",
        pattern: r"\bos\.execute\s*\(",
        api_call: "os.execute",
        message: "os.execute can enable command injection with untrusted input",
        fix_suggestion: "Avoid shelling out or strictly validate the command source",
    },
    // LU004: calls to `io.popen(`.
    RegexRuleSpec {
        id: "LU004",
        name: "io-popen",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "io.popen launches shell commands and should be treated as high risk",
        correct_usage: "Use safer process APIs or validate all command components",
        pattern: r"\bio\.popen\s*\(",
        api_call: "io.popen",
        message: "io.popen can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell execution or validate every command component",
    },
    // LU005: calls to `dofile(` or `loadfile(`.
    RegexRuleSpec {
        id: "LU005",
        name: "dofile-loadfile",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::Medium,
        description:
            "dofile/loadfile execute external files and are risky with user-controlled paths",
        correct_usage: "Validate file origins strictly before executing them",
        pattern: r"\b(?:dofile|loadfile)\s*\(",
        api_call: "dofile",
        message: "Executing external files is dangerous when the path is not fully trusted",
        fix_suggestion: "Avoid dynamic file execution or tightly validate trusted origins",
    },
];
1025
/// Regex-driven misuse rules for OCaml sources (rule ids `OC001` through `OC005`).
///
/// Unlike most other tables in this file, these patterns end in `\b` rather
/// than `\s*\(`, so they match the bare identifier with no parenthesis required.
const OCAML_RULE_SPECS: &[RegexRuleSpec] = &[
    // OC001: any reference to `Marshal.from_string`.
    RegexRuleSpec {
        id: "OC001",
        name: "marshal-from-string",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Marshal.from_string on untrusted data is unsafe native deserialization",
        correct_usage: "Use a safe, schema-checked serialization format",
        pattern: r"\bMarshal\.from_string\b",
        api_call: "Marshal.from_string",
        message: "Marshal.from_string can deserialize unsafe values from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // OC002: any reference to `Marshal.from_channel`.
    RegexRuleSpec {
        id: "OC002",
        name: "marshal-from-channel",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Marshal.from_channel on untrusted data is unsafe native deserialization",
        correct_usage: "Use a safe, schema-checked serialization format",
        pattern: r"\bMarshal\.from_channel\b",
        api_call: "Marshal.from_channel",
        message: "Marshal.from_channel can deserialize unsafe values from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // OC003: any reference to `Sys.command`.
    RegexRuleSpec {
        id: "OC003",
        name: "sys-command",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Sys.command executes a shell command and is dangerous with dynamic input",
        correct_usage: "Prefer direct library APIs or validate allowed commands strictly",
        pattern: r"\bSys\.command\b",
        api_call: "Sys.command",
        message: "Sys.command can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell execution or tightly validate the command source",
    },
    // OC004: any reference to `Obj.magic`.
    RegexRuleSpec {
        id: "OC004",
        name: "obj-magic",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::High,
        description: "Obj.magic bypasses the type system and can produce memory-unsound behavior",
        correct_usage: "Use typed abstractions or explicit variant handling",
        pattern: r"\bObj\.magic\b",
        api_call: "Obj.magic",
        message: "Obj.magic bypasses type safety and can create undefined behavior",
        fix_suggestion: "Refactor to a typed abstraction instead of coercing with Obj.magic",
    },
    // OC005: the standalone identifiers `open_in` or `open_out`.
    RegexRuleSpec {
        id: "OC005",
        name: "open-in-out",
        category: MisuseCategory::Resources,
        severity: MisuseSeverity::Low,
        description: "open_in/open_out require explicit close calls and are easy to leak",
        correct_usage: "Use In_channel.with_open_* or Out_channel.with_open_* helpers",
        pattern: r"\b(?:open_in|open_out)\b",
        api_call: "open_in",
        message: "open_in/open_out require explicit close handling and are easy to leak",
        fix_suggestion: "Use with_open_* helpers to scope the channel lifetime",
    },
];
1088
/// Every `ApiLanguage` variant, listed once, in enum declaration order.
///
/// Exposed through `all_api_languages()`; `ApiCheckArgs::run` iterates it to
/// total the number of rules across all languages.
const ALL_API_LANGUAGES: &[ApiLanguage] = &[
    ApiLanguage::Python,
    ApiLanguage::Rust,
    ApiLanguage::Go,
    ApiLanguage::Java,
    ApiLanguage::JavaScript,
    ApiLanguage::TypeScript,
    ApiLanguage::C,
    ApiLanguage::Cpp,
    ApiLanguage::Ruby,
    ApiLanguage::Php,
    ApiLanguage::Kotlin,
    ApiLanguage::Swift,
    ApiLanguage::CSharp,
    ApiLanguage::Scala,
    ApiLanguage::Elixir,
    ApiLanguage::Lua,
    ApiLanguage::Luau,
    ApiLanguage::Ocaml,
];
1109
1110fn python_rules() -> Vec<APIRule> {
1116 vec![
1117 APIRule {
1118 id: "PY001".to_string(),
1119 name: "missing-timeout".to_string(),
1120 category: MisuseCategory::Parameters,
1121 severity: MisuseSeverity::High,
1122 description: "requests.get/post/etc without timeout parameter can hang indefinitely"
1123 .to_string(),
1124 correct_usage: "requests.get(url, timeout=30)".to_string(),
1125 },
1126 APIRule {
1127 id: "PY002".to_string(),
1128 name: "bare-except".to_string(),
1129 category: MisuseCategory::ErrorHandling,
1130 severity: MisuseSeverity::Medium,
1131 description: "Bare except clause catches all exceptions including KeyboardInterrupt"
1132 .to_string(),
1133 correct_usage: "except Exception as e:".to_string(),
1134 },
1135 APIRule {
1136 id: "PY003".to_string(),
1137 name: "weak-hash-md5".to_string(),
1138 category: MisuseCategory::Crypto,
1139 severity: MisuseSeverity::High,
1140 description: "MD5 is cryptographically broken, don't use for security purposes"
1141 .to_string(),
1142 correct_usage: "hashlib.sha256() or bcrypt for passwords".to_string(),
1143 },
1144 APIRule {
1145 id: "PY004".to_string(),
1146 name: "weak-hash-sha1".to_string(),
1147 category: MisuseCategory::Crypto,
1148 severity: MisuseSeverity::High,
1149 description: "SHA1 is cryptographically weak, don't use for security purposes"
1150 .to_string(),
1151 correct_usage: "hashlib.sha256() or stronger".to_string(),
1152 },
1153 APIRule {
1154 id: "PY005".to_string(),
1155 name: "unclosed-file".to_string(),
1156 category: MisuseCategory::Resources,
1157 severity: MisuseSeverity::Medium,
1158 description: "File opened without context manager may not be properly closed"
1159 .to_string(),
1160 correct_usage: "with open(path) as f:".to_string(),
1161 },
1162 APIRule {
1163 id: "PY006".to_string(),
1164 name: "insecure-random".to_string(),
1165 category: MisuseCategory::Security,
1166 severity: MisuseSeverity::High,
1167 description: "random module is not cryptographically secure".to_string(),
1168 correct_usage: "secrets.token_bytes() or secrets.token_hex()".to_string(),
1169 },
1170 ]
1171}
1172
1173fn rust_rules() -> Vec<APIRule> {
1175 vec![
1176 APIRule {
1177 id: "RS001".to_string(),
1178 name: "mutex-lock-unwrap".to_string(),
1179 category: MisuseCategory::Concurrency,
1180 severity: MisuseSeverity::Medium,
1181 description: "Mutex::lock().unwrap() can panic and amplify lock contention (CWE-833)"
1182 .to_string(),
1183 correct_usage:
1184 "Prefer try_lock()/error handling or explicit poison recovery instead of unwrap()"
1185 .to_string(),
1186 },
1187 APIRule {
1188 id: "RS002".to_string(),
1189 name: "file-open-without-context".to_string(),
1190 category: MisuseCategory::ErrorHandling,
1191 severity: MisuseSeverity::Low,
1192 description:
1193 "File::open without contextual error mapping makes failures hard to triage"
1194 .to_string(),
1195 correct_usage:
1196 "File::open(path).with_context(|| format!(\"opening {}\", path.display()))?"
1197 .to_string(),
1198 },
1199 APIRule {
1200 id: "RS003".to_string(),
1201 name: "unbounded-with-capacity".to_string(),
1202 category: MisuseCategory::Resources,
1203 severity: MisuseSeverity::High,
1204 description:
1205 "Vec::with_capacity fed from unbounded input can cause memory exhaustion (CWE-770)"
1206 .to_string(),
1207 correct_usage: "Clamp capacity input before allocation (e.g. min(user_len, MAX))"
1208 .to_string(),
1209 },
1210 APIRule {
1211 id: "RS004".to_string(),
1212 name: "detached-tokio-spawn".to_string(),
1213 category: MisuseCategory::Concurrency,
1214 severity: MisuseSeverity::Medium,
1215 description: "tokio::spawn without retaining JoinHandle risks silent task failures"
1216 .to_string(),
1217 correct_usage: "Store JoinHandle values and await/join them".to_string(),
1218 },
1219 APIRule {
1220 id: "RS005".to_string(),
1221 name: "hashmap-order-dependence".to_string(),
1222 category: MisuseCategory::CallOrder,
1223 severity: MisuseSeverity::Low,
1224 description:
1225 "HashMap iteration order is non-deterministic; relying on it can break logic"
1226 .to_string(),
1227 correct_usage:
1228 "Collect keys and sort them, or use BTreeMap/IndexMap when stable order is required"
1229 .to_string(),
1230 },
1231 APIRule {
1232 id: "RS006".to_string(),
1233 name: "clone-in-hot-loop".to_string(),
1234 category: MisuseCategory::Resources,
1235 severity: MisuseSeverity::Low,
1236 description: "clone() inside loop bodies can create avoidable allocation pressure"
1237 .to_string(),
1238 correct_usage: "Borrow or move values instead of cloning in tight loops".to_string(),
1239 },
1240 ]
1241}
1242
/// Returns the static regex rule table for `language`.
///
/// Python and Rust yield an empty slice because their rules are hand-written
/// (`python_rules` / `rust_rules`) rather than regex specs. Lua and Luau share
/// `LUA_RULE_SPECS`.
fn regex_rule_specs_for_language(language: ApiLanguage) -> &'static [RegexRuleSpec] {
    match language {
        // No regex specs: these languages use dedicated check functions.
        ApiLanguage::Python | ApiLanguage::Rust => &[],
        ApiLanguage::Go => GO_RULE_SPECS,
        ApiLanguage::Java => JAVA_RULE_SPECS,
        ApiLanguage::JavaScript => JAVASCRIPT_RULE_SPECS,
        ApiLanguage::TypeScript => TYPESCRIPT_RULE_SPECS,
        ApiLanguage::C => C_RULE_SPECS,
        ApiLanguage::Cpp => CPP_RULE_SPECS,
        ApiLanguage::Ruby => RUBY_RULE_SPECS,
        ApiLanguage::Php => PHP_RULE_SPECS,
        ApiLanguage::Kotlin => KOTLIN_RULE_SPECS,
        ApiLanguage::Swift => SWIFT_RULE_SPECS,
        ApiLanguage::CSharp => CSHARP_RULE_SPECS,
        ApiLanguage::Scala => SCALA_RULE_SPECS,
        ApiLanguage::Elixir => ELIXIR_RULE_SPECS,
        // Both Lua dialects are served by the same table.
        ApiLanguage::Lua | ApiLanguage::Luau => LUA_RULE_SPECS,
        ApiLanguage::Ocaml => OCAML_RULE_SPECS,
    }
}
1263
/// Returns the static list of every supported `ApiLanguage` (`ALL_API_LANGUAGES`).
fn all_api_languages() -> &'static [ApiLanguage] {
    ALL_API_LANGUAGES
}
1267
// Command-line arguments for the API misuse check.
//
// NOTE(review): plain `//` comments are used here on purpose. clap's derive
// macro turns `///` doc comments on an `Args` struct and its fields into help
// text, so adding doc comments would change the CLI output.
#[derive(Debug, Args)]
pub struct ApiCheckArgs {
    // Positional path to check; `run` handles both a single file and a directory.
    #[arg(value_name = "path")]
    pub path: PathBuf,

    // Optional comma-separated `--category` list; `run` passes it to `filter_findings`.
    #[arg(long, value_delimiter = ',')]
    pub category: Option<Vec<MisuseCategory>>,

    // Optional comma-separated `--severity` list; `run` passes it to `filter_findings`.
    #[arg(long, value_delimiter = ',')]
    pub severity: Option<Vec<MisuseSeverity>>,

    // Optional `--output` / `-O` file; when set, `run` writes the report there
    // instead of sending it through the output writer.
    #[arg(long, short = 'O')]
    pub output: Option<PathBuf>,
}
1302
impl ApiCheckArgs {
    /// Runs the API misuse check on `self.path` and emits the report.
    ///
    /// * `format` / `quiet` configure the `OutputWriter` used for progress and output.
    /// * `global_lang`, when set, restricts analysis to files detected as that language.
    ///
    /// # Errors
    ///
    /// Returns a `RemainingError::file_not_found` error when `self.path` does not
    /// exist, and propagates failures from file collection, serialization and
    /// report writing. A failure to analyze an individual file is reported as a
    /// progress warning and does not abort the run.
    pub fn run(
        &self,
        format: crate::output::OutputFormat,
        quiet: bool,
        global_lang: Option<Language>,
    ) -> Result<()> {
        let writer = OutputWriter::new(format, quiet);

        writer.progress(&format!(
            "Checking {} for API misuse patterns...",
            self.path.display()
        ));

        if !self.path.exists() {
            return Err(RemainingError::file_not_found(&self.path).into());
        }

        // Translate the global language selection (if any) into this module's enum.
        let lang_filter: Option<ApiLanguage> = global_lang.and_then(map_language_to_api_language);

        // Total rule count across every supported language, independent of
        // `lang_filter` and of which files end up being scanned.
        let all_rules_count = all_api_languages()
            .iter()
            .map(|language| rules_for_language(*language).len() as u32)
            .sum();

        let files = collect_files(&self.path)?;
        writer.progress(&format!("Found {} files to analyze", files.len()));

        let mut all_findings: Vec<MisuseFinding> = Vec::new();
        let mut files_scanned = 0u32;

        for file_path in &files {
            let Some(language) = detect_language(file_path) else {
                continue;
            };
            // Skip files whose detected language differs from the requested one.
            if let Some(want) = lang_filter {
                if language != want {
                    continue;
                }
            }
            let rules = rules_for_language(language);
            match analyze_file(file_path, &rules, language) {
                Ok(findings) => {
                    all_findings.extend(findings);
                    // Only successfully analyzed files count as scanned.
                    files_scanned += 1;
                }
                Err(e) => {
                    // Best effort: warn and keep going with the remaining files.
                    writer.progress(&format!(
                        "Warning: Failed to analyze {}: {}",
                        file_path.display(),
                        e
                    ));
                }
            }
        }

        let filtered_findings = filter_findings(
            all_findings,
            self.category.as_deref(),
            self.severity.as_deref(),
        );

        // The summary is built from the findings that survived filtering.
        let summary = build_summary(&filtered_findings, files_scanned);

        let report = APICheckReport {
            findings: filtered_findings,
            summary,
            rules_applied: all_rules_count,
        };

        // With an output file: text or pretty JSON goes to that file.
        // Without one: the same choice is routed through the writer.
        if let Some(ref output_path) = self.output {
            if writer.is_text() {
                let text = format_api_check_text(&report);
                fs::write(output_path, text)?;
            } else {
                let json = serde_json::to_string_pretty(&report)?;
                fs::write(output_path, json)?;
            }
        } else if writer.is_text() {
            let text = format_api_check_text(&report);
            writer.write_text(&text)?;
        } else {
            writer.write(&report)?;
        }

        Ok(())
    }
}
1410
1411fn collect_files(path: &Path) -> Result<Vec<PathBuf>> {
1417 let mut files = Vec::new();
1418
1419 if path.is_file() {
1420 if is_supported_file(path) {
1421 files.push(path.to_path_buf());
1422 }
1423 } else if path.is_dir() {
1424 for entry in walk_project(path) {
1425 if files.len() >= MAX_DIRECTORY_FILES as usize {
1426 break;
1427 }
1428
1429 let entry_path = entry.path();
1430 if entry_path.is_file() && is_supported_file(entry_path) {
1431 if let Ok(metadata) = fs::metadata(entry_path) {
1433 if metadata.len() <= MAX_FILE_SIZE {
1434 files.push(entry_path.to_path_buf());
1435 }
1436 }
1437 }
1438 }
1439 }
1440
1441 Ok(files)
1442}
1443
/// Returns `true` when `detect_language` recognizes the extension of `path`.
fn is_supported_file(path: &Path) -> bool {
    detect_language(path).is_some()
}
1448
1449fn map_language_to_api_language(lang: Language) -> Option<ApiLanguage> {
1456 match lang {
1457 Language::Python => Some(ApiLanguage::Python),
1458 Language::Rust => Some(ApiLanguage::Rust),
1459 Language::Go => Some(ApiLanguage::Go),
1460 Language::Java => Some(ApiLanguage::Java),
1461 Language::JavaScript => Some(ApiLanguage::JavaScript),
1462 Language::TypeScript => Some(ApiLanguage::TypeScript),
1463 Language::C => Some(ApiLanguage::C),
1464 Language::Cpp => Some(ApiLanguage::Cpp),
1465 Language::Ruby => Some(ApiLanguage::Ruby),
1466 Language::Php => Some(ApiLanguage::Php),
1467 Language::Kotlin => Some(ApiLanguage::Kotlin),
1468 Language::Swift => Some(ApiLanguage::Swift),
1469 Language::CSharp => Some(ApiLanguage::CSharp),
1470 Language::Scala => Some(ApiLanguage::Scala),
1471 Language::Elixir => Some(ApiLanguage::Elixir),
1472 Language::Lua => Some(ApiLanguage::Lua),
1473 Language::Luau => Some(ApiLanguage::Luau),
1474 Language::Ocaml => Some(ApiLanguage::Ocaml),
1475 }
1476}
1477
1478pub(crate) fn detect_language(path: &Path) -> Option<ApiLanguage> {
1479 match path.extension().and_then(|e| e.to_str()) {
1480 Some("py") => Some(ApiLanguage::Python),
1481 Some("rs") => Some(ApiLanguage::Rust),
1482 Some("go") => Some(ApiLanguage::Go),
1483 Some("java") => Some(ApiLanguage::Java),
1484 Some("js") | Some("jsx") | Some("mjs") | Some("cjs") => Some(ApiLanguage::JavaScript),
1485 Some("ts") | Some("tsx") => Some(ApiLanguage::TypeScript),
1486 Some("c") | Some("h") => Some(ApiLanguage::C),
1487 Some("cpp") | Some("hpp") | Some("cc") | Some("cxx") => Some(ApiLanguage::Cpp),
1488 Some("rb") => Some(ApiLanguage::Ruby),
1489 Some("php") => Some(ApiLanguage::Php),
1490 Some("kt") | Some("kts") => Some(ApiLanguage::Kotlin),
1491 Some("swift") => Some(ApiLanguage::Swift),
1492 Some("cs") => Some(ApiLanguage::CSharp),
1493 Some("scala") => Some(ApiLanguage::Scala),
1494 Some("ex") | Some("exs") => Some(ApiLanguage::Elixir),
1495 Some("lua") => Some(ApiLanguage::Lua),
1496 Some("luau") => Some(ApiLanguage::Luau),
1497 Some("ml") | Some("mli") => Some(ApiLanguage::Ocaml),
1498 _ => None,
1499 }
1500}
1501
1502pub(crate) fn rules_for_language(language: ApiLanguage) -> Vec<APIRule> {
1503 match language {
1504 ApiLanguage::Python => python_rules(),
1505 ApiLanguage::Rust => rust_rules(),
1506 _ => regex_rule_specs_for_language(language)
1507 .iter()
1508 .copied()
1509 .map(RegexRuleSpec::rule)
1510 .collect(),
1511 }
1512}
1513
1514fn language_fastpath_needles(language: ApiLanguage) -> Vec<String> {
1545 match language {
1546 ApiLanguage::Python => ["requests.", "except:", "md5", "sha1", "open(", "random."]
1550 .iter()
1551 .map(|s| (*s).to_string())
1552 .collect(),
1553 ApiLanguage::Rust => [
1556 "Mutex",
1557 "File::open",
1558 "with_capacity",
1559 "tokio::spawn",
1560 "HashMap",
1561 ".clone(",
1562 ]
1563 .iter()
1564 .map(|s| (*s).to_string())
1565 .collect(),
1566 _ => regex_rule_specs_for_language(language)
1572 .iter()
1573 .map(|spec| extract_literal_from_regex(spec.pattern))
1574 .collect(),
1575 }
1576}
1577
/// Extracts the longest literal substring that every match of `pattern` is
/// guaranteed to contain, for use as a cheap `str::contains` pre-filter.
///
/// Returns an empty string when no literal of at least two bytes can be
/// guaranteed. Callers must treat an empty result as "no pre-filter possible".
///
/// The scan is deliberately conservative, because a wrong needle makes the
/// fast path silently drop files that the regex would have matched:
///
/// * a top-level `|` means no single literal is required, so the result is empty;
/// * text inside any group is never collected, because the group may be an
///   alternation (`(?:open_in|open_out)`) or optional (`(foo)?`);
/// * inline flag groups such as `(?i)` change how literals match, so the
///   result is empty;
/// * escapes other than the known assertions/classes and escaped punctuation
///   (for example `\n`, `\x41`, `\p{L}`) cannot be interpreted here, so the
///   result is empty;
/// * non-ASCII bytes end the current literal run instead of being mis-decoded;
/// * a quantifier (`*`, `+`, `?`, `{..}`) removes the atom it applies to from
///   the current run.
fn extract_literal_from_regex(pattern: &str) -> String {
    /// Ends the current literal run, keeping it if it is the longest so far.
    fn close_run(run: &mut String, best: &mut String) {
        if run.len() > best.len() {
            std::mem::swap(run, best);
        }
        run.clear();
    }

    let bytes = pattern.as_bytes();
    let n = bytes.len();

    let mut best = String::new();
    let mut run = String::new();
    // Group nesting depth; literals are only collected at depth 0.
    let mut depth = 0i32;

    let mut i = 0usize;
    while i < n {
        let b = bytes[i];
        match b {
            b'\\' if i + 1 < n => {
                let esc = bytes[i + 1];
                match esc {
                    // Assertions and character-class shorthands: not literal text.
                    b'b' | b'B' | b'A' | b'Z' | b'z' | b's' | b'S' | b'd' | b'D' | b'w'
                    | b'W' => {
                        close_run(&mut run, &mut best);
                    }
                    // Escaped punctuation stands for itself (`\.`, `\(`, `\[`, ...).
                    _ if esc.is_ascii_punctuation() => {
                        if depth == 0 {
                            run.push(esc as char);
                        }
                    }
                    // Anything else (`\n`, `\x41`, `\p{..}`, ...) is not understood here.
                    _ => return String::new(),
                }
                i += 2;
            }
            b'^' | b'$' | b'.' | b']' => {
                close_run(&mut run, &mut best);
                i += 1;
            }
            b'*' | b'+' | b'?' | b'{' => {
                // The preceding atom may repeat or be absent: drop it from the run.
                run.pop();
                close_run(&mut run, &mut best);
                if b == b'{' {
                    // Skip the `{m,n}` body; the closing brace is consumed below.
                    while i < n && bytes[i] != b'}' {
                        i += 1;
                    }
                }
                i += 1;
            }
            b'|' => {
                if depth == 0 {
                    // Top-level alternation: no literal is common to all branches.
                    return String::new();
                }
                close_run(&mut run, &mut best);
                i += 1;
            }
            b'(' => {
                close_run(&mut run, &mut best);
                if bytes.get(i + 1) == Some(&b'?') {
                    match bytes.get(i + 2) {
                        // `(?:...)`, `(?P<name>...)` and `(?<name>...)` are plain groups.
                        Some(b':') | Some(b'P') | Some(b'<') => {}
                        // Flag groups (`(?i)`, `(?x)`, ...) alter literal matching.
                        _ => return String::new(),
                    }
                }
                depth += 1;
                i += 1;
            }
            b')' => {
                close_run(&mut run, &mut best);
                depth -= 1;
                i += 1;
            }
            b'[' => {
                // Character class: skip to the closing `]`, honoring a leading
                // `^`, a literal `]` in first position, and escapes.
                close_run(&mut run, &mut best);
                i += 1;
                if i < n && bytes[i] == b'^' {
                    i += 1;
                }
                if i < n && bytes[i] == b']' {
                    i += 1;
                }
                while i < n && bytes[i] != b']' {
                    if bytes[i] == b'\\' && i + 1 < n {
                        i += 2;
                    } else {
                        i += 1;
                    }
                }
                if i < n {
                    i += 1;
                }
            }
            _ => {
                if depth == 0 && b.is_ascii() {
                    run.push(b as char);
                } else {
                    close_run(&mut run, &mut best);
                }
                i += 1;
            }
        }
    }
    close_run(&mut run, &mut best);

    // A one-byte needle filters almost nothing; report "no needle" instead.
    if best.len() < 2 {
        return String::new();
    }
    best
}
1759
1760pub(crate) fn analyze_file(
1762 path: &Path,
1763 rules: &[APIRule],
1764 language: ApiLanguage,
1765) -> Result<Vec<MisuseFinding>> {
1766 if let tldr_core::fs::oversize::SizeCheck::Oversize { .. } =
1774 tldr_core::fs::oversize::check_size(path)
1775 {
1776 return Ok(Vec::new());
1777 }
1778
1779 let content = fs::read_to_string(path)?;
1780
1781 let needles = language_fastpath_needles(language);
1796 let any_needle_admits_universally = needles.iter().any(|n| n.is_empty());
1797 let any_needle_hit = needles
1798 .iter()
1799 .any(|n| !n.is_empty() && content.contains(n.as_str()));
1800 if !needles.is_empty() && !any_needle_admits_universally && !any_needle_hit {
1801 return Ok(Vec::new());
1802 }
1803
1804 let file_str = path.display().to_string();
1805 let mut findings = Vec::new();
1806 let mut prev_trimmed = String::new();
1807 let file_has_hashmap = matches!(language, ApiLanguage::Rust) && content.contains("HashMap");
1808
1809 let regex_specs: Vec<(&'static RegexRuleSpec, Regex)> =
1818 regex_rule_specs_for_language(language)
1819 .iter()
1820 .filter_map(|spec| Regex::new(spec.pattern).ok().map(|re| (spec, re)))
1821 .collect();
1822
1823 let py_line_ctx: Vec<PyLineContext> = if matches!(language, ApiLanguage::Python) {
1831 compute_python_line_contexts(&content)
1832 } else {
1833 Vec::new()
1834 };
1835
1836 let block_comment_ctx: Vec<bool> = if language_uses_c_block_comments(language) {
1846 compute_c_block_comment_lines(&content)
1847 } else {
1848 Vec::new()
1849 };
1850
1851 for (line_num, line) in content.lines().enumerate() {
1852 let line_number = (line_num + 1) as u32;
1853 let trimmed = line.trim();
1854 if block_comment_ctx
1857 .get(line_num)
1858 .copied()
1859 .unwrap_or(false)
1860 {
1861 prev_trimmed = trimmed.to_string();
1864 continue;
1865 }
1866 let rust_ctx = RustLineContext {
1867 file_has_hashmap,
1868 previous_line: prev_trimmed.as_str(),
1869 previous_is_loop: prev_trimmed.starts_with("for ")
1870 || prev_trimmed.starts_with("while "),
1871 };
1872 let py_ctx = py_line_ctx
1873 .get(line_num)
1874 .copied()
1875 .unwrap_or_default();
1876
1877 for rule in rules {
1879 if let Some(finding) = check_rule(
1880 rule,
1881 &file_str,
1882 line_number,
1883 line,
1884 language,
1885 &rust_ctx,
1886 py_ctx,
1887 ®ex_specs,
1888 ) {
1889 findings.push(finding);
1890 }
1891 }
1892 prev_trimmed = trimmed.to_string();
1893 }
1894
1895 Ok(findings)
1896}
1897
/// Per-line context for Python sources, produced by `compute_python_line_contexts`.
///
/// The `Default` value (both flags `false`) is what `analyze_file` uses when no
/// context exists for a line.
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct PyLineContext {
    /// `true` when the line starts or ends inside a triple-quoted string.
    pub in_docstring: bool,
    /// `true` when the line, ignoring leading whitespace, starts with `def `,
    /// `async def ` or `class `.
    pub is_def_or_class_signature: bool,
}
1914
1915fn language_uses_c_block_comments(language: ApiLanguage) -> bool {
1925 matches!(
1926 language,
1927 ApiLanguage::Rust
1928 | ApiLanguage::Go
1929 | ApiLanguage::Java
1930 | ApiLanguage::JavaScript
1931 | ApiLanguage::TypeScript
1932 | ApiLanguage::C
1933 | ApiLanguage::Cpp
1934 | ApiLanguage::Kotlin
1935 | ApiLanguage::Swift
1936 | ApiLanguage::CSharp
1937 | ApiLanguage::Scala
1938 | ApiLanguage::Php
1939 )
1940}
1941
/// Marks, for every line of `content`, whether any part of that line lies
/// inside a `/* ... */` block comment.
///
/// The result has one entry per `content.lines()` item. A line is `true` when
/// it starts inside a block comment or opens one anywhere on the line, even if
/// the comment also closes on the same line.
///
/// Comment openers inside single- or double-quoted literals and after a `//`
/// line comment are ignored. Quote state is tracked per line only, and a
/// backslash inside a quoted literal escapes the next byte, so `"a \" /* x"`
/// does not open a comment.
pub(crate) fn compute_c_block_comment_lines(content: &str) -> Vec<bool> {
    let mut out = Vec::new();
    // Carries across lines: a block comment may span many of them.
    let mut in_block = false;
    for line in content.lines() {
        let mut any_in_block = in_block;
        let bytes = line.as_bytes();
        let mut i = 0usize;
        let mut in_dq = false;
        let mut in_sq = false;
        while i < bytes.len() {
            let b = bytes[i];
            if in_block {
                if b == b'*' && bytes.get(i + 1) == Some(&b'/') {
                    in_block = false;
                    i += 2;
                } else {
                    i += 1;
                }
                continue;
            }
            // Inside a quoted literal a backslash escapes the following byte,
            // so an escaped quote must not toggle the quote state.
            if (in_dq || in_sq) && b == b'\\' {
                i += 2;
                continue;
            }
            if !in_sq && b == b'"' {
                in_dq = !in_dq;
                i += 1;
                continue;
            }
            if !in_dq && b == b'\'' {
                in_sq = !in_sq;
                i += 1;
                continue;
            }
            if !in_dq && !in_sq {
                if b == b'/' && bytes.get(i + 1) == Some(&b'/') {
                    // Line comment: nothing after it can open a block comment.
                    break;
                }
                if b == b'/' && bytes.get(i + 1) == Some(&b'*') {
                    in_block = true;
                    any_in_block = true;
                    i += 2;
                    continue;
                }
            }
            i += 1;
        }
        out.push(any_in_block);
    }
    out
}
2024
2025pub(crate) fn compute_python_line_contexts(content: &str) -> Vec<PyLineContext> {
2041 let mut out = Vec::new();
2042 let mut state: u8 = 0;
2044 for line in content.lines() {
2045 let stripped = strip_line_comment(line);
2046 let line_starts_in_docstring = state != 0;
2047
2048 let bytes = stripped.as_bytes();
2050 let mut i = 0;
2051 while i + 2 < bytes.len() {
2052 let triple_dq = bytes[i] == b'"' && bytes[i + 1] == b'"' && bytes[i + 2] == b'"';
2053 let triple_sq = bytes[i] == b'\'' && bytes[i + 1] == b'\'' && bytes[i + 2] == b'\'';
2054 match state {
2055 0 if triple_dq => {
2056 state = 1;
2057 i += 3;
2058 continue;
2059 }
2060 0 if triple_sq => {
2061 state = 2;
2062 i += 3;
2063 continue;
2064 }
2065 1 if triple_dq => {
2066 state = 0;
2067 i += 3;
2068 continue;
2069 }
2070 2 if triple_sq => {
2071 state = 0;
2072 i += 3;
2073 continue;
2074 }
2075 _ => {}
2076 }
2077 i += 1;
2078 }
2079 let line_ends_in_docstring = state != 0;
2081
2082 let in_docstring = line_starts_in_docstring || line_ends_in_docstring;
2087
2088 let trimmed = line.trim_start();
2089 let is_def_or_class_signature = trimmed.starts_with("def ")
2090 || trimmed.starts_with("async def ")
2091 || trimmed.starts_with("class ");
2092
2093 out.push(PyLineContext {
2094 in_docstring,
2095 is_def_or_class_signature,
2096 });
2097 }
2098 out
2099}
2100
/// Returns `line` with a trailing `#` comment removed.
///
/// A `#` only starts a comment when it is outside of a `'…'` or `"…"` span.
/// Quote tracking is purely per-line: a quote character opens a span when
/// none is open and only the same character closes it again. Escapes are not
/// interpreted. When no comment is found the whole line is returned.
fn strip_line_comment(line: &str) -> String {
    // The quote character of the span we are currently inside, if any.
    let mut open_quote: Option<char> = None;

    for (idx, ch) in line.char_indices() {
        match (ch, open_quote) {
            ('\'' | '"', None) => open_quote = Some(ch),
            ('\'' | '"', Some(quote)) if quote == ch => open_quote = None,
            // First unquoted `#`: everything before it is the code part.
            ('#', None) => return line[..idx].to_string(),
            _ => {}
        }
    }

    line.to_string()
}
2122
/// Per-line context handed to the Rust-specific rule checks.
///
/// NOTE(review): how the fields are computed is decided by the caller, which
/// is outside this block; the field docs below describe only how the checks
/// in this file read them.
struct RustLineContext<'a> {
    /// Gate for the HashMap iteration-order check: that check only fires
    /// when this is `true`.
    file_has_hashmap: bool,
    /// Text of the preceding line; the HashMap check tests whether it starts
    /// with `for `. Presumably already trimmed by the caller — TODO confirm.
    previous_line: &'a str,
    /// When `true`, the clone-in-loop check treats the current line as being
    /// in loop context even if the line itself contains no `for `/`while `.
    previous_is_loop: bool,
}
2128
2129fn check_rule(
2131 rule: &APIRule,
2132 file: &str,
2133 line: u32,
2134 line_text: &str,
2135 language: ApiLanguage,
2136 rust_ctx: &RustLineContext<'_>,
2137 py_ctx: PyLineContext,
2138 regex_specs: &[(&'static RegexRuleSpec, Regex)],
2139) -> Option<MisuseFinding> {
2140 let trimmed = line_text.trim();
2141
2142 if !rule_applies_to_language(rule.id.as_str(), language) {
2149 return None;
2150 }
2151
2152 if is_comment_line(trimmed, language) {
2154 return None;
2155 }
2156
2157 if matches!(language, ApiLanguage::Python)
2162 && py_rule_skips_docstring_and_signatures(rule.id.as_str())
2163 && (py_ctx.in_docstring || py_ctx.is_def_or_class_signature)
2164 {
2165 return None;
2166 }
2167
2168 match rule.id.as_str() {
2169 "PY001" => check_missing_timeout(rule, file, line, trimmed),
2170 "PY002" => check_bare_except(rule, file, line, trimmed),
2171 "PY003" => check_md5_usage(rule, file, line, trimmed),
2172 "PY004" => check_sha1_usage(rule, file, line, trimmed),
2173 "PY005" => check_unclosed_file(rule, file, line, trimmed),
2174 "PY006" => check_insecure_random(rule, file, line, trimmed),
2175 "RS001" => check_mutex_lock_unwrap(rule, file, line, trimmed),
2176 "RS002" => check_file_open_without_context(rule, file, line, trimmed),
2177 "RS003" => check_unbounded_with_capacity(rule, file, line, trimmed),
2178 "RS004" => check_detached_tokio_spawn(rule, file, line, trimmed),
2179 "RS005" => check_hashmap_order_dependence(rule, file, line, trimmed, rust_ctx),
2180 "RS006" => check_clone_in_hot_loop(rule, file, line, trimmed, rust_ctx),
2181 _ => check_regex_rule(rule, file, line, trimmed, regex_specs),
2182 }
2183}
2184
/// Finds the byte offset of the first `name(` in `line_text` that is not the
/// tail of a longer identifier.
///
/// An occurrence is rejected when the byte directly before it is an ASCII
/// letter, digit or `_` (so `my_md5(` does not count as a call to `md5`).
/// Any other preceding byte, including `.`, is accepted. Returns `None` when
/// no acceptable occurrence exists.
fn find_standalone_call(line_text: &str, name: &str) -> Option<usize> {
    let call = format!("{}(", name);
    let raw = line_text.as_bytes();
    let is_ident_byte = |byte: u8| byte.is_ascii_alphanumeric() || byte == b'_';

    let mut search_from = 0usize;
    loop {
        // `?` ends the search as soon as no further occurrence exists.
        let hit = search_from + line_text[search_from..].find(&call)?;
        let glued_to_identifier = match hit.checked_sub(1) {
            Some(prev) => is_ident_byte(raw[prev]),
            None => false,
        };
        if !glued_to_identifier {
            return Some(hit);
        }
        // Resume one byte past the rejected occurrence.
        search_from = hit + 1;
    }
}
2210
/// Returns `true` for the Python rule ids (`PY003`–`PY006`) that should be
/// suppressed on docstring lines and `def`/`class` signature lines.
fn py_rule_skips_docstring_and_signatures(rule_id: &str) -> bool {
    const SKIPPING_RULES: [&str; 4] = ["PY003", "PY004", "PY005", "PY006"];
    SKIPPING_RULES.contains(&rule_id)
}
2221
2222fn is_comment_line(trimmed: &str, language: ApiLanguage) -> bool {
2223 match language {
2224 ApiLanguage::Python | ApiLanguage::Ruby | ApiLanguage::Elixir => trimmed.starts_with('#'),
2225 ApiLanguage::Rust
2226 | ApiLanguage::Go
2227 | ApiLanguage::Java
2228 | ApiLanguage::JavaScript
2229 | ApiLanguage::TypeScript
2230 | ApiLanguage::C
2231 | ApiLanguage::Cpp
2232 | ApiLanguage::Kotlin
2233 | ApiLanguage::Swift
2234 | ApiLanguage::CSharp
2235 | ApiLanguage::Scala => trimmed.starts_with("//"),
2236 ApiLanguage::Php => trimmed.starts_with("//") || trimmed.starts_with('#'),
2237 ApiLanguage::Lua | ApiLanguage::Luau => trimmed.starts_with("--"),
2238 ApiLanguage::Ocaml => trimmed.starts_with("(*"),
2239 }
2240}
2241
2242fn check_regex_rule(
2243 rule: &APIRule,
2244 file: &str,
2245 line: u32,
2246 line_text: &str,
2247 regex_specs: &[(&'static RegexRuleSpec, Regex)],
2248) -> Option<MisuseFinding> {
2249 let (spec, regex) = regex_specs.iter().find(|(spec, _)| spec.id == rule.id)?;
2252 if !regex.is_match(line_text) {
2253 return None;
2254 }
2255
2256 if rule.id == "JV001" {
2268 if line_has_null_comparison(line_text) {
2272 return None;
2273 }
2274 }
2275
2276 let column = regex.find(line_text).map(|m| m.start()).unwrap_or(0) as u32;
2277 Some(MisuseFinding {
2278 file: file.to_string(),
2279 line,
2280 column,
2281 rule: (*rule).clone(),
2282 api_call: spec.api_call.to_string(),
2283 message: spec.message.to_string(),
2284 fix_suggestion: spec.fix_suggestion.to_string(),
2285 code_context: line_text.to_string(),
2286 })
2287}
2288
/// Reports whether `line_text` contains a `==` or `!=` comparison that has
/// the word `null` within 16 bytes on either side of the operator.
///
/// `===` is skipped entirely. The look-around windows are cut at fixed byte
/// offsets, so they can start or end in the middle of a multi-byte UTF-8
/// character; they are decoded lossily so that such a cut never hides a
/// nearby `null`.
fn line_has_null_comparison(line_text: &str) -> bool {
    /// Number of bytes inspected on each side of the operator.
    const WINDOW: usize = 16;

    let bytes = line_text.as_bytes();
    let mut i = 0;
    while i + 1 < bytes.len() {
        let is_eq = bytes[i] == b'=' && bytes[i + 1] == b'=';
        let is_neq = bytes[i] == b'!' && bytes[i + 1] == b'=';
        if !is_eq && !is_neq {
            i += 1;
            continue;
        }
        // `===` is not a two-character comparison operator; step over it.
        if is_eq && bytes.get(i + 2) == Some(&b'=') {
            i += 3;
            continue;
        }

        let lo = i.saturating_sub(WINDOW);
        let hi = (i + 2 + WINDOW).min(bytes.len());
        // Lossy decoding keeps every ASCII byte of the window intact; only a
        // character split by the window edge is replaced, and `has_word_null`
        // treats non-ASCII bytes as word boundaries anyway.
        let left = String::from_utf8_lossy(&bytes[lo..i]);
        let right = String::from_utf8_lossy(&bytes[i + 2..hi]);
        if has_word_null(&left) || has_word_null(&right) {
            return true;
        }
        i += 2;
    }
    false
}

/// Reports whether `s` contains `null` as a standalone word, i.e. not
/// directly preceded or followed by an ASCII letter, digit or `_`.
fn has_word_null(s: &str) -> bool {
    const WORD: &str = "null";

    let bytes = s.as_bytes();
    let is_ident_byte = |byte: u8| byte.is_ascii_alphanumeric() || byte == b'_';

    // "null" cannot overlap itself, so non-overlapping matches cover every
    // occurrence.
    s.match_indices(WORD).any(|(start, _)| {
        let end = start + WORD.len();
        let before_ok = start == 0 || !is_ident_byte(bytes[start - 1]);
        let after_ok = end == bytes.len() || !is_ident_byte(bytes[end]);
        before_ok && after_ok
    })
}
2345
2346fn check_missing_timeout(
2348 rule: &APIRule,
2349 file: &str,
2350 line: u32,
2351 line_text: &str,
2352) -> Option<MisuseFinding> {
2353 let request_patterns = [
2355 "requests.get(",
2356 "requests.post(",
2357 "requests.put(",
2358 "requests.delete(",
2359 "requests.patch(",
2360 "requests.head(",
2361 "requests.options(",
2362 ];
2363
2364 for pattern in &request_patterns {
2365 if line_text.contains(pattern) && !line_text.contains("timeout") {
2366 let column = line_text.find(pattern).unwrap_or(0) as u32;
2367 return Some(MisuseFinding {
2368 file: file.to_string(),
2369 line,
2370 column,
2371 rule: rule.clone(),
2372 api_call: pattern.trim_end_matches('(').to_string(),
2373 message: format!(
2374 "{} called without timeout parameter",
2375 pattern.trim_end_matches('(')
2376 ),
2377 fix_suggestion: format!("Add timeout parameter: {}url, timeout=30)", pattern),
2378 code_context: line_text.to_string(),
2379 });
2380 }
2381 }
2382
2383 None
2384}
2385
2386fn check_bare_except(
2388 rule: &APIRule,
2389 file: &str,
2390 line: u32,
2391 line_text: &str,
2392) -> Option<MisuseFinding> {
2393 if line_text.starts_with("except:") || line_text.contains(" except:") {
2396 let column = line_text.find("except:").unwrap_or(0) as u32;
2397 return Some(MisuseFinding {
2398 file: file.to_string(),
2399 line,
2400 column,
2401 rule: rule.clone(),
2402 api_call: "except".to_string(),
2403 message: "Bare except clause catches all exceptions including KeyboardInterrupt and SystemExit".to_string(),
2404 fix_suggestion: "Use 'except Exception as e:' to catch only program exceptions".to_string(),
2405 code_context: line_text.to_string(),
2406 });
2407 }
2408
2409 None
2410}
2411
2412fn check_md5_usage(
2414 rule: &APIRule,
2415 file: &str,
2416 line: u32,
2417 line_text: &str,
2418) -> Option<MisuseFinding> {
2419 let has_qualified = line_text.contains("hashlib.md5");
2426 let has_standalone_call = find_standalone_call(line_text, "md5").is_some();
2427 if has_qualified || has_standalone_call {
2428 let column = line_text
2429 .find("hashlib.md5")
2430 .or_else(|| find_standalone_call(line_text, "md5"))
2431 .unwrap_or(0) as u32;
2432 return Some(MisuseFinding {
2433 file: file.to_string(),
2434 line,
2435 column,
2436 rule: rule.clone(),
2437 api_call: "hashlib.md5".to_string(),
2438 message: "MD5 is cryptographically broken and should not be used for security purposes"
2439 .to_string(),
2440 fix_suggestion: "Use hashlib.sha256() or stronger. For passwords, use bcrypt or argon2"
2441 .to_string(),
2442 code_context: line_text.to_string(),
2443 });
2444 }
2445
2446 None
2447}
2448
2449fn check_sha1_usage(
2451 rule: &APIRule,
2452 file: &str,
2453 line: u32,
2454 line_text: &str,
2455) -> Option<MisuseFinding> {
2456 let has_qualified = line_text.contains("hashlib.sha1");
2463 let has_standalone_call = find_standalone_call(line_text, "sha1").is_some();
2464 if has_qualified || has_standalone_call {
2465 let column = line_text
2466 .find("hashlib.sha1")
2467 .or_else(|| find_standalone_call(line_text, "sha1"))
2468 .unwrap_or(0) as u32;
2469 return Some(MisuseFinding {
2470 file: file.to_string(),
2471 line,
2472 column,
2473 rule: rule.clone(),
2474 api_call: "hashlib.sha1".to_string(),
2475 message: "SHA1 is cryptographically weak and should not be used for security purposes"
2476 .to_string(),
2477 fix_suggestion: "Use hashlib.sha256() or stronger".to_string(),
2478 code_context: line_text.to_string(),
2479 });
2480 }
2481
2482 None
2483}
2484
2485fn check_unclosed_file(
2487 rule: &APIRule,
2488 file: &str,
2489 line: u32,
2490 line_text: &str,
2491) -> Option<MisuseFinding> {
2492 if line_text.contains("open(")
2495 && !line_text.contains("with ")
2496 && !line_text.starts_with("with ")
2497 {
2498 if line_text.contains("= open(") || line_text.contains("=open(") {
2500 let column = line_text.find("open(").unwrap_or(0) as u32;
2501 return Some(MisuseFinding {
2502 file: file.to_string(),
2503 line,
2504 column,
2505 rule: rule.clone(),
2506 api_call: "open".to_string(),
2507 message: "File opened without context manager may not be properly closed"
2508 .to_string(),
2509 fix_suggestion: "Use 'with open(path) as f:' to ensure file is closed".to_string(),
2510 code_context: line_text.to_string(),
2511 });
2512 }
2513 }
2514
2515 None
2516}
2517
2518fn check_insecure_random(
2520 rule: &APIRule,
2521 file: &str,
2522 line: u32,
2523 line_text: &str,
2524) -> Option<MisuseFinding> {
2525 let insecure_patterns = [
2527 "random.randint(",
2528 "random.random(",
2529 "random.choice(",
2530 "random.randrange(",
2531 ];
2532
2533 let security_indicators = ["token", "secret", "password", "key", "auth", "session"];
2536
2537 for pattern in &insecure_patterns {
2538 if line_text.contains(pattern) {
2539 let line_lower = line_text.to_lowercase();
2541 for indicator in &security_indicators {
2542 if line_lower.contains(indicator) {
2543 let column = line_text.find(pattern).unwrap_or(0) as u32;
2544 return Some(MisuseFinding {
2545 file: file.to_string(),
2546 line,
2547 column,
2548 rule: rule.clone(),
2549 api_call: pattern.trim_end_matches('(').to_string(),
2550 message: format!(
2551 "{} is not cryptographically secure, don't use for security purposes",
2552 pattern.trim_end_matches('(')
2553 ),
2554 fix_suggestion:
2555 "Use secrets.token_bytes() or secrets.token_hex() for security"
2556 .to_string(),
2557 code_context: line_text.to_string(),
2558 });
2559 }
2560 }
2561 }
2562 }
2563
2564 None
2565}
2566
2567fn check_mutex_lock_unwrap(
2569 rule: &APIRule,
2570 file: &str,
2571 line: u32,
2572 line_text: &str,
2573) -> Option<MisuseFinding> {
2574 if line_text.contains(".lock().unwrap()") {
2575 let column = line_text.find(".lock().unwrap()").unwrap_or(0) as u32;
2576 return Some(MisuseFinding {
2577 file: file.to_string(),
2578 line,
2579 column,
2580 rule: rule.clone(),
2581 api_call: "Mutex::lock".to_string(),
2582 message:
2583 "Mutex::lock().unwrap() can panic on poisoned locks and hide deadlock behavior"
2584 .to_string(),
2585 fix_suggestion:
2586 "Handle lock errors explicitly (match/if let), or use try_lock with backoff"
2587 .to_string(),
2588 code_context: line_text.to_string(),
2589 });
2590 }
2591 None
2592}
2593
2594fn check_file_open_without_context(
2596 rule: &APIRule,
2597 file: &str,
2598 line: u32,
2599 line_text: &str,
2600) -> Option<MisuseFinding> {
2601 if line_text.contains("File::open(")
2602 && !line_text.contains(".context(")
2603 && !line_text.contains(".with_context(")
2604 && !line_text.contains("map_err(")
2605 {
2606 let column = line_text.find("File::open(").unwrap_or(0) as u32;
2607 return Some(MisuseFinding {
2608 file: file.to_string(),
2609 line,
2610 column,
2611 rule: rule.clone(),
2612 api_call: "File::open".to_string(),
2613 message: "File::open used without contextual error mapping".to_string(),
2614 fix_suggestion:
2615 "Wrap errors with context (with_context/context/map_err) before propagating"
2616 .to_string(),
2617 code_context: line_text.to_string(),
2618 });
2619 }
2620 None
2621}
2622
2623fn check_unbounded_with_capacity(
2625 rule: &APIRule,
2626 file: &str,
2627 line: u32,
2628 line_text: &str,
2629) -> Option<MisuseFinding> {
2630 if line_text.contains("Vec::with_capacity(") {
2631 let line_lower = line_text.to_lowercase();
2632 let user_input_markers = ["input", "args", "user", "request", "len", "size"];
2633 if user_input_markers.iter().any(|m| line_lower.contains(m)) {
2634 let column = line_text.find("Vec::with_capacity(").unwrap_or(0) as u32;
2635 return Some(MisuseFinding {
2636 file: file.to_string(),
2637 line,
2638 column,
2639 rule: rule.clone(),
2640 api_call: "Vec::with_capacity".to_string(),
2641 message: "Vec::with_capacity appears to use unbounded external input".to_string(),
2642 fix_suggestion:
2643 "Clamp requested capacity with a hard upper bound before allocation".to_string(),
2644 code_context: line_text.to_string(),
2645 });
2646 }
2647 }
2648 None
2649}
2650
2651fn check_detached_tokio_spawn(
2653 rule: &APIRule,
2654 file: &str,
2655 line: u32,
2656 line_text: &str,
2657) -> Option<MisuseFinding> {
2658 if line_text.contains("tokio::spawn(")
2659 && !line_text.contains('=')
2660 && !line_text.contains("handles.push")
2661 {
2662 let column = line_text.find("tokio::spawn(").unwrap_or(0) as u32;
2663 return Some(MisuseFinding {
2664 file: file.to_string(),
2665 line,
2666 column,
2667 rule: rule.clone(),
2668 api_call: "tokio::spawn".to_string(),
2669 message: "tokio::spawn used without keeping JoinHandle".to_string(),
2670 fix_suggestion: "Store JoinHandle values and await them to surface task errors"
2671 .to_string(),
2672 code_context: line_text.to_string(),
2673 });
2674 }
2675 None
2676}
2677
2678fn check_hashmap_order_dependence(
2680 rule: &APIRule,
2681 file: &str,
2682 line: u32,
2683 line_text: &str,
2684 rust_ctx: &RustLineContext<'_>,
2685) -> Option<MisuseFinding> {
2686 let looks_like_hashmap_iteration = line_text.contains(".iter()")
2687 && (line_text.contains("for ") || rust_ctx.previous_line.starts_with("for "))
2688 && rust_ctx.file_has_hashmap;
2689 if looks_like_hashmap_iteration {
2690 let column = line_text.find(".iter()").unwrap_or(0) as u32;
2691 return Some(MisuseFinding {
2692 file: file.to_string(),
2693 line,
2694 column,
2695 rule: rule.clone(),
2696 api_call: "HashMap::iter".to_string(),
2697 message: "Potential logic dependence on HashMap iteration order".to_string(),
2698 fix_suggestion: "Use BTreeMap/IndexMap or sort keys before ordered operations"
2699 .to_string(),
2700 code_context: line_text.to_string(),
2701 });
2702 }
2703 None
2704}
2705
2706fn check_clone_in_hot_loop(
2708 rule: &APIRule,
2709 file: &str,
2710 line: u32,
2711 line_text: &str,
2712 rust_ctx: &RustLineContext<'_>,
2713) -> Option<MisuseFinding> {
2714 if line_text.contains(".clone()")
2715 && (line_text.contains("for ") || line_text.contains("while ") || rust_ctx.previous_is_loop)
2716 {
2717 let column = line_text.find(".clone()").unwrap_or(0) as u32;
2718 return Some(MisuseFinding {
2719 file: file.to_string(),
2720 line,
2721 column,
2722 rule: rule.clone(),
2723 api_call: "clone".to_string(),
2724 message: "clone() in loop context may create avoidable allocation overhead".to_string(),
2725 fix_suggestion: "Prefer borrowing/references or move semantics inside hot loops"
2726 .to_string(),
2727 code_context: line_text.to_string(),
2728 });
2729 }
2730 None
2731}
2732
2733fn filter_findings(
2739 findings: Vec<MisuseFinding>,
2740 categories: Option<&[MisuseCategory]>,
2741 severities: Option<&[MisuseSeverity]>,
2742) -> Vec<MisuseFinding> {
2743 findings
2744 .into_iter()
2745 .filter(|f| {
2746 if let Some(cats) = categories {
2748 if !cats.contains(&f.rule.category) {
2749 return false;
2750 }
2751 }
2752
2753 if let Some(sevs) = severities {
2755 if !sevs.contains(&f.rule.severity) {
2756 return false;
2757 }
2758 }
2759
2760 true
2761 })
2762 .collect()
2763}
2764
2765fn serialize_misuse_category(cat: &MisuseCategory) -> String {
2774 match cat {
2775 MisuseCategory::CallOrder => "call_order".to_string(),
2776 MisuseCategory::ErrorHandling => "error_handling".to_string(),
2777 MisuseCategory::Parameters => "parameters".to_string(),
2778 MisuseCategory::Resources => "resources".to_string(),
2779 MisuseCategory::Crypto => "crypto".to_string(),
2780 MisuseCategory::Concurrency => "concurrency".to_string(),
2781 MisuseCategory::Security => "security".to_string(),
2782 }
2783}
2784
2785fn serialize_misuse_severity(sev: &MisuseSeverity) -> String {
2788 match sev {
2789 MisuseSeverity::Info => "info".to_string(),
2790 MisuseSeverity::Low => "low".to_string(),
2791 MisuseSeverity::Medium => "medium".to_string(),
2792 MisuseSeverity::High => "high".to_string(),
2793 }
2794}
2795
2796fn build_summary(findings: &[MisuseFinding], files_scanned: u32) -> APICheckSummary {
2798 let mut by_category: HashMap<String, u32> = HashMap::new();
2799 let mut by_severity: HashMap<String, u32> = HashMap::new();
2800 let mut apis_checked: Vec<String> = Vec::new();
2801
2802 for finding in findings {
2803 let cat_str = serialize_misuse_category(&finding.rule.category);
2809 *by_category.entry(cat_str).or_insert(0) += 1;
2810
2811 let sev_str = serialize_misuse_severity(&finding.rule.severity);
2813 *by_severity.entry(sev_str).or_insert(0) += 1;
2814
2815 if !apis_checked.contains(&finding.api_call) {
2817 apis_checked.push(finding.api_call.clone());
2818 }
2819 }
2820
2821 APICheckSummary {
2822 total_findings: findings.len() as u32,
2823 by_category,
2824 by_severity,
2825 apis_checked,
2826 files_scanned,
2827 }
2828}
2829
2830fn format_api_check_text(report: &APICheckReport) -> String {
2836 let mut output = String::new();
2837
2838 output.push_str("=== API Check Report ===\n\n");
2839
2840 output.push_str(&format!(
2842 "Files scanned: {}\n",
2843 report.summary.files_scanned
2844 ));
2845 output.push_str(&format!("Rules applied: {}\n", report.rules_applied));
2846 output.push_str(&format!(
2847 "Total findings: {}\n\n",
2848 report.summary.total_findings
2849 ));
2850
2851 if !report.summary.by_severity.is_empty() {
2853 output.push_str("By Severity:\n");
2854 for (severity, count) in &report.summary.by_severity {
2855 output.push_str(&format!(" {}: {}\n", severity, count));
2856 }
2857 output.push('\n');
2858 }
2859
2860 if !report.summary.by_category.is_empty() {
2862 output.push_str("By Category:\n");
2863 for (category, count) in &report.summary.by_category {
2864 output.push_str(&format!(" {}: {}\n", category, count));
2865 }
2866 output.push('\n');
2867 }
2868
2869 if !report.findings.is_empty() {
2871 output.push_str("Findings:\n");
2872 output.push_str(&"-".repeat(60));
2873 output.push('\n');
2874
2875 for finding in &report.findings {
2876 output.push_str(&format!(
2877 "[{:?}] {} ({})\n",
2878 finding.rule.severity, finding.rule.name, finding.rule.id
2879 ));
2880 output.push_str(&format!(
2881 " Location: {}:{}:{}\n",
2882 finding.file, finding.line, finding.column
2883 ));
2884 output.push_str(&format!(" API: {}\n", finding.api_call));
2885 output.push_str(&format!(" Message: {}\n", finding.message));
2886 output.push_str(&format!(" Fix: {}\n", finding.fix_suggestion));
2887 if !finding.code_context.is_empty() {
2888 output.push_str(&format!(" Context: {}\n", finding.code_context.trim()));
2889 }
2890 output.push('\n');
2891 }
2892 } else {
2893 output.push_str("No API misuse patterns detected.\n");
2894 }
2895
2896 output
2897}
2898
// Unit tests for the API-misuse checker: rule tables, individual line
// checks, filtering/summary helpers and end-to-end file analysis.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// The Python rule table is non-empty and contains the listed rule ids.
    #[test]
    fn test_python_rules_defined() {
        let rules = python_rules();
        assert!(!rules.is_empty());
        assert!(rules.iter().any(|r| r.id == "PY001"));
        assert!(rules.iter().any(|r| r.id == "PY002"));
        assert!(rules.iter().any(|r| r.id == "PY003"));
        assert!(rules.iter().any(|r| r.id == "PY005"));
    }

    /// The Rust rule table is non-empty and contains RS001 through RS006.
    #[test]
    fn test_rust_rules_defined() {
        let rules = rust_rules();
        assert!(!rules.is_empty());
        assert!(rules.iter().any(|r| r.id == "RS001"));
        assert!(rules.iter().any(|r| r.id == "RS002"));
        assert!(rules.iter().any(|r| r.id == "RS003"));
        assert!(rules.iter().any(|r| r.id == "RS004"));
        assert!(rules.iter().any(|r| r.id == "RS005"));
        assert!(rules.iter().any(|r| r.id == "RS006"));
    }

    /// Every language returned by `all_api_languages` has at least one rule.
    #[test]
    fn test_all_supported_languages_have_rules() {
        for language in all_api_languages() {
            let rules = rules_for_language(*language);
            assert!(
                !rules.is_empty(),
                "expected at least one api-check rule for {:?}",
                language
            );
        }
    }

    /// File names with the listed extensions map to the expected language.
    #[test]
    fn test_detect_language_extended_extensions() {
        let cases = [
            ("main.go", ApiLanguage::Go),
            ("Main.java", ApiLanguage::Java),
            ("app.js", ApiLanguage::JavaScript),
            ("component.tsx", ApiLanguage::TypeScript),
            ("main.c", ApiLanguage::C),
            ("main.cpp", ApiLanguage::Cpp),
            ("app.rb", ApiLanguage::Ruby),
            ("index.php", ApiLanguage::Php),
            ("Main.kt", ApiLanguage::Kotlin),
            ("main.swift", ApiLanguage::Swift),
            ("Program.cs", ApiLanguage::CSharp),
            ("Main.scala", ApiLanguage::Scala),
            ("app.ex", ApiLanguage::Elixir),
            ("main.lua", ApiLanguage::Lua),
            ("game.luau", ApiLanguage::Luau),
            ("main.ml", ApiLanguage::Ocaml),
        ];

        for (path, expected) in cases {
            assert_eq!(detect_language(Path::new(path)), Some(expected), "{path}");
        }
    }

    /// A `requests.get` call is flagged without `timeout` and accepted with it.
    #[test]
    fn test_check_missing_timeout() {
        // Assumes index 0 of `python_rules()` is the timeout rule — TODO confirm.
        let rule = &python_rules()[0];
        let finding = check_missing_timeout(rule, "test.py", 1, "response = requests.get(url)");
        assert!(finding.is_some());

        let finding = check_missing_timeout(
            rule,
            "test.py",
            1,
            "response = requests.get(url, timeout=30)",
        );
        assert!(finding.is_none());
    }

    /// `except:` is flagged; `except Exception:` is not.
    #[test]
    fn test_check_bare_except() {
        // Assumes index 1 of `python_rules()` is the bare-except rule — TODO confirm.
        let rule = &python_rules()[1];
        let finding = check_bare_except(rule, "test.py", 1, "except:");
        assert!(finding.is_some());

        let finding = check_bare_except(rule, "test.py", 1, "except Exception:");
        assert!(finding.is_none());
    }

    /// `hashlib.md5` is flagged; `hashlib.sha256` is not.
    #[test]
    fn test_check_md5_usage() {
        // Assumes index 2 of `python_rules()` is the MD5 rule — TODO confirm.
        let rule = &python_rules()[2];
        let finding = check_md5_usage(rule, "test.py", 1, "hash = hashlib.md5(data)");
        assert!(finding.is_some());

        let finding = check_md5_usage(rule, "test.py", 1, "hash = hashlib.sha256(data)");
        assert!(finding.is_none());
    }

    /// An assigned `open(...)` is flagged; the `with open(...)` form is not.
    #[test]
    fn test_check_unclosed_file() {
        // Assumes index 4 of `python_rules()` is the unclosed-file rule — TODO confirm.
        let rule = &python_rules()[4];
        let finding = check_unclosed_file(rule, "test.py", 1, "f = open('data.txt')");
        assert!(finding.is_some());

        let finding = check_unclosed_file(rule, "test.py", 1, "with open('data.txt') as f:");
        assert!(finding.is_none());
    }

    /// Filtering by category keeps only findings of that category.
    #[test]
    fn test_filter_by_category() {
        let findings = vec![
            MisuseFinding {
                file: "test.py".to_string(),
                line: 1,
                column: 0,
                rule: APIRule {
                    id: "PY001".to_string(),
                    name: "test".to_string(),
                    category: MisuseCategory::Parameters,
                    severity: MisuseSeverity::High,
                    description: "test".to_string(),
                    correct_usage: "test".to_string(),
                },
                api_call: "test".to_string(),
                message: "test".to_string(),
                fix_suggestion: "test".to_string(),
                code_context: "test".to_string(),
            },
            MisuseFinding {
                file: "test.py".to_string(),
                line: 2,
                column: 0,
                rule: APIRule {
                    id: "PY003".to_string(),
                    name: "test".to_string(),
                    category: MisuseCategory::Crypto,
                    severity: MisuseSeverity::High,
                    description: "test".to_string(),
                    correct_usage: "test".to_string(),
                },
                api_call: "test".to_string(),
                message: "test".to_string(),
                fix_suggestion: "test".to_string(),
                code_context: "test".to_string(),
            },
        ];

        let filtered = filter_findings(findings, Some(&[MisuseCategory::Crypto]), None);
        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0].rule.category, MisuseCategory::Crypto);
    }

    /// The summary reports totals, the scanned-file count and the API names seen.
    #[test]
    fn test_build_summary() {
        let findings = vec![MisuseFinding {
            file: "test.py".to_string(),
            line: 1,
            column: 0,
            rule: APIRule {
                id: "PY001".to_string(),
                name: "test".to_string(),
                category: MisuseCategory::Parameters,
                severity: MisuseSeverity::High,
                description: "test".to_string(),
                correct_usage: "test".to_string(),
            },
            api_call: "requests.get".to_string(),
            message: "test".to_string(),
            fix_suggestion: "test".to_string(),
            code_context: "test".to_string(),
        }];

        let summary = build_summary(&findings, 5);
        assert_eq!(summary.total_findings, 1);
        assert_eq!(summary.files_scanned, 5);
        assert!(summary.apis_checked.contains(&"requests.get".to_string()));
    }

    /// `collect_files` picks up `.py`, `.rs` and `.go` files and ignores `.txt`.
    #[test]
    fn test_collect_files_includes_rust() {
        let temp = TempDir::new().unwrap();
        let py = temp.path().join("a.py");
        let rs = temp.path().join("b.rs");
        let go = temp.path().join("c.go");
        let txt = temp.path().join("c.txt");
        fs::write(&py, "print('ok')").unwrap();
        fs::write(&rs, "fn main() {}").unwrap();
        fs::write(&go, "package main").unwrap();
        fs::write(&txt, "ignore").unwrap();

        let files = collect_files(temp.path()).unwrap();
        assert!(files.iter().any(|f| f.ends_with("a.py")));
        assert!(files.iter().any(|f| f.ends_with("b.rs")));
        assert!(files.iter().any(|f| f.ends_with("c.go")));
        assert!(!files.iter().any(|f| f.ends_with("c.txt")));
    }

    /// `.lock().unwrap()` is flagged.
    #[test]
    fn test_check_mutex_lock_unwrap() {
        let rule = &rust_rules()[0];
        let finding =
            check_mutex_lock_unwrap(rule, "lib.rs", 10, "let guard = shared.lock().unwrap();");
        assert!(finding.is_some());
    }

    /// `File::open` is flagged bare and accepted when `.with_context(` is present.
    #[test]
    fn test_check_file_open_without_context() {
        let rule = &rust_rules()[1];
        let finding = check_file_open_without_context(rule, "lib.rs", 8, "let f = File::open(p)?;");
        assert!(finding.is_some());

        let contextual = check_file_open_without_context(
            rule,
            "lib.rs",
            9,
            "let f = File::open(p).with_context(|| \"open\".to_string())?;",
        );
        assert!(contextual.is_none());
    }

    /// `Vec::with_capacity(len)` is flagged; a literal capacity is not.
    #[test]
    fn test_check_unbounded_with_capacity() {
        let rule = &rust_rules()[2];
        let finding =
            check_unbounded_with_capacity(rule, "lib.rs", 12, "let v = Vec::with_capacity(len);");
        assert!(finding.is_some());

        let bounded =
            check_unbounded_with_capacity(rule, "lib.rs", 13, "let v = Vec::with_capacity(256);");
        assert!(bounded.is_none());
    }

    /// A bare `tokio::spawn(...)` is flagged; one assigned to a variable is not.
    #[test]
    fn test_check_tokio_spawn_detached() {
        let rule = &rust_rules()[3];
        let detached = check_detached_tokio_spawn(
            rule,
            "lib.rs",
            3,
            "tokio::spawn(async move { work().await; });",
        );
        let tracked = check_detached_tokio_spawn(
            rule,
            "lib.rs",
            4,
            "let handle = tokio::spawn(async move { work().await; });",
        );
        assert!(detached.is_some());
        assert!(tracked.is_none());
    }

    /// `.iter()` on the line after a `for ...` line is flagged when the file has a HashMap.
    #[test]
    fn test_check_hashmap_order_dependence() {
        let rule = &rust_rules()[4];
        let ctx = RustLineContext {
            file_has_hashmap: true,
            previous_line: "for (k, v) in map",
            previous_is_loop: true,
        };
        let finding = check_hashmap_order_dependence(rule, "lib.rs", 12, " .iter()", &ctx);
        assert!(finding.is_some());
    }

    /// `.clone()` is flagged when the context marks the previous line as a loop.
    #[test]
    fn test_check_clone_in_hot_loop() {
        let rule = &rust_rules()[5];
        let ctx = RustLineContext {
            file_has_hashmap: false,
            previous_line: "for item in items {",
            previous_is_loop: true,
        };
        let finding = check_clone_in_hot_loop(rule, "lib.rs", 20, "value.clone()", &ctx);
        assert!(finding.is_some());
    }

    /// Writes `source` to a temp file named `filename`, analyzes it with the
    /// rules of `language`, and asserts that `expected_rule_id` is among the
    /// findings.
    fn assert_language_findings(
        filename: &str,
        language: ApiLanguage,
        source: &str,
        expected_rule_id: &str,
    ) {
        let temp = TempDir::new().unwrap();
        let path = temp.path().join(filename);
        fs::write(&path, source).unwrap();
        let rules = rules_for_language(language);
        let findings = analyze_file(&path, &rules, language).unwrap();
        assert!(
            findings
                .iter()
                .any(|finding| finding.rule.id == expected_rule_id),
            "expected {expected_rule_id} for {filename}, got {:?}",
            findings
                .iter()
                .map(|f| f.rule.id.clone())
                .collect::<Vec<_>>()
        );
    }

    /// One positive sample per non-Python/Rust language triggers the expected rule id.
    #[test]
    fn test_extended_language_rule_detection() {
        let cases = [
            (
                "main.go",
                ApiLanguage::Go,
                "data, _ := ioutil.ReadFile(path)",
                "GO001",
            ),
            (
                "Main.java",
                ApiLanguage::Java,
                "if (name == otherName) { }",
                "JV001",
            ),
            ("app.js", ApiLanguage::JavaScript, "if (a == b) {}", "JS001"),
            ("app.ts", ApiLanguage::TypeScript, "if (a == b) {}", "TS001"),
            ("main.c", ApiLanguage::C, "gets(buffer);", "C001"),
            (
                "main.cpp",
                ApiLanguage::Cpp,
                "std::auto_ptr<Foo> p;",
                "CPP003",
            ),
            ("app.rb", ApiLanguage::Ruby, "eval(params[:code])", "RB001"),
            (
                "index.php",
                ApiLanguage::Php,
                "unserialize($payload);",
                "PH005",
            ),
            ("Main.kt", ApiLanguage::Kotlin, "val name = user!!", "KT001"),
            (
                "main.swift",
                ApiLanguage::Swift,
                "let name = value!",
                "SW003",
            ),
            (
                "Program.cs",
                ApiLanguage::CSharp,
                "var x = task.Result;",
                "CS003",
            ),
            (
                "Main.scala",
                ApiLanguage::Scala,
                "val casted = value.asInstanceOf[String]",
                "SC002",
            ),
            (
                "app.ex",
                ApiLanguage::Elixir,
                "String.to_atom(param)",
                "EX001",
            ),
            ("main.lua", ApiLanguage::Lua, "value = 1", "LU001"),
            ("game.luau", ApiLanguage::Luau, "os.execute(cmd)", "LU003"),
            ("main.ml", ApiLanguage::Ocaml, "Obj.magic value", "OC004"),
        ];

        for (filename, language, source, expected_rule_id) in cases {
            assert_language_findings(filename, language, source, expected_rule_id);
        }
    }

    /// Smoke test over five small fixture files: files containing a rule
    /// keyword must yield findings, the clean Python file must yield none,
    /// and analyzing all five must finish in under two seconds.
    #[test]
    fn test_fastpath_extension_no_perf_regression_on_normal_input() {
        use std::time::Instant;

        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(
            root.join("py_hits.py"),
            "import requests\nrequests.get('http://x')\nimport hashlib\nh = hashlib.md5(b'x').hexdigest()\n",
        )
        .unwrap();
        fs::write(
            root.join("rs_hits.rs"),
            "use std::sync::Mutex;\nlet lock = Mutex::new(0);\nlet v: Vec<u8> = Vec::with_capacity(input);\n",
        )
        .unwrap();
        fs::write(
            root.join("go_hits.go"),
            "package main\nimport \"io/ioutil\"\nfunc f() { _, _ = ioutil.ReadFile(\"/etc/passwd\") }\n",
        )
        .unwrap();
        fs::write(
            root.join("js_hits.js"),
            "function f(s) { eval(s); }\n",
        )
        .unwrap();
        fs::write(
            root.join("py_no_hits.py"),
            "def add(a, b):\n return a + b\n\nif __name__ == '__main__':\n print(add(1, 2))\n",
        )
        .unwrap();

        // (path, language, whether at least one finding is expected)
        let files = [
            (root.join("py_hits.py"), ApiLanguage::Python, true),
            (root.join("rs_hits.rs"), ApiLanguage::Rust, true),
            (root.join("go_hits.go"), ApiLanguage::Go, true),
            (root.join("js_hits.js"), ApiLanguage::JavaScript, true),
            (root.join("py_no_hits.py"), ApiLanguage::Python, false),
        ];

        let start = Instant::now();
        for (path, lang, expect_findings) in files {
            let rules = rules_for_language(lang);
            let findings = analyze_file(&path, &rules, lang).unwrap();
            if expect_findings {
                assert!(
                    !findings.is_empty(),
                    "expected findings for {:?} (rule keyword present in source)",
                    path.file_name()
                );
            } else {
                assert!(
                    findings.is_empty(),
                    "expected no findings for {:?}, got {:?}",
                    path.file_name(),
                    findings.iter().map(|f| f.rule.id.clone()).collect::<Vec<_>>()
                );
            }
        }
        let elapsed = start.elapsed();
        assert!(
            elapsed.as_secs() < 2,
            "fastpath-extend-non-vuln-v1: 5-file fixture took {:?}, expected <2s",
            elapsed
        );
    }

    /// `extract_literal_from_regex` yields the expected literal for each
    /// pattern (an empty string for the last two), and every non-empty
    /// literal is a substring of its positive sample.
    #[test]
    fn test_extract_literal_from_regex_recovers_useful_needles() {
        // (regex pattern, expected literal, sample line the pattern should match)
        let cases: &[(&str, &str, &str)] = &[
            (r"\bioutil\.ReadFile\s*\(", "ioutil.ReadFile", "x := ioutil.ReadFile(p)"),
            (r"\bunserialize\s*\(", "unserialize", "unserialize($x);"),
            (r"\beval\s*\(", "eval", "eval(s)"),
            (
                r"\bRuntime\.getRuntime\(\)\.exec\s*\(",
                "Runtime.getRuntime().exec",
                "Runtime.getRuntime().exec(c)",
            ),
            (r"\s==\s|\s!=\s", "", "if (a == b)"),
            (r"\b[A-Za-z_][A-Za-z0-9_]*!", "", "value!"),
        ];
        for (pattern, expected, sample) in cases {
            let literal = extract_literal_from_regex(pattern);
            assert_eq!(
                literal.as_str(),
                *expected,
                "pattern {:?} should yield literal {:?}",
                pattern,
                expected
            );
            if !literal.is_empty() {
                assert!(
                    sample.contains(literal.as_str()),
                    "literal {:?} from pattern {:?} must be a substring of positive sample {:?}",
                    literal,
                    pattern,
                    sample
                );
            }
        }
    }

    /// Every supported language has at least one fast-path needle.
    #[test]
    fn test_language_fastpath_needles_cover_all_languages() {
        for &lang in all_api_languages() {
            let needles = language_fastpath_needles(lang);
            assert!(
                !needles.is_empty(),
                "language {:?} has no fastpath needles",
                lang
            );
        }
    }
}