1use std::collections::HashMap;
18use std::fs;
19use std::path::{Path, PathBuf};
20
21use anyhow::Result;
22use clap::Args;
23use regex::Regex;
24use tldr_core::walker::walk_project;
25
26use super::error::RemainingError;
27use super::types::{
28 APICheckReport, APICheckSummary, APIRule, MisuseCategory, MisuseFinding, MisuseSeverity,
29};
30
31use crate::output::OutputWriter;
32
/// Upper bound of 1000 on a file count.
///
/// NOTE(review): presumably caps how many files are scanned when the target is a
/// directory; the enforcing code is not visible here, so confirm against the caller.
const MAX_DIRECTORY_FILES: u32 = 1000;

/// Upper bound on a file size in bytes: 10 * 1024 * 1024 (10 MiB).
///
/// NOTE(review): presumably files larger than this are skipped or rejected; confirm
/// against the code that reads this constant.
const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024;
42
/// Source languages this module distinguishes between.
///
/// The enum is a plain, field-less tag (`Copy`, comparable with `==`).
/// NOTE(review): presumably each variant selects the rule table applied to a file
/// (e.g. `Go` -> `GO_RULE_SPECS`); the mapping code is outside this view, so confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ApiLanguage {
    Python,
    Rust,
    Go,
    Java,
    JavaScript,
    TypeScript,
    C,
    Cpp,
    Ruby,
    Php,
    Kotlin,
    Swift,
    CSharp,
    Scala,
    Elixir,
    Lua,
    // Luau is kept distinct from Lua.
    Luau,
    Ocaml,
}
64
/// Static description of one regex-based misuse rule.
///
/// Every field is `'static`/`Copy`, so rule tables can be declared as `const` slices.
/// `RegexRuleSpec::rule` copies the first six fields into an owned `APIRule`.
/// NOTE(review): how `pattern`, `api_call`, `message` and `fix_suggestion` are consumed
/// is not visible in this part of the file — the per-field notes below are assumptions.
#[derive(Clone, Copy)]
struct RegexRuleSpec {
    /// Rule identifier such as `"GO001"`.
    id: &'static str,
    /// Short kebab-case rule name such as `"exec-command"`.
    name: &'static str,
    /// Misuse category the rule is filed under.
    category: MisuseCategory,
    /// Severity assigned to the rule.
    severity: MisuseSeverity,
    /// Explanation of why the flagged API usage is a problem.
    description: &'static str,
    /// Recommended alternative to the flagged usage.
    correct_usage: &'static str,
    /// Regular-expression source text (presumably compiled with `regex::Regex`).
    pattern: &'static str,
    /// Human-readable name of the flagged API (presumably reported with each finding).
    api_call: &'static str,
    /// Message text for a match (presumably attached to each finding).
    message: &'static str,
    /// Remediation hint for a match (presumably attached to each finding).
    fix_suggestion: &'static str,
}
78
79impl RegexRuleSpec {
80 fn rule(self) -> APIRule {
81 APIRule {
82 id: self.id.to_string(),
83 name: self.name.to_string(),
84 category: self.category,
85 severity: self.severity,
86 description: self.description.to_string(),
87 correct_usage: self.correct_usage.to_string(),
88 }
89 }
90}
91
92const GO_RULE_SPECS: &[RegexRuleSpec] = &[
93 RegexRuleSpec {
94 id: "GO001",
95 name: "deprecated-ioutil-readfile",
96 category: MisuseCategory::Resources,
97 severity: MisuseSeverity::Low,
98 description: "ioutil.ReadFile is deprecated and encourages unbounded whole-file reads",
99 correct_usage: "Use os.ReadFile or stream with bufio.Scanner/Reader",
100 pattern: r"\bioutil\.ReadFile\s*\(",
101 api_call: "ioutil.ReadFile",
102 message: "ioutil.ReadFile is deprecated and can load unbounded content into memory",
103 fix_suggestion: "Use os.ReadFile for simple reads or bufio.Reader for bounded streaming",
104 },
105 RegexRuleSpec {
106 id: "GO002",
107 name: "http-get-without-timeout",
108 category: MisuseCategory::Parameters,
109 severity: MisuseSeverity::Medium,
110 description: "http.Get uses the default client and provides no call-specific timeout",
111 correct_usage: "Use an http.Client with Timeout or context-aware requests",
112 pattern: r"\bhttp\.Get\s*\(",
113 api_call: "http.Get",
114 message: "http.Get without an explicit timeout can hang indefinitely",
115 fix_suggestion: "Use an http.Client{Timeout: ...} or NewRequestWithContext",
116 },
117 RegexRuleSpec {
118 id: "GO003",
119 name: "exec-command",
120 category: MisuseCategory::Security,
121 severity: MisuseSeverity::High,
122 description: "exec.Command is risky when arguments or executable names come from input",
123 correct_usage: "Prefer direct library APIs or strictly validate allowed commands",
124 pattern: r"\bexec\.Command\s*\(",
125 api_call: "exec.Command",
126 message: "exec.Command can enable command injection when fed user-controlled values",
127 fix_suggestion: "Validate commands against an allowlist and avoid shell-like execution",
128 },
129 RegexRuleSpec {
130 id: "GO004",
131 name: "template-html-cast",
132 category: MisuseCategory::Security,
133 severity: MisuseSeverity::High,
134 description: "template.HTML bypasses html/template escaping guarantees",
135 correct_usage: "Pass plain strings to templates and let html/template escape them",
136 pattern: r"\btemplate\.HTML\s*\(",
137 api_call: "template.HTML",
138 message: "template.HTML disables escaping and can introduce XSS",
139 fix_suggestion: "Remove the cast and rely on html/template auto-escaping",
140 },
141 RegexRuleSpec {
142 id: "GO005",
143 name: "sql-query-without-context",
144 category: MisuseCategory::CallOrder,
145 severity: MisuseSeverity::Medium,
146 description:
147 "sql.DB.Query lacks cancellation and timeout propagation compared with QueryContext",
148 correct_usage: "Use db.QueryContext(ctx, query, args...)",
149 pattern: r"\bsql\.Query\s*\(",
150 api_call: "sql.Query",
151 message: "sql.Query omits context-driven cancellation and timeout handling",
152 fix_suggestion: "Use QueryContext/ExecContext with a bounded context",
153 },
154];
155
156const JAVA_RULE_SPECS: &[RegexRuleSpec] = &[
157 RegexRuleSpec {
158 id: "JV001",
159 name: "string-comparison-with-double-equals",
160 category: MisuseCategory::CallOrder,
161 severity: MisuseSeverity::Medium,
162 description: "Using == on strings compares references instead of values",
163 correct_usage: "Use value.equals(other) or Objects.equals(a, b)",
164 pattern: r#"(?:".*"|\b\w+\b)\s*==\s*(?:".*"|\b\w+\b)"#,
165 api_call: "==",
166 message: "String comparison with == checks reference identity, not value equality",
167 fix_suggestion: "Use .equals(...) or Objects.equals(...) for string values",
168 },
169 RegexRuleSpec {
170 id: "JV002",
171 name: "runtime-exec",
172 category: MisuseCategory::Security,
173 severity: MisuseSeverity::High,
174 description: "Runtime.exec is dangerous with dynamic input and hard to sandbox correctly",
175 correct_usage: "Use structured APIs or a ProcessBuilder with validated arguments",
176 pattern: r"\bRuntime\.getRuntime\(\)\.exec\s*\(",
177 api_call: "Runtime.exec",
178 message: "Runtime.exec is a common command injection footgun",
179 fix_suggestion: "Prefer library APIs or tightly validated ProcessBuilder arguments",
180 },
181 RegexRuleSpec {
182 id: "JV003",
183 name: "objectinputstream-deserialization",
184 category: MisuseCategory::Security,
185 severity: MisuseSeverity::High,
186 description:
187 "ObjectInputStream on untrusted data can trigger unsafe deserialization gadgets",
188 correct_usage: "Use safer formats like JSON with explicit schemas",
189 pattern: r"\bnew\s+ObjectInputStream\s*\(",
190 api_call: "ObjectInputStream",
191 message: "ObjectInputStream enables unsafe native Java deserialization",
192 fix_suggestion: "Replace native object deserialization with a schema-driven format",
193 },
194 RegexRuleSpec {
195 id: "JV004",
196 name: "create-statement",
197 category: MisuseCategory::Security,
198 severity: MisuseSeverity::Medium,
199 description:
200 "createStatement often leads to string-built SQL instead of prepared statements",
201 correct_usage: "Use prepareStatement with placeholders",
202 pattern: r"\bcreateStatement\s*\(",
203 api_call: "createStatement",
204 message: "createStatement encourages dynamic SQL and weak parameter handling",
205 fix_suggestion: "Use prepareStatement with bound parameters",
206 },
207 RegexRuleSpec {
208 id: "JV005",
209 name: "system-gc-call",
210 category: MisuseCategory::Resources,
211 severity: MisuseSeverity::Low,
212 description: "System.gc() is usually a performance smell and not a reliable memory fix",
213 correct_usage: "Remove manual GC triggers and profile allocations instead",
214 pattern: r"\bSystem\.gc\s*\(",
215 api_call: "System.gc",
216 message: "System.gc() is an unreliable manual GC hint and often harms latency",
217 fix_suggestion: "Remove the call and fix the underlying allocation or lifetime issue",
218 },
219];
220
221const JAVASCRIPT_RULE_SPECS: &[RegexRuleSpec] = &[
222 RegexRuleSpec {
223 id: "JS001",
224 name: "loose-equality",
225 category: MisuseCategory::CallOrder,
226 severity: MisuseSeverity::Medium,
227 description: "Loose equality allows coercions that frequently hide correctness bugs",
228 correct_usage: "Use === / !== except in deliberately reviewed coercion cases",
229 pattern: r"\s==\s|\s!=\s",
230 api_call: "==",
231 message: "Loose equality can coerce values unexpectedly",
232 fix_suggestion: "Use === or !== and handle explicit type conversion",
233 },
234 RegexRuleSpec {
235 id: "JS002",
236 name: "parseint-without-radix",
237 category: MisuseCategory::Parameters,
238 severity: MisuseSeverity::Low,
239 description: "parseInt without a radix is ambiguous and less explicit than required",
240 correct_usage: "Use parseInt(value, 10)",
241 pattern: r"\bparseInt\s*\(\s*[^,\)]+\)",
242 api_call: "parseInt",
243 message: "parseInt called without an explicit radix",
244 fix_suggestion: "Pass a radix explicitly, usually parseInt(value, 10)",
245 },
246 RegexRuleSpec {
247 id: "JS003",
248 name: "json-parse-without-guard",
249 category: MisuseCategory::ErrorHandling,
250 severity: MisuseSeverity::Low,
251 description: "JSON.parse throws on malformed input and should usually be guarded",
252 correct_usage: "Wrap JSON.parse in try/catch when input is not fully trusted",
253 pattern: r"\bJSON\.parse\s*\(",
254 api_call: "JSON.parse",
255 message: "JSON.parse can throw and should be guarded for untrusted input",
256 fix_suggestion: "Use try/catch or validated parsing for untrusted payloads",
257 },
258 RegexRuleSpec {
259 id: "JS004",
260 name: "document-write",
261 category: MisuseCategory::Security,
262 severity: MisuseSeverity::High,
263 description: "document.write is legacy, brittle, and can inject unsanitized HTML",
264 correct_usage: "Use DOM APIs like textContent/appendChild instead",
265 pattern: r"\bdocument\.write(?:ln)?\s*\(",
266 api_call: "document.write",
267 message: "document.write is unsafe and can enable XSS",
268 fix_suggestion: "Use safe DOM APIs instead of writing raw HTML strings",
269 },
270 RegexRuleSpec {
271 id: "JS005",
272 name: "eval-call",
273 category: MisuseCategory::Security,
274 severity: MisuseSeverity::High,
275 description: "eval executes dynamic code and should be avoided",
276 correct_usage: "Use structured data parsing or explicit dispatch tables",
277 pattern: r"\beval\s*\(",
278 api_call: "eval",
279 message: "eval executes dynamic code and creates major security risk",
280 fix_suggestion: "Replace eval with data parsing or explicit function dispatch",
281 },
282];
283
284const TYPESCRIPT_RULE_SPECS: &[RegexRuleSpec] = &[
285 RegexRuleSpec {
286 id: "TS001",
287 name: "loose-equality",
288 category: MisuseCategory::CallOrder,
289 severity: MisuseSeverity::Medium,
290 description: "Loose equality allows coercions that frequently hide correctness bugs",
291 correct_usage: "Use === / !== except in deliberately reviewed coercion cases",
292 pattern: r"\s==\s|\s!=\s",
293 api_call: "==",
294 message: "Loose equality can coerce values unexpectedly",
295 fix_suggestion: "Use === or !== and handle explicit type conversion",
296 },
297 RegexRuleSpec {
298 id: "TS002",
299 name: "parseint-without-radix",
300 category: MisuseCategory::Parameters,
301 severity: MisuseSeverity::Low,
302 description: "parseInt without a radix is ambiguous and less explicit than required",
303 correct_usage: "Use parseInt(value, 10)",
304 pattern: r"\bparseInt\s*\(\s*[^,\)]+\)",
305 api_call: "parseInt",
306 message: "parseInt called without an explicit radix",
307 fix_suggestion: "Pass a radix explicitly, usually parseInt(value, 10)",
308 },
309 RegexRuleSpec {
310 id: "TS003",
311 name: "json-parse-without-guard",
312 category: MisuseCategory::ErrorHandling,
313 severity: MisuseSeverity::Low,
314 description: "JSON.parse throws on malformed input and should usually be guarded",
315 correct_usage: "Wrap JSON.parse in try/catch when input is not fully trusted",
316 pattern: r"\bJSON\.parse\s*\(",
317 api_call: "JSON.parse",
318 message: "JSON.parse can throw and should be guarded for untrusted input",
319 fix_suggestion: "Use try/catch or validated parsing for untrusted payloads",
320 },
321 RegexRuleSpec {
322 id: "TS004",
323 name: "document-write",
324 category: MisuseCategory::Security,
325 severity: MisuseSeverity::High,
326 description: "document.write is legacy, brittle, and can inject unsanitized HTML",
327 correct_usage: "Use DOM APIs like textContent/appendChild instead",
328 pattern: r"\bdocument\.write(?:ln)?\s*\(",
329 api_call: "document.write",
330 message: "document.write is unsafe and can enable XSS",
331 fix_suggestion: "Use safe DOM APIs instead of writing raw HTML strings",
332 },
333 RegexRuleSpec {
334 id: "TS005",
335 name: "eval-call",
336 category: MisuseCategory::Security,
337 severity: MisuseSeverity::High,
338 description: "eval executes dynamic code and should be avoided",
339 correct_usage: "Use structured data parsing or explicit dispatch tables",
340 pattern: r"\beval\s*\(",
341 api_call: "eval",
342 message: "eval executes dynamic code and creates major security risk",
343 fix_suggestion: "Replace eval with data parsing or explicit function dispatch",
344 },
345];
346
347const C_RULE_SPECS: &[RegexRuleSpec] = &[
348 RegexRuleSpec {
349 id: "C001",
350 name: "gets-call",
351 category: MisuseCategory::Security,
352 severity: MisuseSeverity::High,
353 description: "gets cannot bound input and has been removed from the standard library",
354 correct_usage: "Use fgets with an explicit buffer length",
355 pattern: r"\bgets\s*\(",
356 api_call: "gets",
357 message: "gets is inherently unsafe and enables buffer overflows",
358 fix_suggestion: "Use fgets(buffer, size, stdin) or another bounded API",
359 },
360 RegexRuleSpec {
361 id: "C002",
362 name: "strcpy-call",
363 category: MisuseCategory::Security,
364 severity: MisuseSeverity::High,
365 description: "strcpy performs unbounded copies and easily overflows buffers",
366 correct_usage: "Use snprintf, strlcpy, or explicit bounds checks",
367 pattern: r"\bstrcpy\s*\(",
368 api_call: "strcpy",
369 message: "strcpy performs an unbounded copy",
370 fix_suggestion: "Replace strcpy with a bounded copy strategy",
371 },
372 RegexRuleSpec {
373 id: "C003",
374 name: "sprintf-call",
375 category: MisuseCategory::Security,
376 severity: MisuseSeverity::High,
377 description: "sprintf writes formatted data without a size bound",
378 correct_usage: "Use snprintf with the destination buffer size",
379 pattern: r"\bsprintf\s*\(",
380 api_call: "sprintf",
381 message: "sprintf can overflow fixed-size buffers",
382 fix_suggestion: "Use snprintf(buffer, size, ...) instead",
383 },
384 RegexRuleSpec {
385 id: "C004",
386 name: "scanf-string-without-width",
387 category: MisuseCategory::Security,
388 severity: MisuseSeverity::High,
389 description: "scanf with %s and no width limit can overflow the destination buffer",
390 correct_usage: "Provide a width specifier or use fgets",
391 pattern: r#"\bscanf\s*\(\s*"%s"#,
392 api_call: "scanf",
393 message: "scanf(\"%s\") reads unbounded input into a buffer",
394 fix_suggestion: "Add a width limit or use fgets plus parsing",
395 },
396 RegexRuleSpec {
397 id: "C005",
398 name: "system-call",
399 category: MisuseCategory::Security,
400 severity: MisuseSeverity::High,
401 description: "system executes a shell command and is dangerous with dynamic input",
402 correct_usage: "Use execve-family APIs with validated arguments where possible",
403 pattern: r"\bsystem\s*\(",
404 api_call: "system",
405 message: "system executes a shell and is a common command injection vector",
406 fix_suggestion: "Avoid shell execution or tightly validate the command source",
407 },
408];
409
410const CPP_RULE_SPECS: &[RegexRuleSpec] = &[
411 RegexRuleSpec {
412 id: "CPP001",
413 name: "strcpy-call",
414 category: MisuseCategory::Security,
415 severity: MisuseSeverity::High,
416 description: "strcpy performs unbounded copies and easily overflows buffers",
417 correct_usage: "Use std::string, snprintf, or another bounded copy strategy",
418 pattern: r"\bstrcpy\s*\(",
419 api_call: "strcpy",
420 message: "strcpy performs an unbounded copy",
421 fix_suggestion: "Use std::string or a bounded copy API instead",
422 },
423 RegexRuleSpec {
424 id: "CPP002",
425 name: "sprintf-call",
426 category: MisuseCategory::Security,
427 severity: MisuseSeverity::High,
428 description: "sprintf writes formatted data without a size bound",
429 correct_usage: "Use snprintf or std::format into a bounded container",
430 pattern: r"\bsprintf\s*\(",
431 api_call: "sprintf",
432 message: "sprintf can overflow fixed-size buffers",
433 fix_suggestion: "Use snprintf or a safer formatting abstraction",
434 },
435 RegexRuleSpec {
436 id: "CPP003",
437 name: "auto-ptr",
438 category: MisuseCategory::Resources,
439 severity: MisuseSeverity::Medium,
440 description: "std::auto_ptr is obsolete and has broken transfer semantics",
441 correct_usage: "Use std::unique_ptr or std::shared_ptr",
442 pattern: r"\bstd::auto_ptr\s*<",
443 api_call: "std::auto_ptr",
444 message: "std::auto_ptr is obsolete and unsafe by modern ownership standards",
445 fix_suggestion: "Replace std::auto_ptr with std::unique_ptr or std::shared_ptr",
446 },
447 RegexRuleSpec {
448 id: "CPP004",
449 name: "raw-new",
450 category: MisuseCategory::Resources,
451 severity: MisuseSeverity::Medium,
452 description: "Raw new often leads to leaks and exception-safety issues",
453 correct_usage: "Use std::make_unique or stack allocation where possible",
454 pattern: r"\bnew\s+\w",
455 api_call: "new",
456 message: "Raw new makes ownership and exception safety harder to reason about",
457 fix_suggestion: "Use std::make_unique, containers, or stack allocation",
458 },
459 RegexRuleSpec {
460 id: "CPP005",
461 name: "system-call",
462 category: MisuseCategory::Security,
463 severity: MisuseSeverity::High,
464 description: "system executes a shell command and is dangerous with dynamic input",
465 correct_usage: "Use direct process APIs with validated arguments when possible",
466 pattern: r"(?:\bstd::)?system\s*\(",
467 api_call: "system",
468 message: "system executes a shell and is a common command injection vector",
469 fix_suggestion: "Avoid shell execution or tightly validate all command components",
470 },
471];
472
473const RUBY_RULE_SPECS: &[RegexRuleSpec] = &[
474 RegexRuleSpec {
475 id: "RB001",
476 name: "eval-call",
477 category: MisuseCategory::Security,
478 severity: MisuseSeverity::High,
479 description: "eval executes dynamic Ruby code and should be avoided",
480 correct_usage: "Use explicit dispatch or data parsing instead of dynamic code execution",
481 pattern: r"\beval\s*\(",
482 api_call: "eval",
483 message: "eval executes dynamic code and creates major security risk",
484 fix_suggestion: "Replace eval with explicit dispatch or structured parsing",
485 },
486 RegexRuleSpec {
487 id: "RB002",
488 name: "dynamic-send",
489 category: MisuseCategory::Security,
490 severity: MisuseSeverity::Medium,
491 description: "send can invoke arbitrary methods when fed untrusted method names",
492 correct_usage: "Use public_send on a strict allowlist of method names",
493 pattern: r"\.send\s*\(",
494 api_call: "send",
495 message: "send can dispatch to unsafe or unexpected methods",
496 fix_suggestion: "Use public_send with a reviewed allowlist",
497 },
498 RegexRuleSpec {
499 id: "RB003",
500 name: "system-call",
501 category: MisuseCategory::Security,
502 severity: MisuseSeverity::High,
503 description: "system executes a shell command and is dangerous with interpolated input",
504 correct_usage: "Use array-form process APIs with validated arguments",
505 pattern: r"\bsystem\s*\(",
506 api_call: "system",
507 message: "system is a common command injection footgun",
508 fix_suggestion: "Avoid shell execution or pass validated argv-style arguments",
509 },
510 RegexRuleSpec {
511 id: "RB004",
512 name: "yaml-load",
513 category: MisuseCategory::Security,
514 severity: MisuseSeverity::High,
515 description: "YAML.load can instantiate arbitrary objects from untrusted input",
516 correct_usage: "Use YAML.safe_load with permitted classes",
517 pattern: r"\bYAML\.load\s*\(",
518 api_call: "YAML.load",
519 message: "YAML.load can deserialize unsafe objects",
520 fix_suggestion: "Use YAML.safe_load and restrict allowed classes",
521 },
522 RegexRuleSpec {
523 id: "RB005",
524 name: "marshal-load",
525 category: MisuseCategory::Security,
526 severity: MisuseSeverity::High,
527 description: "Marshal.load on untrusted data is unsafe deserialization",
528 correct_usage: "Use JSON or another safe, schema-checked format",
529 pattern: r"\bMarshal\.load\s*\(",
530 api_call: "Marshal.load",
531 message: "Marshal.load performs unsafe native deserialization",
532 fix_suggestion: "Replace Marshal.load with a safer serialization format",
533 },
534];
535
536const PHP_RULE_SPECS: &[RegexRuleSpec] = &[
537 RegexRuleSpec {
538 id: "PH001",
539 name: "deprecated-mysql-functions",
540 category: MisuseCategory::Security,
541 severity: MisuseSeverity::High,
542 description: "mysql_* APIs are removed and encourage unsafe query construction",
543 correct_usage: "Use PDO or mysqli with prepared statements",
544 pattern: r"\bmysql_[a-z_]+\s*\(",
545 api_call: "mysql_*",
546 message: "mysql_* functions are removed and unsafe by modern standards",
547 fix_suggestion: "Migrate to PDO or mysqli prepared statements",
548 },
549 RegexRuleSpec {
550 id: "PH002",
551 name: "extract-call",
552 category: MisuseCategory::Security,
553 severity: MisuseSeverity::Medium,
554 description: "extract pollutes local scope and can overwrite important variables",
555 correct_usage: "Read array keys explicitly instead of splatting them into scope",
556 pattern: r"\bextract\s*\(",
557 api_call: "extract",
558 message: "extract can overwrite local variables and hide data flow",
559 fix_suggestion: "Assign required keys explicitly instead of using extract",
560 },
561 RegexRuleSpec {
562 id: "PH003",
563 name: "eval-call",
564 category: MisuseCategory::Security,
565 severity: MisuseSeverity::High,
566 description: "eval executes dynamic PHP code and should be avoided",
567 correct_usage: "Use explicit dispatch or data parsing instead of dynamic code execution",
568 pattern: r"\beval\s*\(",
569 api_call: "eval",
570 message: "eval executes dynamic code and creates major security risk",
571 fix_suggestion: "Replace eval with explicit dispatch or structured parsing",
572 },
573 RegexRuleSpec {
574 id: "PH004",
575 name: "variable-variables",
576 category: MisuseCategory::Security,
577 severity: MisuseSeverity::Medium,
578 description: "Variable variables make scope mutation hard to reason about",
579 correct_usage: "Use associative arrays or explicit variables instead",
580 pattern: r"\$\$[A-Za-z_]",
581 api_call: "$$",
582 message: "Variable variables obscure data flow and can enable unsafe access patterns",
583 fix_suggestion: "Use an array/map or explicit variable names instead",
584 },
585 RegexRuleSpec {
586 id: "PH005",
587 name: "unserialize-call",
588 category: MisuseCategory::Security,
589 severity: MisuseSeverity::High,
590 description: "unserialize on untrusted data can trigger object injection chains",
591 correct_usage: "Use json_decode or a safer schema-checked format",
592 pattern: r"\bunserialize\s*\(",
593 api_call: "unserialize",
594 message: "unserialize enables unsafe object deserialization",
595 fix_suggestion: "Replace unserialize with json_decode or a safe serializer",
596 },
597];
598
599const KOTLIN_RULE_SPECS: &[RegexRuleSpec] = &[
600 RegexRuleSpec {
601 id: "KT001",
602 name: "force-unwrapped-null",
603 category: MisuseCategory::ErrorHandling,
604 severity: MisuseSeverity::Medium,
605 description: "!! converts nullable values into runtime crashes",
606 correct_usage: "Use safe calls, let, requireNotNull, or explicit branching",
607 pattern: r"!!",
608 api_call: "!!",
609 message: "!! will throw NullPointerException on null values",
610 fix_suggestion: "Use safe calls or explicit null handling instead of !!",
611 },
612 RegexRuleSpec {
613 id: "KT002",
614 name: "lateinit-var",
615 category: MisuseCategory::ErrorHandling,
616 severity: MisuseSeverity::Low,
617 description: "lateinit shifts initialization failures to runtime",
618 correct_usage: "Prefer constructor injection or nullable/state wrappers",
619 pattern: r"\blateinit\s+var\b",
620 api_call: "lateinit",
621 message: "lateinit can fail at runtime if the property is read before initialization",
622 fix_suggestion: "Prefer constructor injection or explicit nullable state",
623 },
624 RegexRuleSpec {
625 id: "KT003",
626 name: "globalscope-launch",
627 category: MisuseCategory::Concurrency,
628 severity: MisuseSeverity::Medium,
629 description: "GlobalScope.launch escapes structured concurrency and leaks work",
630 correct_usage: "Launch from a lifecycle-bound CoroutineScope",
631 pattern: r"\bGlobalScope\.launch\s*\(",
632 api_call: "GlobalScope.launch",
633 message: "GlobalScope.launch detaches work from structured concurrency",
634 fix_suggestion: "Use a lifecycle-bound CoroutineScope instead",
635 },
636 RegexRuleSpec {
637 id: "KT004",
638 name: "runtime-exec",
639 category: MisuseCategory::Security,
640 severity: MisuseSeverity::High,
641 description: "Runtime.exec is dangerous with dynamic input and hard to sandbox correctly",
642 correct_usage: "Use structured APIs or strictly validated ProcessBuilder arguments",
643 pattern: r"\bRuntime\.getRuntime\(\)\.exec\s*\(",
644 api_call: "Runtime.exec",
645 message: "Runtime.exec is a common command injection footgun",
646 fix_suggestion: "Prefer library APIs or tightly validated ProcessBuilder arguments",
647 },
648 RegexRuleSpec {
649 id: "KT005",
650 name: "thread-sleep",
651 category: MisuseCategory::Concurrency,
652 severity: MisuseSeverity::Low,
653 description:
654 "Thread.sleep blocks threads directly and is usually wrong in coroutine-based code",
655 correct_usage: "Use delay(...) in coroutines or higher-level scheduling",
656 pattern: r"\bThread\.sleep\s*\(",
657 api_call: "Thread.sleep",
658 message: "Thread.sleep blocks the current thread directly",
659 fix_suggestion: "Use delay(...) or a proper scheduler instead",
660 },
661];
662
663const SWIFT_RULE_SPECS: &[RegexRuleSpec] = &[
664 RegexRuleSpec {
665 id: "SW001",
666 name: "forced-cast",
667 category: MisuseCategory::ErrorHandling,
668 severity: MisuseSeverity::Medium,
669 description: "as! crashes at runtime when the cast fails",
670 correct_usage: "Use as? with conditional handling",
671 pattern: r"\bas!\b",
672 api_call: "as!",
673 message: "Forced casts crash when the runtime type is different",
674 fix_suggestion: "Use as? and handle the nil case explicitly",
675 },
676 RegexRuleSpec {
677 id: "SW002",
678 name: "forced-try",
679 category: MisuseCategory::ErrorHandling,
680 severity: MisuseSeverity::Medium,
681 description: "try! crashes when the call throws",
682 correct_usage: "Use do/catch or try? with explicit fallback",
683 pattern: r"\btry!\b",
684 api_call: "try!",
685 message: "try! crashes the process on thrown errors",
686 fix_suggestion: "Use do/catch or try? and handle failure explicitly",
687 },
688 RegexRuleSpec {
689 id: "SW003",
690 name: "force-unwrap",
691 category: MisuseCategory::ErrorHandling,
692 severity: MisuseSeverity::Medium,
693 description: "Force unwrapping optionals crashes at runtime on nil",
694 correct_usage: "Use if let, guard let, or nil-coalescing",
695 pattern: r"\b[A-Za-z_][A-Za-z0-9_]*!",
696 api_call: "!",
697 message: "Force unwraps crash when the optional is nil",
698 fix_suggestion: "Use optional binding or nil-coalescing instead of force unwraps",
699 },
700 RegexRuleSpec {
701 id: "SW004",
702 name: "nskeyedunarchiver",
703 category: MisuseCategory::Security,
704 severity: MisuseSeverity::High,
705 description: "Legacy NSKeyedUnarchiver APIs on untrusted data are unsafe",
706 correct_usage: "Use secure decoding APIs with requiresSecureCoding",
707 pattern: r"\bNSKeyedUnarchiver\.unarchiveObject",
708 api_call: "NSKeyedUnarchiver",
709 message: "Legacy unarchiving can deserialize unexpected object graphs",
710 fix_suggestion: "Use secure coding APIs and schema-checked decoding",
711 },
712 RegexRuleSpec {
713 id: "SW005",
714 name: "fatalerror-call",
715 category: MisuseCategory::ErrorHandling,
716 severity: MisuseSeverity::Low,
717 description:
718 "fatalError terminates the process and is risky outside clearly impossible states",
719 correct_usage: "Return/throw recoverable errors where possible",
720 pattern: r"\bfatalError\s*\(",
721 api_call: "fatalError",
722 message: "fatalError terminates the process immediately",
723 fix_suggestion: "Use recoverable error handling unless the state is truly unreachable",
724 },
725];
726
727const CSHARP_RULE_SPECS: &[RegexRuleSpec] = &[
728 RegexRuleSpec {
729 id: "CS001",
730 name: "binaryformatter",
731 category: MisuseCategory::Security,
732 severity: MisuseSeverity::High,
733 description: "BinaryFormatter is insecure and obsolete for untrusted data",
734 correct_usage: "Use System.Text.Json or another safe serializer",
735 pattern: r"\bBinaryFormatter\b",
736 api_call: "BinaryFormatter",
737 message: "BinaryFormatter is insecure and should not be used",
738 fix_suggestion: "Use System.Text.Json or another safe serializer",
739 },
740 RegexRuleSpec {
741 id: "CS002",
742 name: "gc-collect",
743 category: MisuseCategory::Resources,
744 severity: MisuseSeverity::Low,
745 description: "GC.Collect is rarely the right fix and often harms latency",
746 correct_usage: "Remove manual GC triggers and profile the real allocation issue",
747 pattern: r"\bGC\.Collect\s*\(",
748 api_call: "GC.Collect",
749 message: "GC.Collect is an unreliable manual GC hint and often harms performance",
750 fix_suggestion: "Remove the call and fix the underlying allocation issue",
751 },
752 RegexRuleSpec {
753 id: "CS003",
754 name: "task-result",
755 category: MisuseCategory::Concurrency,
756 severity: MisuseSeverity::Medium,
757 description: "Task.Result blocks synchronously and can deadlock async flows",
758 correct_usage: "Use await instead of blocking on Task.Result",
759 pattern: r"\.Result\b",
760 api_call: "Task.Result",
761 message: "Task.Result blocks synchronously and can deadlock async contexts",
762 fix_suggestion: "Use await and keep the async chain asynchronous",
763 },
764 RegexRuleSpec {
765 id: "CS004",
766 name: "task-wait",
767 category: MisuseCategory::Concurrency,
768 severity: MisuseSeverity::Medium,
769 description: "Task.Wait blocks synchronously and can deadlock async flows",
770 correct_usage: "Use await or WhenAll/WhenAny instead of blocking waits",
771 pattern: r"\.Wait\s*\(",
772 api_call: "Task.Wait",
773 message: "Task.Wait blocks synchronously and can deadlock async contexts",
774 fix_suggestion: "Use await or asynchronous coordination primitives instead",
775 },
776 RegexRuleSpec {
777 id: "CS005",
778 name: "process-start",
779 category: MisuseCategory::Security,
780 severity: MisuseSeverity::High,
781 description: "Process.Start is dangerous with untrusted paths or arguments",
782 correct_usage: "Use strict allowlists and avoid shell execution semantics",
783 pattern: r"\bProcess\.Start\s*\(",
784 api_call: "Process.Start",
785 message: "Process.Start can enable command injection with untrusted inputs",
786 fix_suggestion: "Validate executable and arguments against a strict allowlist",
787 },
788];
789
/// Regex-driven misuse rules for Scala sources (`SC001`–`SC005`).
///
/// Each entry carries the rule metadata plus the regex `pattern` that triggers it and
/// the `api_call`, `message`, and `fix_suggestion` text attached to a finding.
const SCALA_RULE_SPECS: &[RegexRuleSpec] = &[
    // SC001: any standalone `null` token.
    RegexRuleSpec {
        id: "SC001",
        name: "null-usage",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Low,
        description: "null bypasses Scala's stronger option-based absence modeling",
        correct_usage: "Use Option instead of null",
        pattern: r"\bnull\b",
        api_call: "null",
        message: "null reintroduces runtime absence bugs into Scala code",
        fix_suggestion: "Use Option and explicit pattern matching instead",
    },
    // SC002: `asInstanceOf[` casts.
    RegexRuleSpec {
        id: "SC002",
        name: "asinstanceof-cast",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Medium,
        description: "asInstanceOf crashes at runtime when the type assumption is wrong",
        correct_usage: "Use pattern matching or TypeTag/ClassTag-aware APIs",
        pattern: r"\basInstanceOf\[",
        api_call: "asInstanceOf",
        message: "asInstanceOf creates unchecked runtime casts",
        fix_suggestion: "Use pattern matching or safer typed abstractions",
    },
    // SC003: calls to `Await.result(`.
    RegexRuleSpec {
        id: "SC003",
        name: "await-result",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Await.result blocks threads and can collapse asynchronous throughput",
        correct_usage: "Compose futures asynchronously instead of blocking",
        pattern: r"\bAwait\.result\s*\(",
        api_call: "Await.result",
        message: "Await.result blocks threads and can create deadlocks or latency spikes",
        fix_suggestion: "Use map/flatMap/for-comprehensions instead of blocking",
    },
    // SC004: any fully qualified reference into `scala.collection.mutable.`.
    RegexRuleSpec {
        id: "SC004",
        name: "mutable-collection",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Low,
        description: "scala.collection.mutable structures are harder to reason about under concurrency",
        correct_usage: "Prefer immutable collections unless mutation is intentionally scoped",
        pattern: r"\bscala\.collection\.mutable\.",
        api_call: "scala.collection.mutable",
        message: "Mutable collections can hide shared-state bugs",
        fix_suggestion: "Prefer immutable collections or encapsulate mutation carefully",
    },
    // SC005: calls to `sys.process.Process(`.
    RegexRuleSpec {
        id: "SC005",
        name: "sys-process",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "sys.process.Process executes external commands and is dangerous with input-derived values",
        correct_usage: "Use library APIs or validate commands and arguments against an allowlist",
        pattern: r"\bsys\.process\.Process\s*\(",
        api_call: "sys.process.Process",
        message: "sys.process.Process can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell-style execution or strictly validate all command parts",
    },
];
852
/// Regex-driven misuse rules for Elixir sources (`EX001`–`EX005`).
///
/// Each entry carries the rule metadata plus the regex `pattern` that triggers it and
/// the `api_call`, `message`, and `fix_suggestion` text attached to a finding.
const ELIXIR_RULE_SPECS: &[RegexRuleSpec] = &[
    // EX001: calls to `String.to_atom(`.
    RegexRuleSpec {
        id: "EX001",
        name: "string-to-atom",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "String.to_atom on untrusted input can exhaust the VM atom table",
        correct_usage: "Use String.to_existing_atom only for reviewed values or keep strings",
        pattern: r"\bString\.to_atom\s*\(",
        api_call: "String.to_atom",
        message: "String.to_atom can permanently grow the atom table from user input",
        fix_suggestion: "Keep values as strings or use a reviewed to_existing_atom path",
    },
    // EX002: calls to `Code.eval_string(`.
    RegexRuleSpec {
        id: "EX002",
        name: "code-eval-string",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Code.eval_string executes dynamic Elixir code and should be avoided",
        correct_usage: "Use explicit dispatch or data parsing instead of dynamic evaluation",
        pattern: r"\bCode\.eval_string\s*\(",
        api_call: "Code.eval_string",
        message: "Code.eval_string executes dynamic code and is a major security risk",
        fix_suggestion: "Replace dynamic evaluation with explicit dispatch or parsing",
    },
    // EX003: calls to `:erlang.binary_to_term(`; no leading `\b` because the match starts at `:`.
    RegexRuleSpec {
        id: "EX003",
        name: "binary-to-term",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: ":erlang.binary_to_term on untrusted data is unsafe deserialization",
        correct_usage: "Use safe formats like JSON or term_to_binary only for trusted data",
        pattern: r":erlang\.binary_to_term\s*\(",
        api_call: ":erlang.binary_to_term",
        message: ":erlang.binary_to_term can deserialize unsafe terms from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // EX004: calls to the raising variant `File.read!(`.
    RegexRuleSpec {
        id: "EX004",
        name: "file-read-bang",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Low,
        description: "Bang file APIs raise instead of returning tagged tuples",
        correct_usage: "Prefer File.read/1 with explicit {:ok, data} / {:error, reason} handling",
        pattern: r"\bFile\.read!\s*\(",
        api_call: "File.read!",
        message: "File.read! raises on failure instead of returning a recoverable error",
        fix_suggestion: "Use File.read/1 and handle the returned tuple explicitly",
    },
    // EX005: `Task.await(<first arg>, :infinity)`; the first argument may not contain a comma.
    RegexRuleSpec {
        id: "EX005",
        name: "task-await-infinity",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Task.await with :infinity can stall callers indefinitely",
        correct_usage: "Use bounded timeouts and supervised retry/cancellation behavior",
        pattern: r"\bTask\.await\s*\([^,]+,\s*:infinity\s*\)",
        api_call: "Task.await",
        message: "Task.await(..., :infinity) can block forever",
        fix_suggestion: "Use a bounded timeout and explicit failure handling",
    },
];
915
916const LUA_RULE_SPECS: &[RegexRuleSpec] = &[
917 RegexRuleSpec {
918 id: "LU001",
919 name: "implicit-global",
920 category: MisuseCategory::CallOrder,
921 severity: MisuseSeverity::Low,
922 description: "Assigning without local leaks mutable globals and creates hidden coupling",
923 correct_usage: "Declare locals explicitly with local name = ...",
924 pattern: r"^[A-Za-z_][A-Za-z0-9_]*\s*=",
925 api_call: "global assignment",
926 message: "Implicit global assignment leaks state outside local scope",
927 fix_suggestion: "Prefix the binding with local to keep scope explicit",
928 },
929 RegexRuleSpec {
930 id: "LU002",
931 name: "dynamic-load",
932 category: MisuseCategory::Security,
933 severity: MisuseSeverity::High,
934 description: "load/loadstring execute dynamic Lua code and should be avoided",
935 correct_usage: "Use structured parsing or explicit dispatch instead of dynamic evaluation",
936 pattern: r"\b(?:loadstring|load)\s*\(",
937 api_call: "load",
938 message: "Dynamic code loading executes attacker-controlled Lua if fed untrusted input",
939 fix_suggestion: "Replace dynamic evaluation with explicit dispatch or parsing",
940 },
941 RegexRuleSpec {
942 id: "LU003",
943 name: "os-execute",
944 category: MisuseCategory::Security,
945 severity: MisuseSeverity::High,
946 description: "os.execute shells out and is dangerous with dynamic input",
947 correct_usage: "Avoid shell execution or validate every command component",
948 pattern: r"\bos\.execute\s*\(",
949 api_call: "os.execute",
950 message: "os.execute can enable command injection with untrusted input",
951 fix_suggestion: "Avoid shelling out or strictly validate the command source",
952 },
953 RegexRuleSpec {
954 id: "LU004",
955 name: "io-popen",
956 category: MisuseCategory::Security,
957 severity: MisuseSeverity::High,
958 description: "io.popen launches shell commands and should be treated as high risk",
959 correct_usage: "Use safer process APIs or validate all command components",
960 pattern: r"\bio\.popen\s*\(",
961 api_call: "io.popen",
962 message: "io.popen can enable command injection with untrusted input",
963 fix_suggestion: "Avoid shell execution or validate every command component",
964 },
965 RegexRuleSpec {
966 id: "LU005",
967 name: "dofile-loadfile",
968 category: MisuseCategory::Security,
969 severity: MisuseSeverity::Medium,
970 description:
971 "dofile/loadfile execute external files and are risky with user-controlled paths",
972 correct_usage: "Validate file origins strictly before executing them",
973 pattern: r"\b(?:dofile|loadfile)\s*\(",
974 api_call: "dofile",
975 message: "Executing external files is dangerous when the path is not fully trusted",
976 fix_suggestion: "Avoid dynamic file execution or tightly validate trusted origins",
977 },
978];
979
/// Regex-driven misuse rules for OCaml sources (`OC001`–`OC005`).
///
/// Each entry carries the rule metadata plus the regex `pattern` that triggers it and
/// the `api_call`, `message`, and `fix_suggestion` text attached to a finding.
/// The patterns end in `\b` rather than `\(` because they match the bare identifier,
/// with no parenthesis required.
const OCAML_RULE_SPECS: &[RegexRuleSpec] = &[
    // OC001: references to `Marshal.from_string`.
    RegexRuleSpec {
        id: "OC001",
        name: "marshal-from-string",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Marshal.from_string on untrusted data is unsafe native deserialization",
        correct_usage: "Use a safe, schema-checked serialization format",
        pattern: r"\bMarshal\.from_string\b",
        api_call: "Marshal.from_string",
        message: "Marshal.from_string can deserialize unsafe values from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // OC002: references to `Marshal.from_channel`.
    RegexRuleSpec {
        id: "OC002",
        name: "marshal-from-channel",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Marshal.from_channel on untrusted data is unsafe native deserialization",
        correct_usage: "Use a safe, schema-checked serialization format",
        pattern: r"\bMarshal\.from_channel\b",
        api_call: "Marshal.from_channel",
        message: "Marshal.from_channel can deserialize unsafe values from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // OC003: references to `Sys.command`.
    RegexRuleSpec {
        id: "OC003",
        name: "sys-command",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Sys.command executes a shell command and is dangerous with dynamic input",
        correct_usage: "Prefer direct library APIs or validate allowed commands strictly",
        pattern: r"\bSys\.command\b",
        api_call: "Sys.command",
        message: "Sys.command can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell execution or tightly validate the command source",
    },
    // OC004: references to `Obj.magic`.
    RegexRuleSpec {
        id: "OC004",
        name: "obj-magic",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::High,
        description: "Obj.magic bypasses the type system and can produce memory-unsound behavior",
        correct_usage: "Use typed abstractions or explicit variant handling",
        pattern: r"\bObj\.magic\b",
        api_call: "Obj.magic",
        message: "Obj.magic bypasses type safety and can create undefined behavior",
        fix_suggestion: "Refactor to a typed abstraction instead of coercing with Obj.magic",
    },
    // OC005: the exact identifiers `open_in` / `open_out`; because `_` is a word
    // character, longer names such as `open_in_bin` do not match.
    RegexRuleSpec {
        id: "OC005",
        name: "open-in-out",
        category: MisuseCategory::Resources,
        severity: MisuseSeverity::Low,
        description: "open_in/open_out require explicit close calls and are easy to leak",
        correct_usage: "Use In_channel.with_open_* or Out_channel.with_open_* helpers",
        pattern: r"\b(?:open_in|open_out)\b",
        api_call: "open_in",
        message: "open_in/open_out require explicit close handling and are easy to leak",
        fix_suggestion: "Use with_open_* helpers to scope the channel lifetime",
    },
];
1042
/// Every `ApiLanguage` variant, listed once in declaration order.
///
/// Used to enumerate the rule sets of all supported languages.
const ALL_API_LANGUAGES: &[ApiLanguage] = &[
    ApiLanguage::Python,
    ApiLanguage::Rust,
    ApiLanguage::Go,
    ApiLanguage::Java,
    ApiLanguage::JavaScript,
    ApiLanguage::TypeScript,
    ApiLanguage::C,
    ApiLanguage::Cpp,
    ApiLanguage::Ruby,
    ApiLanguage::Php,
    ApiLanguage::Kotlin,
    ApiLanguage::Swift,
    ApiLanguage::CSharp,
    ApiLanguage::Scala,
    ApiLanguage::Elixir,
    ApiLanguage::Lua,
    ApiLanguage::Luau,
    ApiLanguage::Ocaml,
];
1063
1064fn python_rules() -> Vec<APIRule> {
1070 vec![
1071 APIRule {
1072 id: "PY001".to_string(),
1073 name: "missing-timeout".to_string(),
1074 category: MisuseCategory::Parameters,
1075 severity: MisuseSeverity::High,
1076 description: "requests.get/post/etc without timeout parameter can hang indefinitely"
1077 .to_string(),
1078 correct_usage: "requests.get(url, timeout=30)".to_string(),
1079 },
1080 APIRule {
1081 id: "PY002".to_string(),
1082 name: "bare-except".to_string(),
1083 category: MisuseCategory::ErrorHandling,
1084 severity: MisuseSeverity::Medium,
1085 description: "Bare except clause catches all exceptions including KeyboardInterrupt"
1086 .to_string(),
1087 correct_usage: "except Exception as e:".to_string(),
1088 },
1089 APIRule {
1090 id: "PY003".to_string(),
1091 name: "weak-hash-md5".to_string(),
1092 category: MisuseCategory::Crypto,
1093 severity: MisuseSeverity::High,
1094 description: "MD5 is cryptographically broken, don't use for security purposes"
1095 .to_string(),
1096 correct_usage: "hashlib.sha256() or bcrypt for passwords".to_string(),
1097 },
1098 APIRule {
1099 id: "PY004".to_string(),
1100 name: "weak-hash-sha1".to_string(),
1101 category: MisuseCategory::Crypto,
1102 severity: MisuseSeverity::High,
1103 description: "SHA1 is cryptographically weak, don't use for security purposes"
1104 .to_string(),
1105 correct_usage: "hashlib.sha256() or stronger".to_string(),
1106 },
1107 APIRule {
1108 id: "PY005".to_string(),
1109 name: "unclosed-file".to_string(),
1110 category: MisuseCategory::Resources,
1111 severity: MisuseSeverity::Medium,
1112 description: "File opened without context manager may not be properly closed"
1113 .to_string(),
1114 correct_usage: "with open(path) as f:".to_string(),
1115 },
1116 APIRule {
1117 id: "PY006".to_string(),
1118 name: "insecure-random".to_string(),
1119 category: MisuseCategory::Security,
1120 severity: MisuseSeverity::High,
1121 description: "random module is not cryptographically secure".to_string(),
1122 correct_usage: "secrets.token_bytes() or secrets.token_hex()".to_string(),
1123 },
1124 ]
1125}
1126
1127fn rust_rules() -> Vec<APIRule> {
1129 vec![
1130 APIRule {
1131 id: "RS001".to_string(),
1132 name: "mutex-lock-unwrap".to_string(),
1133 category: MisuseCategory::Concurrency,
1134 severity: MisuseSeverity::Medium,
1135 description: "Mutex::lock().unwrap() can panic and amplify lock contention (CWE-833)"
1136 .to_string(),
1137 correct_usage:
1138 "Prefer try_lock()/error handling or explicit poison recovery instead of unwrap()"
1139 .to_string(),
1140 },
1141 APIRule {
1142 id: "RS002".to_string(),
1143 name: "file-open-without-context".to_string(),
1144 category: MisuseCategory::ErrorHandling,
1145 severity: MisuseSeverity::Low,
1146 description:
1147 "File::open without contextual error mapping makes failures hard to triage"
1148 .to_string(),
1149 correct_usage:
1150 "File::open(path).with_context(|| format!(\"opening {}\", path.display()))?"
1151 .to_string(),
1152 },
1153 APIRule {
1154 id: "RS003".to_string(),
1155 name: "unbounded-with-capacity".to_string(),
1156 category: MisuseCategory::Resources,
1157 severity: MisuseSeverity::High,
1158 description:
1159 "Vec::with_capacity fed from unbounded input can cause memory exhaustion (CWE-770)"
1160 .to_string(),
1161 correct_usage: "Clamp capacity input before allocation (e.g. min(user_len, MAX))"
1162 .to_string(),
1163 },
1164 APIRule {
1165 id: "RS004".to_string(),
1166 name: "detached-tokio-spawn".to_string(),
1167 category: MisuseCategory::Concurrency,
1168 severity: MisuseSeverity::Medium,
1169 description: "tokio::spawn without retaining JoinHandle risks silent task failures"
1170 .to_string(),
1171 correct_usage: "Store JoinHandle values and await/join them".to_string(),
1172 },
1173 APIRule {
1174 id: "RS005".to_string(),
1175 name: "hashmap-order-dependence".to_string(),
1176 category: MisuseCategory::CallOrder,
1177 severity: MisuseSeverity::Low,
1178 description:
1179 "HashMap iteration order is non-deterministic; relying on it can break logic"
1180 .to_string(),
1181 correct_usage:
1182 "Collect keys and sort them, or use BTreeMap/IndexMap when stable order is required"
1183 .to_string(),
1184 },
1185 APIRule {
1186 id: "RS006".to_string(),
1187 name: "clone-in-hot-loop".to_string(),
1188 category: MisuseCategory::Resources,
1189 severity: MisuseSeverity::Low,
1190 description: "clone() inside loop bodies can create avoidable allocation pressure"
1191 .to_string(),
1192 correct_usage: "Borrow or move values instead of cloning in tight loops".to_string(),
1193 },
1194 ]
1195}
1196
1197fn regex_rule_specs_for_language(language: ApiLanguage) -> &'static [RegexRuleSpec] {
1198 match language {
1199 ApiLanguage::Python | ApiLanguage::Rust => &[],
1200 ApiLanguage::Go => GO_RULE_SPECS,
1201 ApiLanguage::Java => JAVA_RULE_SPECS,
1202 ApiLanguage::JavaScript => JAVASCRIPT_RULE_SPECS,
1203 ApiLanguage::TypeScript => TYPESCRIPT_RULE_SPECS,
1204 ApiLanguage::C => C_RULE_SPECS,
1205 ApiLanguage::Cpp => CPP_RULE_SPECS,
1206 ApiLanguage::Ruby => RUBY_RULE_SPECS,
1207 ApiLanguage::Php => PHP_RULE_SPECS,
1208 ApiLanguage::Kotlin => KOTLIN_RULE_SPECS,
1209 ApiLanguage::Swift => SWIFT_RULE_SPECS,
1210 ApiLanguage::CSharp => CSHARP_RULE_SPECS,
1211 ApiLanguage::Scala => SCALA_RULE_SPECS,
1212 ApiLanguage::Elixir => ELIXIR_RULE_SPECS,
1213 ApiLanguage::Lua | ApiLanguage::Luau => LUA_RULE_SPECS,
1214 ApiLanguage::Ocaml => OCAML_RULE_SPECS,
1215 }
1216}
1217
/// Returns the static list of all languages in `ALL_API_LANGUAGES`.
fn all_api_languages() -> &'static [ApiLanguage] {
    ALL_API_LANGUAGES
}
1221
/// Command-line arguments for the API misuse check.
///
/// The check scans a file or directory for known API misuse patterns and emits a
/// report, optionally filtered by category and severity.
#[derive(Debug, Args)]
pub struct ApiCheckArgs {
    /// File or directory to analyze.
    #[arg(value_name = "path")]
    pub path: PathBuf,

    /// Categories to filter findings by (comma-separated).
    #[arg(long, value_delimiter = ',')]
    pub category: Option<Vec<MisuseCategory>>,

    /// Severities to filter findings by (comma-separated).
    #[arg(long, value_delimiter = ',')]
    pub severity: Option<Vec<MisuseSeverity>>,

    /// Write the report to this file instead of the standard output writer.
    #[arg(long, short = 'O')]
    pub output: Option<PathBuf>,
}
1256
1257impl ApiCheckArgs {
1258 pub fn run(&self, format: crate::output::OutputFormat, quiet: bool) -> Result<()> {
1260 let writer = OutputWriter::new(format, quiet);
1261
1262 writer.progress(&format!(
1263 "Checking {} for API misuse patterns...",
1264 self.path.display()
1265 ));
1266
1267 if !self.path.exists() {
1269 return Err(RemainingError::file_not_found(&self.path).into());
1270 }
1271
1272 let all_rules_count = all_api_languages()
1273 .iter()
1274 .map(|language| rules_for_language(*language).len() as u32)
1275 .sum();
1276
1277 let files = collect_files(&self.path)?;
1279 writer.progress(&format!("Found {} files to analyze", files.len()));
1280
1281 let mut all_findings: Vec<MisuseFinding> = Vec::new();
1283 let mut files_scanned = 0u32;
1284
1285 for file_path in &files {
1286 let Some(language) = detect_language(file_path) else {
1287 continue;
1288 };
1289 let rules = rules_for_language(language);
1290 match analyze_file(file_path, &rules, language) {
1291 Ok(findings) => {
1292 all_findings.extend(findings);
1293 files_scanned += 1;
1294 }
1295 Err(e) => {
1296 writer.progress(&format!(
1297 "Warning: Failed to analyze {}: {}",
1298 file_path.display(),
1299 e
1300 ));
1301 }
1302 }
1303 }
1304
1305 let filtered_findings = filter_findings(
1307 all_findings,
1308 self.category.as_deref(),
1309 self.severity.as_deref(),
1310 );
1311
1312 let summary = build_summary(&filtered_findings, files_scanned);
1314
1315 let report = APICheckReport {
1317 findings: filtered_findings,
1318 summary,
1319 rules_applied: all_rules_count,
1320 };
1321
1322 if let Some(ref output_path) = self.output {
1324 if writer.is_text() {
1325 let text = format_api_check_text(&report);
1326 fs::write(output_path, text)?;
1327 } else {
1328 let json = serde_json::to_string_pretty(&report)?;
1329 fs::write(output_path, json)?;
1330 }
1331 } else if writer.is_text() {
1332 let text = format_api_check_text(&report);
1333 writer.write_text(&text)?;
1334 } else {
1335 writer.write(&report)?;
1336 }
1337
1338 Ok(())
1339 }
1340}
1341
1342fn collect_files(path: &Path) -> Result<Vec<PathBuf>> {
1348 let mut files = Vec::new();
1349
1350 if path.is_file() {
1351 if is_supported_file(path) {
1352 files.push(path.to_path_buf());
1353 }
1354 } else if path.is_dir() {
1355 for entry in walk_project(path) {
1356 if files.len() >= MAX_DIRECTORY_FILES as usize {
1357 break;
1358 }
1359
1360 let entry_path = entry.path();
1361 if entry_path.is_file() && is_supported_file(entry_path) {
1362 if let Ok(metadata) = fs::metadata(entry_path) {
1364 if metadata.len() <= MAX_FILE_SIZE {
1365 files.push(entry_path.to_path_buf());
1366 }
1367 }
1368 }
1369 }
1370 }
1371
1372 Ok(files)
1373}
1374
/// Returns `true` when `detect_language` recognizes the extension of `path`.
fn is_supported_file(path: &Path) -> bool {
    detect_language(path).is_some()
}
1379
1380pub(crate) fn detect_language(path: &Path) -> Option<ApiLanguage> {
1381 match path.extension().and_then(|e| e.to_str()) {
1382 Some("py") => Some(ApiLanguage::Python),
1383 Some("rs") => Some(ApiLanguage::Rust),
1384 Some("go") => Some(ApiLanguage::Go),
1385 Some("java") => Some(ApiLanguage::Java),
1386 Some("js") | Some("jsx") | Some("mjs") | Some("cjs") => Some(ApiLanguage::JavaScript),
1387 Some("ts") | Some("tsx") => Some(ApiLanguage::TypeScript),
1388 Some("c") | Some("h") => Some(ApiLanguage::C),
1389 Some("cpp") | Some("hpp") | Some("cc") | Some("cxx") => Some(ApiLanguage::Cpp),
1390 Some("rb") => Some(ApiLanguage::Ruby),
1391 Some("php") => Some(ApiLanguage::Php),
1392 Some("kt") | Some("kts") => Some(ApiLanguage::Kotlin),
1393 Some("swift") => Some(ApiLanguage::Swift),
1394 Some("cs") => Some(ApiLanguage::CSharp),
1395 Some("scala") => Some(ApiLanguage::Scala),
1396 Some("ex") | Some("exs") => Some(ApiLanguage::Elixir),
1397 Some("lua") => Some(ApiLanguage::Lua),
1398 Some("luau") => Some(ApiLanguage::Luau),
1399 Some("ml") | Some("mli") => Some(ApiLanguage::Ocaml),
1400 _ => None,
1401 }
1402}
1403
1404pub(crate) fn rules_for_language(language: ApiLanguage) -> Vec<APIRule> {
1405 match language {
1406 ApiLanguage::Python => python_rules(),
1407 ApiLanguage::Rust => rust_rules(),
1408 _ => regex_rule_specs_for_language(language)
1409 .iter()
1410 .copied()
1411 .map(RegexRuleSpec::rule)
1412 .collect(),
1413 }
1414}
1415
1416pub(crate) fn analyze_file(
1422 path: &Path,
1423 rules: &[APIRule],
1424 language: ApiLanguage,
1425) -> Result<Vec<MisuseFinding>> {
1426 let content = fs::read_to_string(path)?;
1427 let file_str = path.display().to_string();
1428 let mut findings = Vec::new();
1429 let mut prev_trimmed = String::new();
1430 let file_has_hashmap = matches!(language, ApiLanguage::Rust) && content.contains("HashMap");
1431
1432 for (line_num, line) in content.lines().enumerate() {
1433 let line_number = (line_num + 1) as u32;
1434 let trimmed = line.trim();
1435 let rust_ctx = RustLineContext {
1436 file_has_hashmap,
1437 previous_line: prev_trimmed.as_str(),
1438 previous_is_loop: prev_trimmed.starts_with("for ")
1439 || prev_trimmed.starts_with("while "),
1440 };
1441
1442 for rule in rules {
1444 if let Some(finding) =
1445 check_rule(rule, &file_str, line_number, line, language, &rust_ctx)
1446 {
1447 findings.push(finding);
1448 }
1449 }
1450 prev_trimmed = trimmed.to_string();
1451 }
1452
1453 Ok(findings)
1454}
1455
/// Per-line context handed to the rule checks; only the Rust-specific checks read it.
struct RustLineContext<'a> {
    /// `true` when the file is Rust and its content contains the text `HashMap`.
    file_has_hashmap: bool,
    /// The previous line with surrounding whitespace trimmed (empty for the first line).
    previous_line: &'a str,
    /// `true` when the previous trimmed line starts with `for ` or `while `.
    previous_is_loop: bool,
}
1461
1462fn check_rule(
1464 rule: &APIRule,
1465 file: &str,
1466 line: u32,
1467 line_text: &str,
1468 language: ApiLanguage,
1469 rust_ctx: &RustLineContext<'_>,
1470) -> Option<MisuseFinding> {
1471 let trimmed = line_text.trim();
1472
1473 if is_comment_line(trimmed, language) {
1475 return None;
1476 }
1477
1478 match rule.id.as_str() {
1479 "PY001" => check_missing_timeout(rule, file, line, trimmed),
1480 "PY002" => check_bare_except(rule, file, line, trimmed),
1481 "PY003" => check_md5_usage(rule, file, line, trimmed),
1482 "PY004" => check_sha1_usage(rule, file, line, trimmed),
1483 "PY005" => check_unclosed_file(rule, file, line, trimmed),
1484 "PY006" => check_insecure_random(rule, file, line, trimmed),
1485 "RS001" => check_mutex_lock_unwrap(rule, file, line, trimmed),
1486 "RS002" => check_file_open_without_context(rule, file, line, trimmed),
1487 "RS003" => check_unbounded_with_capacity(rule, file, line, trimmed),
1488 "RS004" => check_detached_tokio_spawn(rule, file, line, trimmed),
1489 "RS005" => check_hashmap_order_dependence(rule, file, line, trimmed, rust_ctx),
1490 "RS006" => check_clone_in_hot_loop(rule, file, line, trimmed, rust_ctx),
1491 _ => check_regex_rule(rule, file, line, trimmed, language),
1492 }
1493}
1494
1495fn is_comment_line(trimmed: &str, language: ApiLanguage) -> bool {
1496 match language {
1497 ApiLanguage::Python | ApiLanguage::Ruby | ApiLanguage::Elixir => trimmed.starts_with('#'),
1498 ApiLanguage::Rust
1499 | ApiLanguage::Go
1500 | ApiLanguage::Java
1501 | ApiLanguage::JavaScript
1502 | ApiLanguage::TypeScript
1503 | ApiLanguage::C
1504 | ApiLanguage::Cpp
1505 | ApiLanguage::Kotlin
1506 | ApiLanguage::Swift
1507 | ApiLanguage::CSharp
1508 | ApiLanguage::Scala => trimmed.starts_with("//"),
1509 ApiLanguage::Php => trimmed.starts_with("//") || trimmed.starts_with('#'),
1510 ApiLanguage::Lua | ApiLanguage::Luau => trimmed.starts_with("--"),
1511 ApiLanguage::Ocaml => trimmed.starts_with("(*"),
1512 }
1513}
1514
1515fn check_regex_rule(
1516 rule: &APIRule,
1517 file: &str,
1518 line: u32,
1519 line_text: &str,
1520 language: ApiLanguage,
1521) -> Option<MisuseFinding> {
1522 let spec = regex_rule_specs_for_language(language)
1523 .iter()
1524 .find(|spec| spec.id == rule.id)?;
1525 let regex = Regex::new(spec.pattern).ok()?;
1526 if !regex.is_match(line_text) {
1527 return None;
1528 }
1529
1530 let column = regex.find(line_text).map(|m| m.start()).unwrap_or(0) as u32;
1531 Some(MisuseFinding {
1532 file: file.to_string(),
1533 line,
1534 column,
1535 rule: rule.clone(),
1536 api_call: spec.api_call.to_string(),
1537 message: spec.message.to_string(),
1538 fix_suggestion: spec.fix_suggestion.to_string(),
1539 code_context: line_text.to_string(),
1540 })
1541}
1542
1543fn check_missing_timeout(
1545 rule: &APIRule,
1546 file: &str,
1547 line: u32,
1548 line_text: &str,
1549) -> Option<MisuseFinding> {
1550 let request_patterns = [
1552 "requests.get(",
1553 "requests.post(",
1554 "requests.put(",
1555 "requests.delete(",
1556 "requests.patch(",
1557 "requests.head(",
1558 "requests.options(",
1559 ];
1560
1561 for pattern in &request_patterns {
1562 if line_text.contains(pattern) && !line_text.contains("timeout") {
1563 let column = line_text.find(pattern).unwrap_or(0) as u32;
1564 return Some(MisuseFinding {
1565 file: file.to_string(),
1566 line,
1567 column,
1568 rule: rule.clone(),
1569 api_call: pattern.trim_end_matches('(').to_string(),
1570 message: format!(
1571 "{} called without timeout parameter",
1572 pattern.trim_end_matches('(')
1573 ),
1574 fix_suggestion: format!("Add timeout parameter: {}url, timeout=30)", pattern),
1575 code_context: line_text.to_string(),
1576 });
1577 }
1578 }
1579
1580 None
1581}
1582
1583fn check_bare_except(
1585 rule: &APIRule,
1586 file: &str,
1587 line: u32,
1588 line_text: &str,
1589) -> Option<MisuseFinding> {
1590 if line_text.starts_with("except:") || line_text.contains(" except:") {
1593 let column = line_text.find("except:").unwrap_or(0) as u32;
1594 return Some(MisuseFinding {
1595 file: file.to_string(),
1596 line,
1597 column,
1598 rule: rule.clone(),
1599 api_call: "except".to_string(),
1600 message: "Bare except clause catches all exceptions including KeyboardInterrupt and SystemExit".to_string(),
1601 fix_suggestion: "Use 'except Exception as e:' to catch only program exceptions".to_string(),
1602 code_context: line_text.to_string(),
1603 });
1604 }
1605
1606 None
1607}
1608
1609fn check_md5_usage(
1611 rule: &APIRule,
1612 file: &str,
1613 line: u32,
1614 line_text: &str,
1615) -> Option<MisuseFinding> {
1616 if line_text.contains("hashlib.md5") || line_text.contains("md5(") {
1618 let column = line_text
1619 .find("hashlib.md5")
1620 .or_else(|| line_text.find("md5("))
1621 .unwrap_or(0) as u32;
1622 return Some(MisuseFinding {
1623 file: file.to_string(),
1624 line,
1625 column,
1626 rule: rule.clone(),
1627 api_call: "hashlib.md5".to_string(),
1628 message: "MD5 is cryptographically broken and should not be used for security purposes"
1629 .to_string(),
1630 fix_suggestion: "Use hashlib.sha256() or stronger. For passwords, use bcrypt or argon2"
1631 .to_string(),
1632 code_context: line_text.to_string(),
1633 });
1634 }
1635
1636 None
1637}
1638
1639fn check_sha1_usage(
1641 rule: &APIRule,
1642 file: &str,
1643 line: u32,
1644 line_text: &str,
1645) -> Option<MisuseFinding> {
1646 if line_text.contains("hashlib.sha1") || line_text.contains("sha1(") {
1648 let column = line_text
1649 .find("hashlib.sha1")
1650 .or_else(|| line_text.find("sha1("))
1651 .unwrap_or(0) as u32;
1652 return Some(MisuseFinding {
1653 file: file.to_string(),
1654 line,
1655 column,
1656 rule: rule.clone(),
1657 api_call: "hashlib.sha1".to_string(),
1658 message: "SHA1 is cryptographically weak and should not be used for security purposes"
1659 .to_string(),
1660 fix_suggestion: "Use hashlib.sha256() or stronger".to_string(),
1661 code_context: line_text.to_string(),
1662 });
1663 }
1664
1665 None
1666}
1667
1668fn check_unclosed_file(
1670 rule: &APIRule,
1671 file: &str,
1672 line: u32,
1673 line_text: &str,
1674) -> Option<MisuseFinding> {
1675 if line_text.contains("open(")
1678 && !line_text.contains("with ")
1679 && !line_text.starts_with("with ")
1680 {
1681 if line_text.contains("= open(") || line_text.contains("=open(") {
1683 let column = line_text.find("open(").unwrap_or(0) as u32;
1684 return Some(MisuseFinding {
1685 file: file.to_string(),
1686 line,
1687 column,
1688 rule: rule.clone(),
1689 api_call: "open".to_string(),
1690 message: "File opened without context manager may not be properly closed"
1691 .to_string(),
1692 fix_suggestion: "Use 'with open(path) as f:' to ensure file is closed".to_string(),
1693 code_context: line_text.to_string(),
1694 });
1695 }
1696 }
1697
1698 None
1699}
1700
1701fn check_insecure_random(
1703 rule: &APIRule,
1704 file: &str,
1705 line: u32,
1706 line_text: &str,
1707) -> Option<MisuseFinding> {
1708 let insecure_patterns = [
1710 "random.randint(",
1711 "random.random(",
1712 "random.choice(",
1713 "random.randrange(",
1714 ];
1715
1716 let security_indicators = ["token", "secret", "password", "key", "auth", "session"];
1719
1720 for pattern in &insecure_patterns {
1721 if line_text.contains(pattern) {
1722 let line_lower = line_text.to_lowercase();
1724 for indicator in &security_indicators {
1725 if line_lower.contains(indicator) {
1726 let column = line_text.find(pattern).unwrap_or(0) as u32;
1727 return Some(MisuseFinding {
1728 file: file.to_string(),
1729 line,
1730 column,
1731 rule: rule.clone(),
1732 api_call: pattern.trim_end_matches('(').to_string(),
1733 message: format!(
1734 "{} is not cryptographically secure, don't use for security purposes",
1735 pattern.trim_end_matches('(')
1736 ),
1737 fix_suggestion:
1738 "Use secrets.token_bytes() or secrets.token_hex() for security"
1739 .to_string(),
1740 code_context: line_text.to_string(),
1741 });
1742 }
1743 }
1744 }
1745 }
1746
1747 None
1748}
1749
1750fn check_mutex_lock_unwrap(
1752 rule: &APIRule,
1753 file: &str,
1754 line: u32,
1755 line_text: &str,
1756) -> Option<MisuseFinding> {
1757 if line_text.contains(".lock().unwrap()") {
1758 let column = line_text.find(".lock().unwrap()").unwrap_or(0) as u32;
1759 return Some(MisuseFinding {
1760 file: file.to_string(),
1761 line,
1762 column,
1763 rule: rule.clone(),
1764 api_call: "Mutex::lock".to_string(),
1765 message:
1766 "Mutex::lock().unwrap() can panic on poisoned locks and hide deadlock behavior"
1767 .to_string(),
1768 fix_suggestion:
1769 "Handle lock errors explicitly (match/if let), or use try_lock with backoff"
1770 .to_string(),
1771 code_context: line_text.to_string(),
1772 });
1773 }
1774 None
1775}
1776
1777fn check_file_open_without_context(
1779 rule: &APIRule,
1780 file: &str,
1781 line: u32,
1782 line_text: &str,
1783) -> Option<MisuseFinding> {
1784 if line_text.contains("File::open(")
1785 && !line_text.contains(".context(")
1786 && !line_text.contains(".with_context(")
1787 && !line_text.contains("map_err(")
1788 {
1789 let column = line_text.find("File::open(").unwrap_or(0) as u32;
1790 return Some(MisuseFinding {
1791 file: file.to_string(),
1792 line,
1793 column,
1794 rule: rule.clone(),
1795 api_call: "File::open".to_string(),
1796 message: "File::open used without contextual error mapping".to_string(),
1797 fix_suggestion:
1798 "Wrap errors with context (with_context/context/map_err) before propagating"
1799 .to_string(),
1800 code_context: line_text.to_string(),
1801 });
1802 }
1803 None
1804}
1805
1806fn check_unbounded_with_capacity(
1808 rule: &APIRule,
1809 file: &str,
1810 line: u32,
1811 line_text: &str,
1812) -> Option<MisuseFinding> {
1813 if line_text.contains("Vec::with_capacity(") {
1814 let line_lower = line_text.to_lowercase();
1815 let user_input_markers = ["input", "args", "user", "request", "len", "size"];
1816 if user_input_markers.iter().any(|m| line_lower.contains(m)) {
1817 let column = line_text.find("Vec::with_capacity(").unwrap_or(0) as u32;
1818 return Some(MisuseFinding {
1819 file: file.to_string(),
1820 line,
1821 column,
1822 rule: rule.clone(),
1823 api_call: "Vec::with_capacity".to_string(),
1824 message: "Vec::with_capacity appears to use unbounded external input".to_string(),
1825 fix_suggestion:
1826 "Clamp requested capacity with a hard upper bound before allocation".to_string(),
1827 code_context: line_text.to_string(),
1828 });
1829 }
1830 }
1831 None
1832}
1833
1834fn check_detached_tokio_spawn(
1836 rule: &APIRule,
1837 file: &str,
1838 line: u32,
1839 line_text: &str,
1840) -> Option<MisuseFinding> {
1841 if line_text.contains("tokio::spawn(")
1842 && !line_text.contains('=')
1843 && !line_text.contains("handles.push")
1844 {
1845 let column = line_text.find("tokio::spawn(").unwrap_or(0) as u32;
1846 return Some(MisuseFinding {
1847 file: file.to_string(),
1848 line,
1849 column,
1850 rule: rule.clone(),
1851 api_call: "tokio::spawn".to_string(),
1852 message: "tokio::spawn used without keeping JoinHandle".to_string(),
1853 fix_suggestion: "Store JoinHandle values and await them to surface task errors"
1854 .to_string(),
1855 code_context: line_text.to_string(),
1856 });
1857 }
1858 None
1859}
1860
1861fn check_hashmap_order_dependence(
1863 rule: &APIRule,
1864 file: &str,
1865 line: u32,
1866 line_text: &str,
1867 rust_ctx: &RustLineContext<'_>,
1868) -> Option<MisuseFinding> {
1869 let looks_like_hashmap_iteration = line_text.contains(".iter()")
1870 && (line_text.contains("for ") || rust_ctx.previous_line.starts_with("for "))
1871 && rust_ctx.file_has_hashmap;
1872 if looks_like_hashmap_iteration {
1873 let column = line_text.find(".iter()").unwrap_or(0) as u32;
1874 return Some(MisuseFinding {
1875 file: file.to_string(),
1876 line,
1877 column,
1878 rule: rule.clone(),
1879 api_call: "HashMap::iter".to_string(),
1880 message: "Potential logic dependence on HashMap iteration order".to_string(),
1881 fix_suggestion: "Use BTreeMap/IndexMap or sort keys before ordered operations"
1882 .to_string(),
1883 code_context: line_text.to_string(),
1884 });
1885 }
1886 None
1887}
1888
1889fn check_clone_in_hot_loop(
1891 rule: &APIRule,
1892 file: &str,
1893 line: u32,
1894 line_text: &str,
1895 rust_ctx: &RustLineContext<'_>,
1896) -> Option<MisuseFinding> {
1897 if line_text.contains(".clone()")
1898 && (line_text.contains("for ") || line_text.contains("while ") || rust_ctx.previous_is_loop)
1899 {
1900 let column = line_text.find(".clone()").unwrap_or(0) as u32;
1901 return Some(MisuseFinding {
1902 file: file.to_string(),
1903 line,
1904 column,
1905 rule: rule.clone(),
1906 api_call: "clone".to_string(),
1907 message: "clone() in loop context may create avoidable allocation overhead".to_string(),
1908 fix_suggestion: "Prefer borrowing/references or move semantics inside hot loops"
1909 .to_string(),
1910 code_context: line_text.to_string(),
1911 });
1912 }
1913 None
1914}
1915
1916fn filter_findings(
1922 findings: Vec<MisuseFinding>,
1923 categories: Option<&[MisuseCategory]>,
1924 severities: Option<&[MisuseSeverity]>,
1925) -> Vec<MisuseFinding> {
1926 findings
1927 .into_iter()
1928 .filter(|f| {
1929 if let Some(cats) = categories {
1931 if !cats.contains(&f.rule.category) {
1932 return false;
1933 }
1934 }
1935
1936 if let Some(sevs) = severities {
1938 if !sevs.contains(&f.rule.severity) {
1939 return false;
1940 }
1941 }
1942
1943 true
1944 })
1945 .collect()
1946}
1947
1948fn build_summary(findings: &[MisuseFinding], files_scanned: u32) -> APICheckSummary {
1954 let mut by_category: HashMap<String, u32> = HashMap::new();
1955 let mut by_severity: HashMap<String, u32> = HashMap::new();
1956 let mut apis_checked: Vec<String> = Vec::new();
1957
1958 for finding in findings {
1959 let cat_str = format!("{:?}", finding.rule.category).to_lowercase();
1961 *by_category.entry(cat_str).or_insert(0) += 1;
1962
1963 let sev_str = format!("{:?}", finding.rule.severity).to_lowercase();
1965 *by_severity.entry(sev_str).or_insert(0) += 1;
1966
1967 if !apis_checked.contains(&finding.api_call) {
1969 apis_checked.push(finding.api_call.clone());
1970 }
1971 }
1972
1973 APICheckSummary {
1974 total_findings: findings.len() as u32,
1975 by_category,
1976 by_severity,
1977 apis_checked,
1978 files_scanned,
1979 }
1980}
1981
1982fn format_api_check_text(report: &APICheckReport) -> String {
1988 let mut output = String::new();
1989
1990 output.push_str("=== API Check Report ===\n\n");
1991
1992 output.push_str(&format!(
1994 "Files scanned: {}\n",
1995 report.summary.files_scanned
1996 ));
1997 output.push_str(&format!("Rules applied: {}\n", report.rules_applied));
1998 output.push_str(&format!(
1999 "Total findings: {}\n\n",
2000 report.summary.total_findings
2001 ));
2002
2003 if !report.summary.by_severity.is_empty() {
2005 output.push_str("By Severity:\n");
2006 for (severity, count) in &report.summary.by_severity {
2007 output.push_str(&format!(" {}: {}\n", severity, count));
2008 }
2009 output.push('\n');
2010 }
2011
2012 if !report.summary.by_category.is_empty() {
2014 output.push_str("By Category:\n");
2015 for (category, count) in &report.summary.by_category {
2016 output.push_str(&format!(" {}: {}\n", category, count));
2017 }
2018 output.push('\n');
2019 }
2020
2021 if !report.findings.is_empty() {
2023 output.push_str("Findings:\n");
2024 output.push_str(&"-".repeat(60));
2025 output.push('\n');
2026
2027 for finding in &report.findings {
2028 output.push_str(&format!(
2029 "[{:?}] {} ({})\n",
2030 finding.rule.severity, finding.rule.name, finding.rule.id
2031 ));
2032 output.push_str(&format!(
2033 " Location: {}:{}:{}\n",
2034 finding.file, finding.line, finding.column
2035 ));
2036 output.push_str(&format!(" API: {}\n", finding.api_call));
2037 output.push_str(&format!(" Message: {}\n", finding.message));
2038 output.push_str(&format!(" Fix: {}\n", finding.fix_suggestion));
2039 if !finding.code_context.is_empty() {
2040 output.push_str(&format!(" Context: {}\n", finding.code_context.trim()));
2041 }
2042 output.push('\n');
2043 }
2044 } else {
2045 output.push_str("No API misuse patterns detected.\n");
2046 }
2047
2048 output
2049}
2050
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a placeholder finding for `test.py`; only the fields that the
    /// filter/summary tests inspect are configurable.
    fn sample_finding(
        line: u32,
        rule_id: &str,
        category: MisuseCategory,
        api_call: &str,
    ) -> MisuseFinding {
        let placeholder = || "test".to_string();
        MisuseFinding {
            file: "test.py".to_string(),
            line,
            column: 0,
            rule: APIRule {
                id: rule_id.to_string(),
                name: placeholder(),
                category,
                severity: MisuseSeverity::High,
                description: placeholder(),
                correct_usage: placeholder(),
            },
            api_call: api_call.to_string(),
            message: placeholder(),
            fix_suggestion: placeholder(),
            code_context: placeholder(),
        }
    }

    /// Writes `source` to a temp file named `filename`, analyzes it with the
    /// rules for `language`, and asserts that `expected_rule_id` was reported.
    fn assert_language_findings(
        filename: &str,
        language: ApiLanguage,
        source: &str,
        expected_rule_id: &str,
    ) {
        let temp = TempDir::new().unwrap();
        let path = temp.path().join(filename);
        fs::write(&path, source).unwrap();

        let rules = rules_for_language(language);
        let findings = analyze_file(&path, &rules, language).unwrap();
        let reported_ids: Vec<String> = findings.iter().map(|f| f.rule.id.clone()).collect();

        assert!(
            reported_ids.iter().any(|id| id == expected_rule_id),
            "expected {expected_rule_id} for {filename}, got {:?}",
            reported_ids
        );
    }

    #[test]
    fn test_python_rules_defined() {
        let rules = python_rules();
        assert!(!rules.is_empty());
        for expected in ["PY001", "PY002", "PY003", "PY005"] {
            assert!(rules.iter().any(|r| r.id == expected), "missing {expected}");
        }
    }

    #[test]
    fn test_rust_rules_defined() {
        let rules = rust_rules();
        assert!(!rules.is_empty());
        for expected in ["RS001", "RS002", "RS003", "RS004", "RS005", "RS006"] {
            assert!(rules.iter().any(|r| r.id == expected), "missing {expected}");
        }
    }

    #[test]
    fn test_all_supported_languages_have_rules() {
        for language in all_api_languages() {
            let rules = rules_for_language(*language);
            assert!(
                !rules.is_empty(),
                "expected at least one api-check rule for {:?}",
                language
            );
        }
    }

    #[test]
    fn test_detect_language_extended_extensions() {
        let cases = [
            ("main.go", ApiLanguage::Go),
            ("Main.java", ApiLanguage::Java),
            ("app.js", ApiLanguage::JavaScript),
            ("component.tsx", ApiLanguage::TypeScript),
            ("main.c", ApiLanguage::C),
            ("main.cpp", ApiLanguage::Cpp),
            ("app.rb", ApiLanguage::Ruby),
            ("index.php", ApiLanguage::Php),
            ("Main.kt", ApiLanguage::Kotlin),
            ("main.swift", ApiLanguage::Swift),
            ("Program.cs", ApiLanguage::CSharp),
            ("Main.scala", ApiLanguage::Scala),
            ("app.ex", ApiLanguage::Elixir),
            ("main.lua", ApiLanguage::Lua),
            ("game.luau", ApiLanguage::Luau),
            ("main.ml", ApiLanguage::Ocaml),
        ];

        for (path, expected) in cases {
            let detected = detect_language(Path::new(path));
            assert_eq!(detected, Some(expected), "{path}");
        }
    }

    #[test]
    fn test_check_missing_timeout() {
        let rules = python_rules();
        let rule = &rules[0];

        let without_timeout =
            check_missing_timeout(rule, "test.py", 1, "response = requests.get(url)");
        assert!(without_timeout.is_some());

        let with_timeout = check_missing_timeout(
            rule,
            "test.py",
            1,
            "response = requests.get(url, timeout=30)",
        );
        assert!(with_timeout.is_none());
    }

    #[test]
    fn test_check_bare_except() {
        let rules = python_rules();
        let rule = &rules[1];

        assert!(check_bare_except(rule, "test.py", 1, "except:").is_some());
        assert!(check_bare_except(rule, "test.py", 1, "except Exception:").is_none());
    }

    #[test]
    fn test_check_md5_usage() {
        let rules = python_rules();
        let rule = &rules[2];

        assert!(check_md5_usage(rule, "test.py", 1, "hash = hashlib.md5(data)").is_some());
        assert!(check_md5_usage(rule, "test.py", 1, "hash = hashlib.sha256(data)").is_none());
    }

    #[test]
    fn test_check_unclosed_file() {
        let rules = python_rules();
        let rule = &rules[4];

        assert!(check_unclosed_file(rule, "test.py", 1, "f = open('data.txt')").is_some());
        assert!(check_unclosed_file(rule, "test.py", 1, "with open('data.txt') as f:").is_none());
    }

    #[test]
    fn test_filter_by_category() {
        let findings = vec![
            sample_finding(1, "PY001", MisuseCategory::Parameters, "test"),
            sample_finding(2, "PY003", MisuseCategory::Crypto, "test"),
        ];

        let crypto_only = filter_findings(findings, Some(&[MisuseCategory::Crypto]), None);
        assert_eq!(crypto_only.len(), 1);
        assert_eq!(crypto_only[0].rule.category, MisuseCategory::Crypto);
    }

    #[test]
    fn test_build_summary() {
        let findings = vec![sample_finding(
            1,
            "PY001",
            MisuseCategory::Parameters,
            "requests.get",
        )];

        let summary = build_summary(&findings, 5);
        assert_eq!(summary.total_findings, 1);
        assert_eq!(summary.files_scanned, 5);
        assert!(summary.apis_checked.contains(&"requests.get".to_string()));
    }

    #[test]
    fn test_collect_files_includes_rust() {
        let temp = TempDir::new().unwrap();
        let fixtures = [
            ("a.py", "print('ok')"),
            ("b.rs", "fn main() {}"),
            ("c.go", "package main"),
            ("c.txt", "ignore"),
        ];
        for (name, contents) in fixtures {
            fs::write(temp.path().join(name), contents).unwrap();
        }

        let files = collect_files(temp.path()).unwrap();
        let collected = |suffix: &str| files.iter().any(|f| f.ends_with(suffix));

        assert!(collected("a.py"));
        assert!(collected("b.rs"));
        assert!(collected("c.go"));
        assert!(!collected("c.txt"));
    }

    #[test]
    fn test_check_mutex_lock_unwrap() {
        let rules = rust_rules();
        let flagged = check_mutex_lock_unwrap(
            &rules[0],
            "lib.rs",
            10,
            "let guard = shared.lock().unwrap();",
        );
        assert!(flagged.is_some());
    }

    #[test]
    fn test_check_file_open_without_context() {
        let rules = rust_rules();
        let rule = &rules[1];

        let bare = check_file_open_without_context(rule, "lib.rs", 8, "let f = File::open(p)?;");
        assert!(bare.is_some());

        let contextual = check_file_open_without_context(
            rule,
            "lib.rs",
            9,
            "let f = File::open(p).with_context(|| \"open\".to_string())?;",
        );
        assert!(contextual.is_none());
    }

    #[test]
    fn test_check_unbounded_with_capacity() {
        let rules = rust_rules();
        let rule = &rules[2];

        let unbounded =
            check_unbounded_with_capacity(rule, "lib.rs", 12, "let v = Vec::with_capacity(len);");
        assert!(unbounded.is_some());

        let bounded =
            check_unbounded_with_capacity(rule, "lib.rs", 13, "let v = Vec::with_capacity(256);");
        assert!(bounded.is_none());
    }

    #[test]
    fn test_check_tokio_spawn_detached() {
        let rules = rust_rules();
        let rule = &rules[3];

        let detached = check_detached_tokio_spawn(
            rule,
            "lib.rs",
            3,
            "tokio::spawn(async move { work().await; });",
        );
        let tracked = check_detached_tokio_spawn(
            rule,
            "lib.rs",
            4,
            "let handle = tokio::spawn(async move { work().await; });",
        );

        assert!(detached.is_some());
        assert!(tracked.is_none());
    }

    #[test]
    fn test_check_hashmap_order_dependence() {
        let rules = rust_rules();
        let ctx = RustLineContext {
            file_has_hashmap: true,
            previous_line: "for (k, v) in map",
            previous_is_loop: true,
        };

        let flagged = check_hashmap_order_dependence(&rules[4], "lib.rs", 12, " .iter()", &ctx);
        assert!(flagged.is_some());
    }

    #[test]
    fn test_check_clone_in_hot_loop() {
        let rules = rust_rules();
        let ctx = RustLineContext {
            file_has_hashmap: false,
            previous_line: "for item in items {",
            previous_is_loop: true,
        };

        let flagged = check_clone_in_hot_loop(&rules[5], "lib.rs", 20, "value.clone()", &ctx);
        assert!(flagged.is_some());
    }

    #[test]
    fn test_extended_language_rule_detection() {
        // (file name, language, one-line source, rule id expected to fire)
        let cases = [
            ("main.go", ApiLanguage::Go, "data, _ := ioutil.ReadFile(path)", "GO001"),
            ("Main.java", ApiLanguage::Java, "if (name == otherName) { }", "JV001"),
            ("app.js", ApiLanguage::JavaScript, "if (a == b) {}", "JS001"),
            ("app.ts", ApiLanguage::TypeScript, "if (a == b) {}", "TS001"),
            ("main.c", ApiLanguage::C, "gets(buffer);", "C001"),
            ("main.cpp", ApiLanguage::Cpp, "std::auto_ptr<Foo> p;", "CPP003"),
            ("app.rb", ApiLanguage::Ruby, "eval(params[:code])", "RB001"),
            ("index.php", ApiLanguage::Php, "unserialize($payload);", "PH005"),
            ("Main.kt", ApiLanguage::Kotlin, "val name = user!!", "KT001"),
            ("main.swift", ApiLanguage::Swift, "let name = value!", "SW003"),
            ("Program.cs", ApiLanguage::CSharp, "var x = task.Result;", "CS003"),
            (
                "Main.scala",
                ApiLanguage::Scala,
                "val casted = value.asInstanceOf[String]",
                "SC002",
            ),
            ("app.ex", ApiLanguage::Elixir, "String.to_atom(param)", "EX001"),
            ("main.lua", ApiLanguage::Lua, "value = 1", "LU001"),
            ("game.luau", ApiLanguage::Luau, "os.execute(cmd)", "LU003"),
            ("main.ml", ApiLanguage::Ocaml, "Obj.magic value", "OC004"),
        ];

        for (filename, language, source, expected_rule_id) in cases {
            assert_language_findings(filename, language, source, expected_rule_id);
        }
    }
}