1use std::collections::HashMap;
18use std::fs;
19use std::path::{Path, PathBuf};
20
21use anyhow::Result;
22use clap::Args;
23use regex::Regex;
24use walkdir::WalkDir;
25
26use super::error::RemainingError;
27use super::types::{
28 APICheckReport, APICheckSummary, APIRule, MisuseCategory, MisuseFinding, MisuseSeverity,
29};
30
31use crate::output::OutputWriter;
32
/// Upper bound of 1000 on a file count.
// NOTE(review): presumably the cap on files visited per directory scan; the
// consumer is outside this view — confirm.
const MAX_DIRECTORY_FILES: u32 = 1_000;

/// Size limit of 10 * 1024 * 1024 = 10_485_760.
// NOTE(review): presumably bytes (10 MiB) compared against file metadata
// length; the consumer is outside this view — confirm.
const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024;
42
/// Source languages this module distinguishes between.
///
/// The variant order is kept stable so that implicit discriminants
/// (`Python == 0` … `Ocaml == 17`) do not shift.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ApiLanguage {
    /// Python.
    Python,
    /// Rust.
    Rust,
    /// Go.
    Go,
    /// Java.
    Java,
    /// JavaScript.
    JavaScript,
    /// TypeScript.
    TypeScript,
    /// C.
    C,
    /// C++.
    Cpp,
    /// Ruby.
    Ruby,
    /// PHP.
    Php,
    /// Kotlin.
    Kotlin,
    /// Swift.
    Swift,
    /// C#.
    CSharp,
    /// Scala.
    Scala,
    /// Elixir.
    Elixir,
    /// Lua.
    Lua,
    /// Luau (kept distinct from [`ApiLanguage::Lua`]).
    Luau,
    /// OCaml.
    Ocaml,
}
64
65#[derive(Clone, Copy)]
66struct RegexRuleSpec {
67 id: &'static str,
68 name: &'static str,
69 category: MisuseCategory,
70 severity: MisuseSeverity,
71 description: &'static str,
72 correct_usage: &'static str,
73 pattern: &'static str,
74 api_call: &'static str,
75 message: &'static str,
76 fix_suggestion: &'static str,
77}
78
79impl RegexRuleSpec {
80 fn rule(self) -> APIRule {
81 APIRule {
82 id: self.id.to_string(),
83 name: self.name.to_string(),
84 category: self.category,
85 severity: self.severity,
86 description: self.description.to_string(),
87 correct_usage: self.correct_usage.to_string(),
88 }
89 }
90}
91
92const GO_RULE_SPECS: &[RegexRuleSpec] = &[
93 RegexRuleSpec {
94 id: "GO001",
95 name: "deprecated-ioutil-readfile",
96 category: MisuseCategory::Resources,
97 severity: MisuseSeverity::Low,
98 description: "ioutil.ReadFile is deprecated and encourages unbounded whole-file reads",
99 correct_usage: "Use os.ReadFile or stream with bufio.Scanner/Reader",
100 pattern: r"\bioutil\.ReadFile\s*\(",
101 api_call: "ioutil.ReadFile",
102 message: "ioutil.ReadFile is deprecated and can load unbounded content into memory",
103 fix_suggestion: "Use os.ReadFile for simple reads or bufio.Reader for bounded streaming",
104 },
105 RegexRuleSpec {
106 id: "GO002",
107 name: "http-get-without-timeout",
108 category: MisuseCategory::Parameters,
109 severity: MisuseSeverity::Medium,
110 description: "http.Get uses the default client and provides no call-specific timeout",
111 correct_usage: "Use an http.Client with Timeout or context-aware requests",
112 pattern: r"\bhttp\.Get\s*\(",
113 api_call: "http.Get",
114 message: "http.Get without an explicit timeout can hang indefinitely",
115 fix_suggestion: "Use an http.Client{Timeout: ...} or NewRequestWithContext",
116 },
117 RegexRuleSpec {
118 id: "GO003",
119 name: "exec-command",
120 category: MisuseCategory::Security,
121 severity: MisuseSeverity::High,
122 description: "exec.Command is risky when arguments or executable names come from input",
123 correct_usage: "Prefer direct library APIs or strictly validate allowed commands",
124 pattern: r"\bexec\.Command\s*\(",
125 api_call: "exec.Command",
126 message: "exec.Command can enable command injection when fed user-controlled values",
127 fix_suggestion: "Validate commands against an allowlist and avoid shell-like execution",
128 },
129 RegexRuleSpec {
130 id: "GO004",
131 name: "template-html-cast",
132 category: MisuseCategory::Security,
133 severity: MisuseSeverity::High,
134 description: "template.HTML bypasses html/template escaping guarantees",
135 correct_usage: "Pass plain strings to templates and let html/template escape them",
136 pattern: r"\btemplate\.HTML\s*\(",
137 api_call: "template.HTML",
138 message: "template.HTML disables escaping and can introduce XSS",
139 fix_suggestion: "Remove the cast and rely on html/template auto-escaping",
140 },
141 RegexRuleSpec {
142 id: "GO005",
143 name: "sql-query-without-context",
144 category: MisuseCategory::CallOrder,
145 severity: MisuseSeverity::Medium,
146 description: "sql.DB.Query lacks cancellation and timeout propagation compared with QueryContext",
147 correct_usage: "Use db.QueryContext(ctx, query, args...)",
148 pattern: r"\bsql\.Query\s*\(",
149 api_call: "sql.Query",
150 message: "sql.Query omits context-driven cancellation and timeout handling",
151 fix_suggestion: "Use QueryContext/ExecContext with a bounded context",
152 },
153];
154
155const JAVA_RULE_SPECS: &[RegexRuleSpec] = &[
156 RegexRuleSpec {
157 id: "JV001",
158 name: "string-comparison-with-double-equals",
159 category: MisuseCategory::CallOrder,
160 severity: MisuseSeverity::Medium,
161 description: "Using == on strings compares references instead of values",
162 correct_usage: "Use value.equals(other) or Objects.equals(a, b)",
163 pattern: r#"(?:".*"|\b\w+\b)\s*==\s*(?:".*"|\b\w+\b)"#,
164 api_call: "==",
165 message: "String comparison with == checks reference identity, not value equality",
166 fix_suggestion: "Use .equals(...) or Objects.equals(...) for string values",
167 },
168 RegexRuleSpec {
169 id: "JV002",
170 name: "runtime-exec",
171 category: MisuseCategory::Security,
172 severity: MisuseSeverity::High,
173 description: "Runtime.exec is dangerous with dynamic input and hard to sandbox correctly",
174 correct_usage: "Use structured APIs or a ProcessBuilder with validated arguments",
175 pattern: r"\bRuntime\.getRuntime\(\)\.exec\s*\(",
176 api_call: "Runtime.exec",
177 message: "Runtime.exec is a common command injection footgun",
178 fix_suggestion: "Prefer library APIs or tightly validated ProcessBuilder arguments",
179 },
180 RegexRuleSpec {
181 id: "JV003",
182 name: "objectinputstream-deserialization",
183 category: MisuseCategory::Security,
184 severity: MisuseSeverity::High,
185 description: "ObjectInputStream on untrusted data can trigger unsafe deserialization gadgets",
186 correct_usage: "Use safer formats like JSON with explicit schemas",
187 pattern: r"\bnew\s+ObjectInputStream\s*\(",
188 api_call: "ObjectInputStream",
189 message: "ObjectInputStream enables unsafe native Java deserialization",
190 fix_suggestion: "Replace native object deserialization with a schema-driven format",
191 },
192 RegexRuleSpec {
193 id: "JV004",
194 name: "create-statement",
195 category: MisuseCategory::Security,
196 severity: MisuseSeverity::Medium,
197 description: "createStatement often leads to string-built SQL instead of prepared statements",
198 correct_usage: "Use prepareStatement with placeholders",
199 pattern: r"\bcreateStatement\s*\(",
200 api_call: "createStatement",
201 message: "createStatement encourages dynamic SQL and weak parameter handling",
202 fix_suggestion: "Use prepareStatement with bound parameters",
203 },
204 RegexRuleSpec {
205 id: "JV005",
206 name: "system-gc-call",
207 category: MisuseCategory::Resources,
208 severity: MisuseSeverity::Low,
209 description: "System.gc() is usually a performance smell and not a reliable memory fix",
210 correct_usage: "Remove manual GC triggers and profile allocations instead",
211 pattern: r"\bSystem\.gc\s*\(",
212 api_call: "System.gc",
213 message: "System.gc() is an unreliable manual GC hint and often harms latency",
214 fix_suggestion: "Remove the call and fix the underlying allocation or lifetime issue",
215 },
216];
217
218const JAVASCRIPT_RULE_SPECS: &[RegexRuleSpec] = &[
219 RegexRuleSpec {
220 id: "JS001",
221 name: "loose-equality",
222 category: MisuseCategory::CallOrder,
223 severity: MisuseSeverity::Medium,
224 description: "Loose equality allows coercions that frequently hide correctness bugs",
225 correct_usage: "Use === / !== except in deliberately reviewed coercion cases",
226 pattern: r"\s==\s|\s!=\s",
227 api_call: "==",
228 message: "Loose equality can coerce values unexpectedly",
229 fix_suggestion: "Use === or !== and handle explicit type conversion",
230 },
231 RegexRuleSpec {
232 id: "JS002",
233 name: "parseint-without-radix",
234 category: MisuseCategory::Parameters,
235 severity: MisuseSeverity::Low,
236 description: "parseInt without a radix is ambiguous and less explicit than required",
237 correct_usage: "Use parseInt(value, 10)",
238 pattern: r"\bparseInt\s*\(\s*[^,\)]+\)",
239 api_call: "parseInt",
240 message: "parseInt called without an explicit radix",
241 fix_suggestion: "Pass a radix explicitly, usually parseInt(value, 10)",
242 },
243 RegexRuleSpec {
244 id: "JS003",
245 name: "json-parse-without-guard",
246 category: MisuseCategory::ErrorHandling,
247 severity: MisuseSeverity::Low,
248 description: "JSON.parse throws on malformed input and should usually be guarded",
249 correct_usage: "Wrap JSON.parse in try/catch when input is not fully trusted",
250 pattern: r"\bJSON\.parse\s*\(",
251 api_call: "JSON.parse",
252 message: "JSON.parse can throw and should be guarded for untrusted input",
253 fix_suggestion: "Use try/catch or validated parsing for untrusted payloads",
254 },
255 RegexRuleSpec {
256 id: "JS004",
257 name: "document-write",
258 category: MisuseCategory::Security,
259 severity: MisuseSeverity::High,
260 description: "document.write is legacy, brittle, and can inject unsanitized HTML",
261 correct_usage: "Use DOM APIs like textContent/appendChild instead",
262 pattern: r"\bdocument\.write(?:ln)?\s*\(",
263 api_call: "document.write",
264 message: "document.write is unsafe and can enable XSS",
265 fix_suggestion: "Use safe DOM APIs instead of writing raw HTML strings",
266 },
267 RegexRuleSpec {
268 id: "JS005",
269 name: "eval-call",
270 category: MisuseCategory::Security,
271 severity: MisuseSeverity::High,
272 description: "eval executes dynamic code and should be avoided",
273 correct_usage: "Use structured data parsing or explicit dispatch tables",
274 pattern: r"\beval\s*\(",
275 api_call: "eval",
276 message: "eval executes dynamic code and creates major security risk",
277 fix_suggestion: "Replace eval with data parsing or explicit function dispatch",
278 },
279];
280
281const TYPESCRIPT_RULE_SPECS: &[RegexRuleSpec] = &[
282 RegexRuleSpec {
283 id: "TS001",
284 name: "loose-equality",
285 category: MisuseCategory::CallOrder,
286 severity: MisuseSeverity::Medium,
287 description: "Loose equality allows coercions that frequently hide correctness bugs",
288 correct_usage: "Use === / !== except in deliberately reviewed coercion cases",
289 pattern: r"\s==\s|\s!=\s",
290 api_call: "==",
291 message: "Loose equality can coerce values unexpectedly",
292 fix_suggestion: "Use === or !== and handle explicit type conversion",
293 },
294 RegexRuleSpec {
295 id: "TS002",
296 name: "parseint-without-radix",
297 category: MisuseCategory::Parameters,
298 severity: MisuseSeverity::Low,
299 description: "parseInt without a radix is ambiguous and less explicit than required",
300 correct_usage: "Use parseInt(value, 10)",
301 pattern: r"\bparseInt\s*\(\s*[^,\)]+\)",
302 api_call: "parseInt",
303 message: "parseInt called without an explicit radix",
304 fix_suggestion: "Pass a radix explicitly, usually parseInt(value, 10)",
305 },
306 RegexRuleSpec {
307 id: "TS003",
308 name: "json-parse-without-guard",
309 category: MisuseCategory::ErrorHandling,
310 severity: MisuseSeverity::Low,
311 description: "JSON.parse throws on malformed input and should usually be guarded",
312 correct_usage: "Wrap JSON.parse in try/catch when input is not fully trusted",
313 pattern: r"\bJSON\.parse\s*\(",
314 api_call: "JSON.parse",
315 message: "JSON.parse can throw and should be guarded for untrusted input",
316 fix_suggestion: "Use try/catch or validated parsing for untrusted payloads",
317 },
318 RegexRuleSpec {
319 id: "TS004",
320 name: "document-write",
321 category: MisuseCategory::Security,
322 severity: MisuseSeverity::High,
323 description: "document.write is legacy, brittle, and can inject unsanitized HTML",
324 correct_usage: "Use DOM APIs like textContent/appendChild instead",
325 pattern: r"\bdocument\.write(?:ln)?\s*\(",
326 api_call: "document.write",
327 message: "document.write is unsafe and can enable XSS",
328 fix_suggestion: "Use safe DOM APIs instead of writing raw HTML strings",
329 },
330 RegexRuleSpec {
331 id: "TS005",
332 name: "eval-call",
333 category: MisuseCategory::Security,
334 severity: MisuseSeverity::High,
335 description: "eval executes dynamic code and should be avoided",
336 correct_usage: "Use structured data parsing or explicit dispatch tables",
337 pattern: r"\beval\s*\(",
338 api_call: "eval",
339 message: "eval executes dynamic code and creates major security risk",
340 fix_suggestion: "Replace eval with data parsing or explicit function dispatch",
341 },
342];
343
344const C_RULE_SPECS: &[RegexRuleSpec] = &[
345 RegexRuleSpec {
346 id: "C001",
347 name: "gets-call",
348 category: MisuseCategory::Security,
349 severity: MisuseSeverity::High,
350 description: "gets cannot bound input and has been removed from the standard library",
351 correct_usage: "Use fgets with an explicit buffer length",
352 pattern: r"\bgets\s*\(",
353 api_call: "gets",
354 message: "gets is inherently unsafe and enables buffer overflows",
355 fix_suggestion: "Use fgets(buffer, size, stdin) or another bounded API",
356 },
357 RegexRuleSpec {
358 id: "C002",
359 name: "strcpy-call",
360 category: MisuseCategory::Security,
361 severity: MisuseSeverity::High,
362 description: "strcpy performs unbounded copies and easily overflows buffers",
363 correct_usage: "Use snprintf, strlcpy, or explicit bounds checks",
364 pattern: r"\bstrcpy\s*\(",
365 api_call: "strcpy",
366 message: "strcpy performs an unbounded copy",
367 fix_suggestion: "Replace strcpy with a bounded copy strategy",
368 },
369 RegexRuleSpec {
370 id: "C003",
371 name: "sprintf-call",
372 category: MisuseCategory::Security,
373 severity: MisuseSeverity::High,
374 description: "sprintf writes formatted data without a size bound",
375 correct_usage: "Use snprintf with the destination buffer size",
376 pattern: r"\bsprintf\s*\(",
377 api_call: "sprintf",
378 message: "sprintf can overflow fixed-size buffers",
379 fix_suggestion: "Use snprintf(buffer, size, ...) instead",
380 },
381 RegexRuleSpec {
382 id: "C004",
383 name: "scanf-string-without-width",
384 category: MisuseCategory::Security,
385 severity: MisuseSeverity::High,
386 description: "scanf with %s and no width limit can overflow the destination buffer",
387 correct_usage: "Provide a width specifier or use fgets",
388 pattern: r#"\bscanf\s*\(\s*"%s"#,
389 api_call: "scanf",
390 message: "scanf(\"%s\") reads unbounded input into a buffer",
391 fix_suggestion: "Add a width limit or use fgets plus parsing",
392 },
393 RegexRuleSpec {
394 id: "C005",
395 name: "system-call",
396 category: MisuseCategory::Security,
397 severity: MisuseSeverity::High,
398 description: "system executes a shell command and is dangerous with dynamic input",
399 correct_usage: "Use execve-family APIs with validated arguments where possible",
400 pattern: r"\bsystem\s*\(",
401 api_call: "system",
402 message: "system executes a shell and is a common command injection vector",
403 fix_suggestion: "Avoid shell execution or tightly validate the command source",
404 },
405];
406
407const CPP_RULE_SPECS: &[RegexRuleSpec] = &[
408 RegexRuleSpec {
409 id: "CPP001",
410 name: "strcpy-call",
411 category: MisuseCategory::Security,
412 severity: MisuseSeverity::High,
413 description: "strcpy performs unbounded copies and easily overflows buffers",
414 correct_usage: "Use std::string, snprintf, or another bounded copy strategy",
415 pattern: r"\bstrcpy\s*\(",
416 api_call: "strcpy",
417 message: "strcpy performs an unbounded copy",
418 fix_suggestion: "Use std::string or a bounded copy API instead",
419 },
420 RegexRuleSpec {
421 id: "CPP002",
422 name: "sprintf-call",
423 category: MisuseCategory::Security,
424 severity: MisuseSeverity::High,
425 description: "sprintf writes formatted data without a size bound",
426 correct_usage: "Use snprintf or std::format into a bounded container",
427 pattern: r"\bsprintf\s*\(",
428 api_call: "sprintf",
429 message: "sprintf can overflow fixed-size buffers",
430 fix_suggestion: "Use snprintf or a safer formatting abstraction",
431 },
432 RegexRuleSpec {
433 id: "CPP003",
434 name: "auto-ptr",
435 category: MisuseCategory::Resources,
436 severity: MisuseSeverity::Medium,
437 description: "std::auto_ptr is obsolete and has broken transfer semantics",
438 correct_usage: "Use std::unique_ptr or std::shared_ptr",
439 pattern: r"\bstd::auto_ptr\s*<",
440 api_call: "std::auto_ptr",
441 message: "std::auto_ptr is obsolete and unsafe by modern ownership standards",
442 fix_suggestion: "Replace std::auto_ptr with std::unique_ptr or std::shared_ptr",
443 },
444 RegexRuleSpec {
445 id: "CPP004",
446 name: "raw-new",
447 category: MisuseCategory::Resources,
448 severity: MisuseSeverity::Medium,
449 description: "Raw new often leads to leaks and exception-safety issues",
450 correct_usage: "Use std::make_unique or stack allocation where possible",
451 pattern: r"\bnew\s+\w",
452 api_call: "new",
453 message: "Raw new makes ownership and exception safety harder to reason about",
454 fix_suggestion: "Use std::make_unique, containers, or stack allocation",
455 },
456 RegexRuleSpec {
457 id: "CPP005",
458 name: "system-call",
459 category: MisuseCategory::Security,
460 severity: MisuseSeverity::High,
461 description: "system executes a shell command and is dangerous with dynamic input",
462 correct_usage: "Use direct process APIs with validated arguments when possible",
463 pattern: r"(?:\bstd::)?system\s*\(",
464 api_call: "system",
465 message: "system executes a shell and is a common command injection vector",
466 fix_suggestion: "Avoid shell execution or tightly validate all command components",
467 },
468];
469
470const RUBY_RULE_SPECS: &[RegexRuleSpec] = &[
471 RegexRuleSpec {
472 id: "RB001",
473 name: "eval-call",
474 category: MisuseCategory::Security,
475 severity: MisuseSeverity::High,
476 description: "eval executes dynamic Ruby code and should be avoided",
477 correct_usage: "Use explicit dispatch or data parsing instead of dynamic code execution",
478 pattern: r"\beval\s*\(",
479 api_call: "eval",
480 message: "eval executes dynamic code and creates major security risk",
481 fix_suggestion: "Replace eval with explicit dispatch or structured parsing",
482 },
483 RegexRuleSpec {
484 id: "RB002",
485 name: "dynamic-send",
486 category: MisuseCategory::Security,
487 severity: MisuseSeverity::Medium,
488 description: "send can invoke arbitrary methods when fed untrusted method names",
489 correct_usage: "Use public_send on a strict allowlist of method names",
490 pattern: r"\.send\s*\(",
491 api_call: "send",
492 message: "send can dispatch to unsafe or unexpected methods",
493 fix_suggestion: "Use public_send with a reviewed allowlist",
494 },
495 RegexRuleSpec {
496 id: "RB003",
497 name: "system-call",
498 category: MisuseCategory::Security,
499 severity: MisuseSeverity::High,
500 description: "system executes a shell command and is dangerous with interpolated input",
501 correct_usage: "Use array-form process APIs with validated arguments",
502 pattern: r"\bsystem\s*\(",
503 api_call: "system",
504 message: "system is a common command injection footgun",
505 fix_suggestion: "Avoid shell execution or pass validated argv-style arguments",
506 },
507 RegexRuleSpec {
508 id: "RB004",
509 name: "yaml-load",
510 category: MisuseCategory::Security,
511 severity: MisuseSeverity::High,
512 description: "YAML.load can instantiate arbitrary objects from untrusted input",
513 correct_usage: "Use YAML.safe_load with permitted classes",
514 pattern: r"\bYAML\.load\s*\(",
515 api_call: "YAML.load",
516 message: "YAML.load can deserialize unsafe objects",
517 fix_suggestion: "Use YAML.safe_load and restrict allowed classes",
518 },
519 RegexRuleSpec {
520 id: "RB005",
521 name: "marshal-load",
522 category: MisuseCategory::Security,
523 severity: MisuseSeverity::High,
524 description: "Marshal.load on untrusted data is unsafe deserialization",
525 correct_usage: "Use JSON or another safe, schema-checked format",
526 pattern: r"\bMarshal\.load\s*\(",
527 api_call: "Marshal.load",
528 message: "Marshal.load performs unsafe native deserialization",
529 fix_suggestion: "Replace Marshal.load with a safer serialization format",
530 },
531];
532
533const PHP_RULE_SPECS: &[RegexRuleSpec] = &[
534 RegexRuleSpec {
535 id: "PH001",
536 name: "deprecated-mysql-functions",
537 category: MisuseCategory::Security,
538 severity: MisuseSeverity::High,
539 description: "mysql_* APIs are removed and encourage unsafe query construction",
540 correct_usage: "Use PDO or mysqli with prepared statements",
541 pattern: r"\bmysql_[a-z_]+\s*\(",
542 api_call: "mysql_*",
543 message: "mysql_* functions are removed and unsafe by modern standards",
544 fix_suggestion: "Migrate to PDO or mysqli prepared statements",
545 },
546 RegexRuleSpec {
547 id: "PH002",
548 name: "extract-call",
549 category: MisuseCategory::Security,
550 severity: MisuseSeverity::Medium,
551 description: "extract pollutes local scope and can overwrite important variables",
552 correct_usage: "Read array keys explicitly instead of splatting them into scope",
553 pattern: r"\bextract\s*\(",
554 api_call: "extract",
555 message: "extract can overwrite local variables and hide data flow",
556 fix_suggestion: "Assign required keys explicitly instead of using extract",
557 },
558 RegexRuleSpec {
559 id: "PH003",
560 name: "eval-call",
561 category: MisuseCategory::Security,
562 severity: MisuseSeverity::High,
563 description: "eval executes dynamic PHP code and should be avoided",
564 correct_usage: "Use explicit dispatch or data parsing instead of dynamic code execution",
565 pattern: r"\beval\s*\(",
566 api_call: "eval",
567 message: "eval executes dynamic code and creates major security risk",
568 fix_suggestion: "Replace eval with explicit dispatch or structured parsing",
569 },
570 RegexRuleSpec {
571 id: "PH004",
572 name: "variable-variables",
573 category: MisuseCategory::Security,
574 severity: MisuseSeverity::Medium,
575 description: "Variable variables make scope mutation hard to reason about",
576 correct_usage: "Use associative arrays or explicit variables instead",
577 pattern: r"\$\$[A-Za-z_]",
578 api_call: "$$",
579 message: "Variable variables obscure data flow and can enable unsafe access patterns",
580 fix_suggestion: "Use an array/map or explicit variable names instead",
581 },
582 RegexRuleSpec {
583 id: "PH005",
584 name: "unserialize-call",
585 category: MisuseCategory::Security,
586 severity: MisuseSeverity::High,
587 description: "unserialize on untrusted data can trigger object injection chains",
588 correct_usage: "Use json_decode or a safer schema-checked format",
589 pattern: r"\bunserialize\s*\(",
590 api_call: "unserialize",
591 message: "unserialize enables unsafe object deserialization",
592 fix_suggestion: "Replace unserialize with json_decode or a safe serializer",
593 },
594];
595
596const KOTLIN_RULE_SPECS: &[RegexRuleSpec] = &[
597 RegexRuleSpec {
598 id: "KT001",
599 name: "force-unwrapped-null",
600 category: MisuseCategory::ErrorHandling,
601 severity: MisuseSeverity::Medium,
602 description: "!! converts nullable values into runtime crashes",
603 correct_usage: "Use safe calls, let, requireNotNull, or explicit branching",
604 pattern: r"!!",
605 api_call: "!!",
606 message: "!! will throw NullPointerException on null values",
607 fix_suggestion: "Use safe calls or explicit null handling instead of !!",
608 },
609 RegexRuleSpec {
610 id: "KT002",
611 name: "lateinit-var",
612 category: MisuseCategory::ErrorHandling,
613 severity: MisuseSeverity::Low,
614 description: "lateinit shifts initialization failures to runtime",
615 correct_usage: "Prefer constructor injection or nullable/state wrappers",
616 pattern: r"\blateinit\s+var\b",
617 api_call: "lateinit",
618 message: "lateinit can fail at runtime if the property is read before initialization",
619 fix_suggestion: "Prefer constructor injection or explicit nullable state",
620 },
621 RegexRuleSpec {
622 id: "KT003",
623 name: "globalscope-launch",
624 category: MisuseCategory::Concurrency,
625 severity: MisuseSeverity::Medium,
626 description: "GlobalScope.launch escapes structured concurrency and leaks work",
627 correct_usage: "Launch from a lifecycle-bound CoroutineScope",
628 pattern: r"\bGlobalScope\.launch\s*\(",
629 api_call: "GlobalScope.launch",
630 message: "GlobalScope.launch detaches work from structured concurrency",
631 fix_suggestion: "Use a lifecycle-bound CoroutineScope instead",
632 },
633 RegexRuleSpec {
634 id: "KT004",
635 name: "runtime-exec",
636 category: MisuseCategory::Security,
637 severity: MisuseSeverity::High,
638 description: "Runtime.exec is dangerous with dynamic input and hard to sandbox correctly",
639 correct_usage: "Use structured APIs or strictly validated ProcessBuilder arguments",
640 pattern: r"\bRuntime\.getRuntime\(\)\.exec\s*\(",
641 api_call: "Runtime.exec",
642 message: "Runtime.exec is a common command injection footgun",
643 fix_suggestion: "Prefer library APIs or tightly validated ProcessBuilder arguments",
644 },
645 RegexRuleSpec {
646 id: "KT005",
647 name: "thread-sleep",
648 category: MisuseCategory::Concurrency,
649 severity: MisuseSeverity::Low,
650 description: "Thread.sleep blocks threads directly and is usually wrong in coroutine-based code",
651 correct_usage: "Use delay(...) in coroutines or higher-level scheduling",
652 pattern: r"\bThread\.sleep\s*\(",
653 api_call: "Thread.sleep",
654 message: "Thread.sleep blocks the current thread directly",
655 fix_suggestion: "Use delay(...) or a proper scheduler instead",
656 },
657];
658
659const SWIFT_RULE_SPECS: &[RegexRuleSpec] = &[
660 RegexRuleSpec {
661 id: "SW001",
662 name: "forced-cast",
663 category: MisuseCategory::ErrorHandling,
664 severity: MisuseSeverity::Medium,
665 description: "as! crashes at runtime when the cast fails",
666 correct_usage: "Use as? with conditional handling",
667 pattern: r"\bas!\b",
668 api_call: "as!",
669 message: "Forced casts crash when the runtime type is different",
670 fix_suggestion: "Use as? and handle the nil case explicitly",
671 },
672 RegexRuleSpec {
673 id: "SW002",
674 name: "forced-try",
675 category: MisuseCategory::ErrorHandling,
676 severity: MisuseSeverity::Medium,
677 description: "try! crashes when the call throws",
678 correct_usage: "Use do/catch or try? with explicit fallback",
679 pattern: r"\btry!\b",
680 api_call: "try!",
681 message: "try! crashes the process on thrown errors",
682 fix_suggestion: "Use do/catch or try? and handle failure explicitly",
683 },
684 RegexRuleSpec {
685 id: "SW003",
686 name: "force-unwrap",
687 category: MisuseCategory::ErrorHandling,
688 severity: MisuseSeverity::Medium,
689 description: "Force unwrapping optionals crashes at runtime on nil",
690 correct_usage: "Use if let, guard let, or nil-coalescing",
691 pattern: r"\b[A-Za-z_][A-Za-z0-9_]*!",
692 api_call: "!",
693 message: "Force unwraps crash when the optional is nil",
694 fix_suggestion: "Use optional binding or nil-coalescing instead of force unwraps",
695 },
696 RegexRuleSpec {
697 id: "SW004",
698 name: "nskeyedunarchiver",
699 category: MisuseCategory::Security,
700 severity: MisuseSeverity::High,
701 description: "Legacy NSKeyedUnarchiver APIs on untrusted data are unsafe",
702 correct_usage: "Use secure decoding APIs with requiresSecureCoding",
703 pattern: r"\bNSKeyedUnarchiver\.unarchiveObject",
704 api_call: "NSKeyedUnarchiver",
705 message: "Legacy unarchiving can deserialize unexpected object graphs",
706 fix_suggestion: "Use secure coding APIs and schema-checked decoding",
707 },
708 RegexRuleSpec {
709 id: "SW005",
710 name: "fatalerror-call",
711 category: MisuseCategory::ErrorHandling,
712 severity: MisuseSeverity::Low,
713 description: "fatalError terminates the process and is risky outside clearly impossible states",
714 correct_usage: "Return/throw recoverable errors where possible",
715 pattern: r"\bfatalError\s*\(",
716 api_call: "fatalError",
717 message: "fatalError terminates the process immediately",
718 fix_suggestion: "Use recoverable error handling unless the state is truly unreachable",
719 },
720];
721
722const CSHARP_RULE_SPECS: &[RegexRuleSpec] = &[
723 RegexRuleSpec {
724 id: "CS001",
725 name: "binaryformatter",
726 category: MisuseCategory::Security,
727 severity: MisuseSeverity::High,
728 description: "BinaryFormatter is insecure and obsolete for untrusted data",
729 correct_usage: "Use System.Text.Json or another safe serializer",
730 pattern: r"\bBinaryFormatter\b",
731 api_call: "BinaryFormatter",
732 message: "BinaryFormatter is insecure and should not be used",
733 fix_suggestion: "Use System.Text.Json or another safe serializer",
734 },
735 RegexRuleSpec {
736 id: "CS002",
737 name: "gc-collect",
738 category: MisuseCategory::Resources,
739 severity: MisuseSeverity::Low,
740 description: "GC.Collect is rarely the right fix and often harms latency",
741 correct_usage: "Remove manual GC triggers and profile the real allocation issue",
742 pattern: r"\bGC\.Collect\s*\(",
743 api_call: "GC.Collect",
744 message: "GC.Collect is an unreliable manual GC hint and often harms performance",
745 fix_suggestion: "Remove the call and fix the underlying allocation issue",
746 },
747 RegexRuleSpec {
748 id: "CS003",
749 name: "task-result",
750 category: MisuseCategory::Concurrency,
751 severity: MisuseSeverity::Medium,
752 description: "Task.Result blocks synchronously and can deadlock async flows",
753 correct_usage: "Use await instead of blocking on Task.Result",
754 pattern: r"\.Result\b",
755 api_call: "Task.Result",
756 message: "Task.Result blocks synchronously and can deadlock async contexts",
757 fix_suggestion: "Use await and keep the async chain asynchronous",
758 },
759 RegexRuleSpec {
760 id: "CS004",
761 name: "task-wait",
762 category: MisuseCategory::Concurrency,
763 severity: MisuseSeverity::Medium,
764 description: "Task.Wait blocks synchronously and can deadlock async flows",
765 correct_usage: "Use await or WhenAll/WhenAny instead of blocking waits",
766 pattern: r"\.Wait\s*\(",
767 api_call: "Task.Wait",
768 message: "Task.Wait blocks synchronously and can deadlock async contexts",
769 fix_suggestion: "Use await or asynchronous coordination primitives instead",
770 },
771 RegexRuleSpec {
772 id: "CS005",
773 name: "process-start",
774 category: MisuseCategory::Security,
775 severity: MisuseSeverity::High,
776 description: "Process.Start is dangerous with untrusted paths or arguments",
777 correct_usage: "Use strict allowlists and avoid shell execution semantics",
778 pattern: r"\bProcess\.Start\s*\(",
779 api_call: "Process.Start",
780 message: "Process.Start can enable command injection with untrusted inputs",
781 fix_suggestion: "Validate executable and arguments against a strict allowlist",
782 },
783];
784
/// Regex-backed misuse rules for Scala sources.
///
/// Returned by `regex_rule_specs_for_language` for `ApiLanguage::Scala`. Each
/// `pattern` is tested against one trimmed source line at a time, so every rule
/// here is a line-local, purely textual heuristic: a match inside a string
/// literal or a trailing comment is reported like any other match.
const SCALA_RULE_SPECS: &[RegexRuleSpec] = &[
    // SC001: any standalone `null` token.
    RegexRuleSpec {
        id: "SC001",
        name: "null-usage",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Low,
        description: "null bypasses Scala's stronger option-based absence modeling",
        correct_usage: "Use Option instead of null",
        pattern: r"\bnull\b",
        api_call: "null",
        message: "null reintroduces runtime absence bugs into Scala code",
        fix_suggestion: "Use Option and explicit pattern matching instead",
    },
    // SC002: `asInstanceOf[` casts.
    RegexRuleSpec {
        id: "SC002",
        name: "asinstanceof-cast",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Medium,
        description: "asInstanceOf crashes at runtime when the type assumption is wrong",
        correct_usage: "Use pattern matching or TypeTag/ClassTag-aware APIs",
        pattern: r"\basInstanceOf\[",
        api_call: "asInstanceOf",
        message: "asInstanceOf creates unchecked runtime casts",
        fix_suggestion: "Use pattern matching or safer typed abstractions",
    },
    // SC003: blocking `Await.result(...)` calls.
    RegexRuleSpec {
        id: "SC003",
        name: "await-result",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Await.result blocks threads and can collapse asynchronous throughput",
        correct_usage: "Compose futures asynchronously instead of blocking",
        pattern: r"\bAwait\.result\s*\(",
        api_call: "Await.result",
        message: "Await.result blocks threads and can create deadlocks or latency spikes",
        fix_suggestion: "Use map/flatMap/for-comprehensions instead of blocking",
    },
    // SC004: fully-qualified references into `scala.collection.mutable`.
    RegexRuleSpec {
        id: "SC004",
        name: "mutable-collection",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Low,
        description: "scala.collection.mutable structures are harder to reason about under concurrency",
        correct_usage: "Prefer immutable collections unless mutation is intentionally scoped",
        pattern: r"\bscala\.collection\.mutable\.",
        api_call: "scala.collection.mutable",
        message: "Mutable collections can hide shared-state bugs",
        fix_suggestion: "Prefer immutable collections or encapsulate mutation carefully",
    },
    // SC005: external process execution via `sys.process.Process(...)`.
    RegexRuleSpec {
        id: "SC005",
        name: "sys-process",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "sys.process.Process executes external commands and is dangerous with input-derived values",
        correct_usage: "Use library APIs or validate commands and arguments against an allowlist",
        pattern: r"\bsys\.process\.Process\s*\(",
        api_call: "sys.process.Process",
        message: "sys.process.Process can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell-style execution or strictly validate all command parts",
    },
];
847
/// Regex-backed misuse rules for Elixir sources.
///
/// Returned by `regex_rule_specs_for_language` for `ApiLanguage::Elixir`. Each
/// `pattern` is tested against one trimmed source line at a time, so calls
/// whose arguments span several lines are only seen one line at a time.
const ELIXIR_RULE_SPECS: &[RegexRuleSpec] = &[
    // EX001: `String.to_atom(...)` calls.
    RegexRuleSpec {
        id: "EX001",
        name: "string-to-atom",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "String.to_atom on untrusted input can exhaust the VM atom table",
        correct_usage: "Use String.to_existing_atom only for reviewed values or keep strings",
        pattern: r"\bString\.to_atom\s*\(",
        api_call: "String.to_atom",
        message: "String.to_atom can permanently grow the atom table from user input",
        fix_suggestion: "Keep values as strings or use a reviewed to_existing_atom path",
    },
    // EX002: dynamic evaluation through `Code.eval_string(...)`.
    RegexRuleSpec {
        id: "EX002",
        name: "code-eval-string",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Code.eval_string executes dynamic Elixir code and should be avoided",
        correct_usage: "Use explicit dispatch or data parsing instead of dynamic evaluation",
        pattern: r"\bCode\.eval_string\s*\(",
        api_call: "Code.eval_string",
        message: "Code.eval_string executes dynamic code and is a major security risk",
        fix_suggestion: "Replace dynamic evaluation with explicit dispatch or parsing",
    },
    // EX003: `:erlang.binary_to_term(...)`; the pattern has no leading `\b`
    // because it starts with the `:` of the Erlang module atom.
    RegexRuleSpec {
        id: "EX003",
        name: "binary-to-term",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: ":erlang.binary_to_term on untrusted data is unsafe deserialization",
        correct_usage: "Use safe formats like JSON or term_to_binary only for trusted data",
        pattern: r":erlang\.binary_to_term\s*\(",
        api_call: ":erlang.binary_to_term",
        message: ":erlang.binary_to_term can deserialize unsafe terms from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // EX004: raising `File.read!(...)` calls.
    RegexRuleSpec {
        id: "EX004",
        name: "file-read-bang",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Low,
        description: "Bang file APIs raise instead of returning tagged tuples",
        correct_usage: "Prefer File.read/1 with explicit {:ok, data} / {:error, reason} handling",
        pattern: r"\bFile\.read!\s*\(",
        api_call: "File.read!",
        message: "File.read! raises on failure instead of returning a recoverable error",
        fix_suggestion: "Use File.read/1 and handle the returned tuple explicitly",
    },
    // EX005: two-argument `Task.await(x, :infinity)`; `[^,]+` requires a
    // comma-free first argument, so nested calls with commas are not matched.
    RegexRuleSpec {
        id: "EX005",
        name: "task-await-infinity",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Task.await with :infinity can stall callers indefinitely",
        correct_usage: "Use bounded timeouts and supervised retry/cancellation behavior",
        pattern: r"\bTask\.await\s*\([^,]+,\s*:infinity\s*\)",
        api_call: "Task.await",
        message: "Task.await(..., :infinity) can block forever",
        fix_suggestion: "Use a bounded timeout and explicit failure handling",
    },
];
910
/// Regex-backed misuse rules shared by Lua and Luau sources.
///
/// Returned by `regex_rule_specs_for_language` for both `ApiLanguage::Lua` and
/// `ApiLanguage::Luau`. Each `pattern` is tested against one trimmed source
/// line at a time.
const LUA_RULE_SPECS: &[RegexRuleSpec] = &[
    // LU001: a line that begins with `name =`. Because the pattern is applied
    // to the trimmed line, `^` anchors at the first non-whitespace character,
    // so indented assignments match as well.
    RegexRuleSpec {
        id: "LU001",
        name: "implicit-global",
        category: MisuseCategory::CallOrder,
        severity: MisuseSeverity::Low,
        description: "Assigning without local leaks mutable globals and creates hidden coupling",
        correct_usage: "Declare locals explicitly with local name = ...",
        pattern: r"^[A-Za-z_][A-Za-z0-9_]*\s*=",
        api_call: "global assignment",
        message: "Implicit global assignment leaks state outside local scope",
        fix_suggestion: "Prefix the binding with local to keep scope explicit",
    },
    // LU002: `load(...)` / `loadstring(...)`; reported under the `load` name.
    RegexRuleSpec {
        id: "LU002",
        name: "dynamic-load",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "load/loadstring execute dynamic Lua code and should be avoided",
        correct_usage: "Use structured parsing or explicit dispatch instead of dynamic evaluation",
        pattern: r"\b(?:loadstring|load)\s*\(",
        api_call: "load",
        message: "Dynamic code loading executes attacker-controlled Lua if fed untrusted input",
        fix_suggestion: "Replace dynamic evaluation with explicit dispatch or parsing",
    },
    // LU003: shelling out through `os.execute(...)`.
    RegexRuleSpec {
        id: "LU003",
        name: "os-execute",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "os.execute shells out and is dangerous with dynamic input",
        correct_usage: "Avoid shell execution or validate every command component",
        pattern: r"\bos\.execute\s*\(",
        api_call: "os.execute",
        message: "os.execute can enable command injection with untrusted input",
        fix_suggestion: "Avoid shelling out or strictly validate the command source",
    },
    // LU004: shelling out through `io.popen(...)`.
    RegexRuleSpec {
        id: "LU004",
        name: "io-popen",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "io.popen launches shell commands and should be treated as high risk",
        correct_usage: "Use safer process APIs or validate all command components",
        pattern: r"\bio\.popen\s*\(",
        api_call: "io.popen",
        message: "io.popen can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell execution or validate every command component",
    },
    // LU005: `dofile(...)` / `loadfile(...)`; reported under the `dofile` name.
    RegexRuleSpec {
        id: "LU005",
        name: "dofile-loadfile",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::Medium,
        description: "dofile/loadfile execute external files and are risky with user-controlled paths",
        correct_usage: "Validate file origins strictly before executing them",
        pattern: r"\b(?:dofile|loadfile)\s*\(",
        api_call: "dofile",
        message: "Executing external files is dangerous when the path is not fully trusted",
        fix_suggestion: "Avoid dynamic file execution or tightly validate trusted origins",
    },
];
973
/// Regex-backed misuse rules for OCaml sources.
///
/// Returned by `regex_rule_specs_for_language` for `ApiLanguage::Ocaml`. Each
/// `pattern` is tested against one trimmed source line at a time. Unlike the
/// other tables, these patterns end in `\b` instead of requiring `(`, so a bare
/// mention of the identifier is enough to match (presumably because OCaml
/// function application does not need parentheses — confirm with the author).
const OCAML_RULE_SPECS: &[RegexRuleSpec] = &[
    // OC001: any reference to `Marshal.from_string`.
    RegexRuleSpec {
        id: "OC001",
        name: "marshal-from-string",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Marshal.from_string on untrusted data is unsafe native deserialization",
        correct_usage: "Use a safe, schema-checked serialization format",
        pattern: r"\bMarshal\.from_string\b",
        api_call: "Marshal.from_string",
        message: "Marshal.from_string can deserialize unsafe values from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // OC002: any reference to `Marshal.from_channel`.
    RegexRuleSpec {
        id: "OC002",
        name: "marshal-from-channel",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Marshal.from_channel on untrusted data is unsafe native deserialization",
        correct_usage: "Use a safe, schema-checked serialization format",
        pattern: r"\bMarshal\.from_channel\b",
        api_call: "Marshal.from_channel",
        message: "Marshal.from_channel can deserialize unsafe values from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // OC003: any reference to `Sys.command`.
    RegexRuleSpec {
        id: "OC003",
        name: "sys-command",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Sys.command executes a shell command and is dangerous with dynamic input",
        correct_usage: "Prefer direct library APIs or validate allowed commands strictly",
        pattern: r"\bSys\.command\b",
        api_call: "Sys.command",
        message: "Sys.command can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell execution or tightly validate the command source",
    },
    // OC004: any reference to `Obj.magic`.
    RegexRuleSpec {
        id: "OC004",
        name: "obj-magic",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::High,
        description: "Obj.magic bypasses the type system and can produce memory-unsound behavior",
        correct_usage: "Use typed abstractions or explicit variant handling",
        pattern: r"\bObj\.magic\b",
        api_call: "Obj.magic",
        message: "Obj.magic bypasses type safety and can create undefined behavior",
        fix_suggestion: "Refactor to a typed abstraction instead of coercing with Obj.magic",
    },
    // OC005: `open_in` / `open_out` as whole words (so `open_in_bin` does not
    // match, since `_` is a word character); reported under the `open_in` name.
    RegexRuleSpec {
        id: "OC005",
        name: "open-in-out",
        category: MisuseCategory::Resources,
        severity: MisuseSeverity::Low,
        description: "open_in/open_out require explicit close calls and are easy to leak",
        correct_usage: "Use In_channel.with_open_* or Out_channel.with_open_* helpers",
        pattern: r"\b(?:open_in|open_out)\b",
        api_call: "open_in",
        message: "open_in/open_out require explicit close handling and are easy to leak",
        fix_suggestion: "Use with_open_* helpers to scope the channel lifetime",
    },
];
1036
/// Every `ApiLanguage` variant, in declaration order.
///
/// Exposed through `all_api_languages`; `ApiCheckArgs::run` iterates it to
/// total the number of rules across all languages. Keep this list in sync with
/// the `ApiLanguage` enum when adding a variant.
const ALL_API_LANGUAGES: &[ApiLanguage] = &[
    ApiLanguage::Python,
    ApiLanguage::Rust,
    ApiLanguage::Go,
    ApiLanguage::Java,
    ApiLanguage::JavaScript,
    ApiLanguage::TypeScript,
    ApiLanguage::C,
    ApiLanguage::Cpp,
    ApiLanguage::Ruby,
    ApiLanguage::Php,
    ApiLanguage::Kotlin,
    ApiLanguage::Swift,
    ApiLanguage::CSharp,
    ApiLanguage::Scala,
    ApiLanguage::Elixir,
    ApiLanguage::Lua,
    ApiLanguage::Luau,
    ApiLanguage::Ocaml,
];
1057
1058fn python_rules() -> Vec<APIRule> {
1064 vec![
1065 APIRule {
1066 id: "PY001".to_string(),
1067 name: "missing-timeout".to_string(),
1068 category: MisuseCategory::Parameters,
1069 severity: MisuseSeverity::High,
1070 description: "requests.get/post/etc without timeout parameter can hang indefinitely"
1071 .to_string(),
1072 correct_usage: "requests.get(url, timeout=30)".to_string(),
1073 },
1074 APIRule {
1075 id: "PY002".to_string(),
1076 name: "bare-except".to_string(),
1077 category: MisuseCategory::ErrorHandling,
1078 severity: MisuseSeverity::Medium,
1079 description: "Bare except clause catches all exceptions including KeyboardInterrupt"
1080 .to_string(),
1081 correct_usage: "except Exception as e:".to_string(),
1082 },
1083 APIRule {
1084 id: "PY003".to_string(),
1085 name: "weak-hash-md5".to_string(),
1086 category: MisuseCategory::Crypto,
1087 severity: MisuseSeverity::High,
1088 description: "MD5 is cryptographically broken, don't use for security purposes"
1089 .to_string(),
1090 correct_usage: "hashlib.sha256() or bcrypt for passwords".to_string(),
1091 },
1092 APIRule {
1093 id: "PY004".to_string(),
1094 name: "weak-hash-sha1".to_string(),
1095 category: MisuseCategory::Crypto,
1096 severity: MisuseSeverity::High,
1097 description: "SHA1 is cryptographically weak, don't use for security purposes"
1098 .to_string(),
1099 correct_usage: "hashlib.sha256() or stronger".to_string(),
1100 },
1101 APIRule {
1102 id: "PY005".to_string(),
1103 name: "unclosed-file".to_string(),
1104 category: MisuseCategory::Resources,
1105 severity: MisuseSeverity::Medium,
1106 description: "File opened without context manager may not be properly closed"
1107 .to_string(),
1108 correct_usage: "with open(path) as f:".to_string(),
1109 },
1110 APIRule {
1111 id: "PY006".to_string(),
1112 name: "insecure-random".to_string(),
1113 category: MisuseCategory::Security,
1114 severity: MisuseSeverity::High,
1115 description: "random module is not cryptographically secure".to_string(),
1116 correct_usage: "secrets.token_bytes() or secrets.token_hex()".to_string(),
1117 },
1118 ]
1119}
1120
1121fn rust_rules() -> Vec<APIRule> {
1123 vec![
1124 APIRule {
1125 id: "RS001".to_string(),
1126 name: "mutex-lock-unwrap".to_string(),
1127 category: MisuseCategory::Concurrency,
1128 severity: MisuseSeverity::Medium,
1129 description: "Mutex::lock().unwrap() can panic and amplify lock contention (CWE-833)"
1130 .to_string(),
1131 correct_usage:
1132 "Prefer try_lock()/error handling or explicit poison recovery instead of unwrap()"
1133 .to_string(),
1134 },
1135 APIRule {
1136 id: "RS002".to_string(),
1137 name: "file-open-without-context".to_string(),
1138 category: MisuseCategory::ErrorHandling,
1139 severity: MisuseSeverity::Low,
1140 description:
1141 "File::open without contextual error mapping makes failures hard to triage"
1142 .to_string(),
1143 correct_usage:
1144 "File::open(path).with_context(|| format!(\"opening {}\", path.display()))?"
1145 .to_string(),
1146 },
1147 APIRule {
1148 id: "RS003".to_string(),
1149 name: "unbounded-with-capacity".to_string(),
1150 category: MisuseCategory::Resources,
1151 severity: MisuseSeverity::High,
1152 description:
1153 "Vec::with_capacity fed from unbounded input can cause memory exhaustion (CWE-770)"
1154 .to_string(),
1155 correct_usage: "Clamp capacity input before allocation (e.g. min(user_len, MAX))"
1156 .to_string(),
1157 },
1158 APIRule {
1159 id: "RS004".to_string(),
1160 name: "detached-tokio-spawn".to_string(),
1161 category: MisuseCategory::Concurrency,
1162 severity: MisuseSeverity::Medium,
1163 description: "tokio::spawn without retaining JoinHandle risks silent task failures"
1164 .to_string(),
1165 correct_usage: "Store JoinHandle values and await/join them".to_string(),
1166 },
1167 APIRule {
1168 id: "RS005".to_string(),
1169 name: "hashmap-order-dependence".to_string(),
1170 category: MisuseCategory::CallOrder,
1171 severity: MisuseSeverity::Low,
1172 description:
1173 "HashMap iteration order is non-deterministic; relying on it can break logic"
1174 .to_string(),
1175 correct_usage:
1176 "Collect keys and sort them, or use BTreeMap/IndexMap when stable order is required"
1177 .to_string(),
1178 },
1179 APIRule {
1180 id: "RS006".to_string(),
1181 name: "clone-in-hot-loop".to_string(),
1182 category: MisuseCategory::Resources,
1183 severity: MisuseSeverity::Low,
1184 description: "clone() inside loop bodies can create avoidable allocation pressure"
1185 .to_string(),
1186 correct_usage: "Borrow or move values instead of cloning in tight loops".to_string(),
1187 },
1188 ]
1189}
1190
1191fn regex_rule_specs_for_language(language: ApiLanguage) -> &'static [RegexRuleSpec] {
1192 match language {
1193 ApiLanguage::Python | ApiLanguage::Rust => &[],
1194 ApiLanguage::Go => GO_RULE_SPECS,
1195 ApiLanguage::Java => JAVA_RULE_SPECS,
1196 ApiLanguage::JavaScript => JAVASCRIPT_RULE_SPECS,
1197 ApiLanguage::TypeScript => TYPESCRIPT_RULE_SPECS,
1198 ApiLanguage::C => C_RULE_SPECS,
1199 ApiLanguage::Cpp => CPP_RULE_SPECS,
1200 ApiLanguage::Ruby => RUBY_RULE_SPECS,
1201 ApiLanguage::Php => PHP_RULE_SPECS,
1202 ApiLanguage::Kotlin => KOTLIN_RULE_SPECS,
1203 ApiLanguage::Swift => SWIFT_RULE_SPECS,
1204 ApiLanguage::CSharp => CSHARP_RULE_SPECS,
1205 ApiLanguage::Scala => SCALA_RULE_SPECS,
1206 ApiLanguage::Elixir => ELIXIR_RULE_SPECS,
1207 ApiLanguage::Lua | ApiLanguage::Luau => LUA_RULE_SPECS,
1208 ApiLanguage::Ocaml => OCAML_RULE_SPECS,
1209 }
1210}
1211
/// Returns the full list of languages the checker knows about
/// (the `ALL_API_LANGUAGES` table).
fn all_api_languages() -> &'static [ApiLanguage] {
    ALL_API_LANGUAGES
}
1215
1216#[derive(Debug, Args)]
1233pub struct ApiCheckArgs {
1234 #[arg(value_name = "path")]
1236 pub path: PathBuf,
1237
1238 #[arg(long, value_delimiter = ',')]
1240 pub category: Option<Vec<MisuseCategory>>,
1241
1242 #[arg(long, value_delimiter = ',')]
1244 pub severity: Option<Vec<MisuseSeverity>>,
1245
1246 #[arg(long, short = 'O')]
1248 pub output: Option<PathBuf>,
1249}
1250
1251impl ApiCheckArgs {
1252 pub fn run(&self, format: crate::output::OutputFormat, quiet: bool) -> Result<()> {
1254 let writer = OutputWriter::new(format, quiet);
1255
1256 writer.progress(&format!(
1257 "Checking {} for API misuse patterns...",
1258 self.path.display()
1259 ));
1260
1261 if !self.path.exists() {
1263 return Err(RemainingError::file_not_found(&self.path).into());
1264 }
1265
1266 let all_rules_count = all_api_languages()
1267 .iter()
1268 .map(|language| rules_for_language(*language).len() as u32)
1269 .sum();
1270
1271 let files = collect_files(&self.path)?;
1273 writer.progress(&format!("Found {} files to analyze", files.len()));
1274
1275 let mut all_findings: Vec<MisuseFinding> = Vec::new();
1277 let mut files_scanned = 0u32;
1278
1279 for file_path in &files {
1280 let Some(language) = detect_language(file_path) else {
1281 continue;
1282 };
1283 let rules = rules_for_language(language);
1284 match analyze_file(file_path, &rules, language) {
1285 Ok(findings) => {
1286 all_findings.extend(findings);
1287 files_scanned += 1;
1288 }
1289 Err(e) => {
1290 writer.progress(&format!(
1291 "Warning: Failed to analyze {}: {}",
1292 file_path.display(),
1293 e
1294 ));
1295 }
1296 }
1297 }
1298
1299 let filtered_findings = filter_findings(
1301 all_findings,
1302 self.category.as_deref(),
1303 self.severity.as_deref(),
1304 );
1305
1306 let summary = build_summary(&filtered_findings, files_scanned);
1308
1309 let report = APICheckReport {
1311 findings: filtered_findings,
1312 summary,
1313 rules_applied: all_rules_count,
1314 };
1315
1316 if let Some(ref output_path) = self.output {
1318 if writer.is_text() {
1319 let text = format_api_check_text(&report);
1320 fs::write(output_path, text)?;
1321 } else {
1322 let json = serde_json::to_string_pretty(&report)?;
1323 fs::write(output_path, json)?;
1324 }
1325 } else if writer.is_text() {
1326 let text = format_api_check_text(&report);
1327 writer.write_text(&text)?;
1328 } else {
1329 writer.write(&report)?;
1330 }
1331
1332 Ok(())
1333 }
1334}
1335
1336fn collect_files(path: &Path) -> Result<Vec<PathBuf>> {
1342 let mut files = Vec::new();
1343
1344 if path.is_file() {
1345 if is_supported_file(path) {
1346 files.push(path.to_path_buf());
1347 }
1348 } else if path.is_dir() {
1349 for entry in WalkDir::new(path)
1350 .follow_links(false)
1351 .into_iter()
1352 .filter_map(|e| e.ok())
1353 {
1354 if files.len() >= MAX_DIRECTORY_FILES as usize {
1355 break;
1356 }
1357
1358 let entry_path = entry.path();
1359 if entry_path.is_file() && is_supported_file(entry_path) {
1360 if let Ok(metadata) = fs::metadata(entry_path) {
1362 if metadata.len() <= MAX_FILE_SIZE {
1363 files.push(entry_path.to_path_buf());
1364 }
1365 }
1366 }
1367 }
1368 }
1369
1370 Ok(files)
1371}
1372
/// Returns `true` when `detect_language` recognizes the extension of `path`.
fn is_supported_file(path: &Path) -> bool {
    detect_language(path).is_some()
}
1377
1378pub(crate) fn detect_language(path: &Path) -> Option<ApiLanguage> {
1379 match path.extension().and_then(|e| e.to_str()) {
1380 Some("py") => Some(ApiLanguage::Python),
1381 Some("rs") => Some(ApiLanguage::Rust),
1382 Some("go") => Some(ApiLanguage::Go),
1383 Some("java") => Some(ApiLanguage::Java),
1384 Some("js") | Some("jsx") | Some("mjs") | Some("cjs") => Some(ApiLanguage::JavaScript),
1385 Some("ts") | Some("tsx") => Some(ApiLanguage::TypeScript),
1386 Some("c") | Some("h") => Some(ApiLanguage::C),
1387 Some("cpp") | Some("hpp") | Some("cc") | Some("cxx") => Some(ApiLanguage::Cpp),
1388 Some("rb") => Some(ApiLanguage::Ruby),
1389 Some("php") => Some(ApiLanguage::Php),
1390 Some("kt") | Some("kts") => Some(ApiLanguage::Kotlin),
1391 Some("swift") => Some(ApiLanguage::Swift),
1392 Some("cs") => Some(ApiLanguage::CSharp),
1393 Some("scala") => Some(ApiLanguage::Scala),
1394 Some("ex") | Some("exs") => Some(ApiLanguage::Elixir),
1395 Some("lua") => Some(ApiLanguage::Lua),
1396 Some("luau") => Some(ApiLanguage::Luau),
1397 Some("ml") | Some("mli") => Some(ApiLanguage::Ocaml),
1398 _ => None,
1399 }
1400}
1401
1402pub(crate) fn rules_for_language(language: ApiLanguage) -> Vec<APIRule> {
1403 match language {
1404 ApiLanguage::Python => python_rules(),
1405 ApiLanguage::Rust => rust_rules(),
1406 _ => regex_rule_specs_for_language(language)
1407 .iter()
1408 .copied()
1409 .map(RegexRuleSpec::rule)
1410 .collect(),
1411 }
1412}
1413
1414pub(crate) fn analyze_file(
1420 path: &Path,
1421 rules: &[APIRule],
1422 language: ApiLanguage,
1423) -> Result<Vec<MisuseFinding>> {
1424 let content = fs::read_to_string(path)?;
1425 let file_str = path.display().to_string();
1426 let mut findings = Vec::new();
1427 let mut prev_trimmed = String::new();
1428 let file_has_hashmap = matches!(language, ApiLanguage::Rust) && content.contains("HashMap");
1429
1430 for (line_num, line) in content.lines().enumerate() {
1431 let line_number = (line_num + 1) as u32;
1432 let trimmed = line.trim();
1433 let rust_ctx = RustLineContext {
1434 file_has_hashmap,
1435 previous_line: prev_trimmed.as_str(),
1436 previous_is_loop: prev_trimmed.starts_with("for ")
1437 || prev_trimmed.starts_with("while "),
1438 };
1439
1440 for rule in rules {
1442 if let Some(finding) =
1443 check_rule(rule, &file_str, line_number, line, language, &rust_ctx)
1444 {
1445 findings.push(finding);
1446 }
1447 }
1448 prev_trimmed = trimmed.to_string();
1449 }
1450
1451 Ok(findings)
1452}
1453
/// Per-line context handed to the checks that need more than the current line.
///
/// Built by `analyze_file` for every line of every file, regardless of
/// language; `check_rule` forwards it only to the `RS005` and `RS006` checks.
struct RustLineContext<'a> {
    /// `true` when the file is Rust and its text contains `HashMap` anywhere.
    file_has_hashmap: bool,
    /// Trimmed text of the preceding line (empty for the first line).
    previous_line: &'a str,
    /// `true` when the preceding trimmed line starts with `for ` or `while `.
    previous_is_loop: bool,
}
1459
1460fn check_rule(
1462 rule: &APIRule,
1463 file: &str,
1464 line: u32,
1465 line_text: &str,
1466 language: ApiLanguage,
1467 rust_ctx: &RustLineContext<'_>,
1468) -> Option<MisuseFinding> {
1469 let trimmed = line_text.trim();
1470
1471 if is_comment_line(trimmed, language) {
1473 return None;
1474 }
1475
1476 match rule.id.as_str() {
1477 "PY001" => check_missing_timeout(rule, file, line, trimmed),
1478 "PY002" => check_bare_except(rule, file, line, trimmed),
1479 "PY003" => check_md5_usage(rule, file, line, trimmed),
1480 "PY004" => check_sha1_usage(rule, file, line, trimmed),
1481 "PY005" => check_unclosed_file(rule, file, line, trimmed),
1482 "PY006" => check_insecure_random(rule, file, line, trimmed),
1483 "RS001" => check_mutex_lock_unwrap(rule, file, line, trimmed),
1484 "RS002" => check_file_open_without_context(rule, file, line, trimmed),
1485 "RS003" => check_unbounded_with_capacity(rule, file, line, trimmed),
1486 "RS004" => check_detached_tokio_spawn(rule, file, line, trimmed),
1487 "RS005" => check_hashmap_order_dependence(rule, file, line, trimmed, rust_ctx),
1488 "RS006" => check_clone_in_hot_loop(rule, file, line, trimmed, rust_ctx),
1489 _ => check_regex_rule(rule, file, line, trimmed, language),
1490 }
1491}
1492
1493fn is_comment_line(trimmed: &str, language: ApiLanguage) -> bool {
1494 match language {
1495 ApiLanguage::Python | ApiLanguage::Ruby | ApiLanguage::Elixir => trimmed.starts_with('#'),
1496 ApiLanguage::Rust
1497 | ApiLanguage::Go
1498 | ApiLanguage::Java
1499 | ApiLanguage::JavaScript
1500 | ApiLanguage::TypeScript
1501 | ApiLanguage::C
1502 | ApiLanguage::Cpp
1503 | ApiLanguage::Kotlin
1504 | ApiLanguage::Swift
1505 | ApiLanguage::CSharp
1506 | ApiLanguage::Scala => trimmed.starts_with("//"),
1507 ApiLanguage::Php => trimmed.starts_with("//") || trimmed.starts_with('#'),
1508 ApiLanguage::Lua | ApiLanguage::Luau => trimmed.starts_with("--"),
1509 ApiLanguage::Ocaml => trimmed.starts_with("(*"),
1510 }
1511}
1512
1513fn check_regex_rule(
1514 rule: &APIRule,
1515 file: &str,
1516 line: u32,
1517 line_text: &str,
1518 language: ApiLanguage,
1519) -> Option<MisuseFinding> {
1520 let spec = regex_rule_specs_for_language(language)
1521 .iter()
1522 .find(|spec| spec.id == rule.id)?;
1523 let regex = Regex::new(spec.pattern).ok()?;
1524 if !regex.is_match(line_text) {
1525 return None;
1526 }
1527
1528 let column = regex.find(line_text).map(|m| m.start()).unwrap_or(0) as u32;
1529 Some(MisuseFinding {
1530 file: file.to_string(),
1531 line,
1532 column,
1533 rule: rule.clone(),
1534 api_call: spec.api_call.to_string(),
1535 message: spec.message.to_string(),
1536 fix_suggestion: spec.fix_suggestion.to_string(),
1537 code_context: line_text.to_string(),
1538 })
1539}
1540
1541fn check_missing_timeout(
1543 rule: &APIRule,
1544 file: &str,
1545 line: u32,
1546 line_text: &str,
1547) -> Option<MisuseFinding> {
1548 let request_patterns = [
1550 "requests.get(",
1551 "requests.post(",
1552 "requests.put(",
1553 "requests.delete(",
1554 "requests.patch(",
1555 "requests.head(",
1556 "requests.options(",
1557 ];
1558
1559 for pattern in &request_patterns {
1560 if line_text.contains(pattern) && !line_text.contains("timeout") {
1561 let column = line_text.find(pattern).unwrap_or(0) as u32;
1562 return Some(MisuseFinding {
1563 file: file.to_string(),
1564 line,
1565 column,
1566 rule: rule.clone(),
1567 api_call: pattern.trim_end_matches('(').to_string(),
1568 message: format!(
1569 "{} called without timeout parameter",
1570 pattern.trim_end_matches('(')
1571 ),
1572 fix_suggestion: format!("Add timeout parameter: {}url, timeout=30)", pattern),
1573 code_context: line_text.to_string(),
1574 });
1575 }
1576 }
1577
1578 None
1579}
1580
1581fn check_bare_except(
1583 rule: &APIRule,
1584 file: &str,
1585 line: u32,
1586 line_text: &str,
1587) -> Option<MisuseFinding> {
1588 if line_text.starts_with("except:") || line_text.contains(" except:") {
1591 let column = line_text.find("except:").unwrap_or(0) as u32;
1592 return Some(MisuseFinding {
1593 file: file.to_string(),
1594 line,
1595 column,
1596 rule: rule.clone(),
1597 api_call: "except".to_string(),
1598 message: "Bare except clause catches all exceptions including KeyboardInterrupt and SystemExit".to_string(),
1599 fix_suggestion: "Use 'except Exception as e:' to catch only program exceptions".to_string(),
1600 code_context: line_text.to_string(),
1601 });
1602 }
1603
1604 None
1605}
1606
1607fn check_md5_usage(
1609 rule: &APIRule,
1610 file: &str,
1611 line: u32,
1612 line_text: &str,
1613) -> Option<MisuseFinding> {
1614 if line_text.contains("hashlib.md5") || line_text.contains("md5(") {
1616 let column = line_text
1617 .find("hashlib.md5")
1618 .or_else(|| line_text.find("md5("))
1619 .unwrap_or(0) as u32;
1620 return Some(MisuseFinding {
1621 file: file.to_string(),
1622 line,
1623 column,
1624 rule: rule.clone(),
1625 api_call: "hashlib.md5".to_string(),
1626 message: "MD5 is cryptographically broken and should not be used for security purposes"
1627 .to_string(),
1628 fix_suggestion: "Use hashlib.sha256() or stronger. For passwords, use bcrypt or argon2"
1629 .to_string(),
1630 code_context: line_text.to_string(),
1631 });
1632 }
1633
1634 None
1635}
1636
1637fn check_sha1_usage(
1639 rule: &APIRule,
1640 file: &str,
1641 line: u32,
1642 line_text: &str,
1643) -> Option<MisuseFinding> {
1644 if line_text.contains("hashlib.sha1") || line_text.contains("sha1(") {
1646 let column = line_text
1647 .find("hashlib.sha1")
1648 .or_else(|| line_text.find("sha1("))
1649 .unwrap_or(0) as u32;
1650 return Some(MisuseFinding {
1651 file: file.to_string(),
1652 line,
1653 column,
1654 rule: rule.clone(),
1655 api_call: "hashlib.sha1".to_string(),
1656 message: "SHA1 is cryptographically weak and should not be used for security purposes"
1657 .to_string(),
1658 fix_suggestion: "Use hashlib.sha256() or stronger".to_string(),
1659 code_context: line_text.to_string(),
1660 });
1661 }
1662
1663 None
1664}
1665
1666fn check_unclosed_file(
1668 rule: &APIRule,
1669 file: &str,
1670 line: u32,
1671 line_text: &str,
1672) -> Option<MisuseFinding> {
1673 if line_text.contains("open(")
1676 && !line_text.contains("with ")
1677 && !line_text.starts_with("with ")
1678 {
1679 if line_text.contains("= open(") || line_text.contains("=open(") {
1681 let column = line_text.find("open(").unwrap_or(0) as u32;
1682 return Some(MisuseFinding {
1683 file: file.to_string(),
1684 line,
1685 column,
1686 rule: rule.clone(),
1687 api_call: "open".to_string(),
1688 message: "File opened without context manager may not be properly closed"
1689 .to_string(),
1690 fix_suggestion: "Use 'with open(path) as f:' to ensure file is closed".to_string(),
1691 code_context: line_text.to_string(),
1692 });
1693 }
1694 }
1695
1696 None
1697}
1698
1699fn check_insecure_random(
1701 rule: &APIRule,
1702 file: &str,
1703 line: u32,
1704 line_text: &str,
1705) -> Option<MisuseFinding> {
1706 let insecure_patterns = [
1708 "random.randint(",
1709 "random.random(",
1710 "random.choice(",
1711 "random.randrange(",
1712 ];
1713
1714 let security_indicators = ["token", "secret", "password", "key", "auth", "session"];
1717
1718 for pattern in &insecure_patterns {
1719 if line_text.contains(pattern) {
1720 let line_lower = line_text.to_lowercase();
1722 for indicator in &security_indicators {
1723 if line_lower.contains(indicator) {
1724 let column = line_text.find(pattern).unwrap_or(0) as u32;
1725 return Some(MisuseFinding {
1726 file: file.to_string(),
1727 line,
1728 column,
1729 rule: rule.clone(),
1730 api_call: pattern.trim_end_matches('(').to_string(),
1731 message: format!(
1732 "{} is not cryptographically secure, don't use for security purposes",
1733 pattern.trim_end_matches('(')
1734 ),
1735 fix_suggestion:
1736 "Use secrets.token_bytes() or secrets.token_hex() for security"
1737 .to_string(),
1738 code_context: line_text.to_string(),
1739 });
1740 }
1741 }
1742 }
1743 }
1744
1745 None
1746}
1747
1748fn check_mutex_lock_unwrap(
1750 rule: &APIRule,
1751 file: &str,
1752 line: u32,
1753 line_text: &str,
1754) -> Option<MisuseFinding> {
1755 if line_text.contains(".lock().unwrap()") {
1756 let column = line_text.find(".lock().unwrap()").unwrap_or(0) as u32;
1757 return Some(MisuseFinding {
1758 file: file.to_string(),
1759 line,
1760 column,
1761 rule: rule.clone(),
1762 api_call: "Mutex::lock".to_string(),
1763 message:
1764 "Mutex::lock().unwrap() can panic on poisoned locks and hide deadlock behavior"
1765 .to_string(),
1766 fix_suggestion:
1767 "Handle lock errors explicitly (match/if let), or use try_lock with backoff"
1768 .to_string(),
1769 code_context: line_text.to_string(),
1770 });
1771 }
1772 None
1773}
1774
1775fn check_file_open_without_context(
1777 rule: &APIRule,
1778 file: &str,
1779 line: u32,
1780 line_text: &str,
1781) -> Option<MisuseFinding> {
1782 if line_text.contains("File::open(")
1783 && !line_text.contains(".context(")
1784 && !line_text.contains(".with_context(")
1785 && !line_text.contains("map_err(")
1786 {
1787 let column = line_text.find("File::open(").unwrap_or(0) as u32;
1788 return Some(MisuseFinding {
1789 file: file.to_string(),
1790 line,
1791 column,
1792 rule: rule.clone(),
1793 api_call: "File::open".to_string(),
1794 message: "File::open used without contextual error mapping".to_string(),
1795 fix_suggestion:
1796 "Wrap errors with context (with_context/context/map_err) before propagating"
1797 .to_string(),
1798 code_context: line_text.to_string(),
1799 });
1800 }
1801 None
1802}
1803
1804fn check_unbounded_with_capacity(
1806 rule: &APIRule,
1807 file: &str,
1808 line: u32,
1809 line_text: &str,
1810) -> Option<MisuseFinding> {
1811 if line_text.contains("Vec::with_capacity(") {
1812 let line_lower = line_text.to_lowercase();
1813 let user_input_markers = [
1814 "input", "args", "user", "request", "len", "size",
1815 ];
1816 if user_input_markers.iter().any(|m| line_lower.contains(m)) {
1817 let column = line_text.find("Vec::with_capacity(").unwrap_or(0) as u32;
1818 return Some(MisuseFinding {
1819 file: file.to_string(),
1820 line,
1821 column,
1822 rule: rule.clone(),
1823 api_call: "Vec::with_capacity".to_string(),
1824 message: "Vec::with_capacity appears to use unbounded external input".to_string(),
1825 fix_suggestion:
1826 "Clamp requested capacity with a hard upper bound before allocation".to_string(),
1827 code_context: line_text.to_string(),
1828 });
1829 }
1830 }
1831 None
1832}
1833
1834fn check_detached_tokio_spawn(
1836 rule: &APIRule,
1837 file: &str,
1838 line: u32,
1839 line_text: &str,
1840) -> Option<MisuseFinding> {
1841 if line_text.contains("tokio::spawn(")
1842 && !line_text.contains('=')
1843 && !line_text.contains("handles.push")
1844 {
1845 let column = line_text.find("tokio::spawn(").unwrap_or(0) as u32;
1846 return Some(MisuseFinding {
1847 file: file.to_string(),
1848 line,
1849 column,
1850 rule: rule.clone(),
1851 api_call: "tokio::spawn".to_string(),
1852 message: "tokio::spawn used without keeping JoinHandle".to_string(),
1853 fix_suggestion: "Store JoinHandle values and await them to surface task errors"
1854 .to_string(),
1855 code_context: line_text.to_string(),
1856 });
1857 }
1858 None
1859}
1860
1861fn check_hashmap_order_dependence(
1863 rule: &APIRule,
1864 file: &str,
1865 line: u32,
1866 line_text: &str,
1867 rust_ctx: &RustLineContext<'_>,
1868) -> Option<MisuseFinding> {
1869 let looks_like_hashmap_iteration = line_text.contains(".iter()")
1870 && (line_text.contains("for ") || rust_ctx.previous_line.starts_with("for "))
1871 && rust_ctx.file_has_hashmap;
1872 if looks_like_hashmap_iteration {
1873 let column = line_text.find(".iter()").unwrap_or(0) as u32;
1874 return Some(MisuseFinding {
1875 file: file.to_string(),
1876 line,
1877 column,
1878 rule: rule.clone(),
1879 api_call: "HashMap::iter".to_string(),
1880 message: "Potential logic dependence on HashMap iteration order".to_string(),
1881 fix_suggestion: "Use BTreeMap/IndexMap or sort keys before ordered operations"
1882 .to_string(),
1883 code_context: line_text.to_string(),
1884 });
1885 }
1886 None
1887}
1888
1889fn check_clone_in_hot_loop(
1891 rule: &APIRule,
1892 file: &str,
1893 line: u32,
1894 line_text: &str,
1895 rust_ctx: &RustLineContext<'_>,
1896) -> Option<MisuseFinding> {
1897 if line_text.contains(".clone()")
1898 && (line_text.contains("for ") || line_text.contains("while ") || rust_ctx.previous_is_loop)
1899 {
1900 let column = line_text.find(".clone()").unwrap_or(0) as u32;
1901 return Some(MisuseFinding {
1902 file: file.to_string(),
1903 line,
1904 column,
1905 rule: rule.clone(),
1906 api_call: "clone".to_string(),
1907 message: "clone() in loop context may create avoidable allocation overhead".to_string(),
1908 fix_suggestion: "Prefer borrowing/references or move semantics inside hot loops"
1909 .to_string(),
1910 code_context: line_text.to_string(),
1911 });
1912 }
1913 None
1914}
1915
1916fn filter_findings(
1922 findings: Vec<MisuseFinding>,
1923 categories: Option<&[MisuseCategory]>,
1924 severities: Option<&[MisuseSeverity]>,
1925) -> Vec<MisuseFinding> {
1926 findings
1927 .into_iter()
1928 .filter(|f| {
1929 if let Some(cats) = categories {
1931 if !cats.contains(&f.rule.category) {
1932 return false;
1933 }
1934 }
1935
1936 if let Some(sevs) = severities {
1938 if !sevs.contains(&f.rule.severity) {
1939 return false;
1940 }
1941 }
1942
1943 true
1944 })
1945 .collect()
1946}
1947
1948fn build_summary(findings: &[MisuseFinding], files_scanned: u32) -> APICheckSummary {
1954 let mut by_category: HashMap<String, u32> = HashMap::new();
1955 let mut by_severity: HashMap<String, u32> = HashMap::new();
1956 let mut apis_checked: Vec<String> = Vec::new();
1957
1958 for finding in findings {
1959 let cat_str = format!("{:?}", finding.rule.category).to_lowercase();
1961 *by_category.entry(cat_str).or_insert(0) += 1;
1962
1963 let sev_str = format!("{:?}", finding.rule.severity).to_lowercase();
1965 *by_severity.entry(sev_str).or_insert(0) += 1;
1966
1967 if !apis_checked.contains(&finding.api_call) {
1969 apis_checked.push(finding.api_call.clone());
1970 }
1971 }
1972
1973 APICheckSummary {
1974 total_findings: findings.len() as u32,
1975 by_category,
1976 by_severity,
1977 apis_checked,
1978 files_scanned,
1979 }
1980}
1981
1982fn format_api_check_text(report: &APICheckReport) -> String {
1988 let mut output = String::new();
1989
1990 output.push_str("=== API Check Report ===\n\n");
1991
1992 output.push_str(&format!(
1994 "Files scanned: {}\n",
1995 report.summary.files_scanned
1996 ));
1997 output.push_str(&format!("Rules applied: {}\n", report.rules_applied));
1998 output.push_str(&format!(
1999 "Total findings: {}\n\n",
2000 report.summary.total_findings
2001 ));
2002
2003 if !report.summary.by_severity.is_empty() {
2005 output.push_str("By Severity:\n");
2006 for (severity, count) in &report.summary.by_severity {
2007 output.push_str(&format!(" {}: {}\n", severity, count));
2008 }
2009 output.push('\n');
2010 }
2011
2012 if !report.summary.by_category.is_empty() {
2014 output.push_str("By Category:\n");
2015 for (category, count) in &report.summary.by_category {
2016 output.push_str(&format!(" {}: {}\n", category, count));
2017 }
2018 output.push('\n');
2019 }
2020
2021 if !report.findings.is_empty() {
2023 output.push_str("Findings:\n");
2024 output.push_str(&"-".repeat(60));
2025 output.push('\n');
2026
2027 for finding in &report.findings {
2028 output.push_str(&format!(
2029 "[{:?}] {} ({})\n",
2030 finding.rule.severity, finding.rule.name, finding.rule.id
2031 ));
2032 output.push_str(&format!(
2033 " Location: {}:{}:{}\n",
2034 finding.file, finding.line, finding.column
2035 ));
2036 output.push_str(&format!(" API: {}\n", finding.api_call));
2037 output.push_str(&format!(" Message: {}\n", finding.message));
2038 output.push_str(&format!(" Fix: {}\n", finding.fix_suggestion));
2039 if !finding.code_context.is_empty() {
2040 output.push_str(&format!(" Context: {}\n", finding.code_context.trim()));
2041 }
2042 output.push('\n');
2043 }
2044 } else {
2045 output.push_str("No API misuse patterns detected.\n");
2046 }
2047
2048 output
2049}
2050
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a high-severity finding in `test.py` at column 0 whose free-text
    /// fields are all the placeholder `"test"`.
    fn sample_finding(
        line: u32,
        rule_id: &str,
        category: MisuseCategory,
        api_call: &str,
    ) -> MisuseFinding {
        let placeholder = || "test".to_string();
        MisuseFinding {
            file: "test.py".to_string(),
            line,
            column: 0,
            rule: APIRule {
                id: rule_id.to_string(),
                name: placeholder(),
                category,
                severity: MisuseSeverity::High,
                description: placeholder(),
                correct_usage: placeholder(),
            },
            api_call: api_call.to_string(),
            message: placeholder(),
            fix_suggestion: placeholder(),
            code_context: placeholder(),
        }
    }

    /// Writes `source` to `filename` in a fresh temp dir, analyzes it with the
    /// rules for `language`, and asserts that `expected_rule_id` was reported.
    fn assert_language_findings(
        filename: &str,
        language: ApiLanguage,
        source: &str,
        expected_rule_id: &str,
    ) {
        let temp = TempDir::new().unwrap();
        let path = temp.path().join(filename);
        fs::write(&path, source).unwrap();

        let rules = rules_for_language(language);
        let findings = analyze_file(&path, &rules, language).unwrap();
        let reported = findings
            .iter()
            .any(|finding| finding.rule.id == expected_rule_id);

        assert!(
            reported,
            "expected {expected_rule_id} for {filename}, got {:?}",
            findings.iter().map(|f| f.rule.id.clone()).collect::<Vec<_>>()
        );
    }

    // --- rule tables ---

    #[test]
    fn test_python_rules_defined() {
        let rules = python_rules();
        let has_rule = |id: &str| rules.iter().any(|r| r.id == id);

        assert!(!rules.is_empty());
        assert!(has_rule("PY001"));
        assert!(has_rule("PY002"));
        assert!(has_rule("PY003"));
        assert!(has_rule("PY005"));
    }

    #[test]
    fn test_rust_rules_defined() {
        let rules = rust_rules();
        let has_rule = |id: &str| rules.iter().any(|r| r.id == id);

        assert!(!rules.is_empty());
        assert!(has_rule("RS001"));
        assert!(has_rule("RS002"));
        assert!(has_rule("RS003"));
        assert!(has_rule("RS004"));
        assert!(has_rule("RS005"));
        assert!(has_rule("RS006"));
    }

    #[test]
    fn test_all_supported_languages_have_rules() {
        for language in all_api_languages() {
            let rules = rules_for_language(*language);
            assert!(
                !rules.is_empty(),
                "expected at least one api-check rule for {:?}",
                language
            );
        }
    }

    // --- language detection ---

    #[test]
    fn test_detect_language_extended_extensions() {
        let cases = [
            ("main.go", ApiLanguage::Go),
            ("Main.java", ApiLanguage::Java),
            ("app.js", ApiLanguage::JavaScript),
            ("component.tsx", ApiLanguage::TypeScript),
            ("main.c", ApiLanguage::C),
            ("main.cpp", ApiLanguage::Cpp),
            ("app.rb", ApiLanguage::Ruby),
            ("index.php", ApiLanguage::Php),
            ("Main.kt", ApiLanguage::Kotlin),
            ("main.swift", ApiLanguage::Swift),
            ("Program.cs", ApiLanguage::CSharp),
            ("Main.scala", ApiLanguage::Scala),
            ("app.ex", ApiLanguage::Elixir),
            ("main.lua", ApiLanguage::Lua),
            ("game.luau", ApiLanguage::Luau),
            ("main.ml", ApiLanguage::Ocaml),
        ];

        for (path, expected) in cases {
            let detected = detect_language(Path::new(path));
            assert_eq!(detected, Some(expected), "{path}");
        }
    }

    // --- Python line checks ---

    #[test]
    fn test_check_missing_timeout() {
        let rule = &python_rules()[0];

        let without_timeout =
            check_missing_timeout(rule, "test.py", 1, "response = requests.get(url)");
        assert!(without_timeout.is_some());

        let with_timeout = check_missing_timeout(
            rule,
            "test.py",
            1,
            "response = requests.get(url, timeout=30)",
        );
        assert!(with_timeout.is_none());
    }

    #[test]
    fn test_check_bare_except() {
        let rule = &python_rules()[1];

        let bare = check_bare_except(rule, "test.py", 1, "except:");
        assert!(bare.is_some());

        let typed = check_bare_except(rule, "test.py", 1, "except Exception:");
        assert!(typed.is_none());
    }

    #[test]
    fn test_check_md5_usage() {
        let rule = &python_rules()[2];

        let weak = check_md5_usage(rule, "test.py", 1, "hash = hashlib.md5(data)");
        assert!(weak.is_some());

        let strong = check_md5_usage(rule, "test.py", 1, "hash = hashlib.sha256(data)");
        assert!(strong.is_none());
    }

    #[test]
    fn test_check_unclosed_file() {
        let rule = &python_rules()[4];

        let unmanaged = check_unclosed_file(rule, "test.py", 1, "f = open('data.txt')");
        assert!(unmanaged.is_some());

        let managed = check_unclosed_file(rule, "test.py", 1, "with open('data.txt') as f:");
        assert!(managed.is_none());
    }

    // --- filtering and summary ---

    #[test]
    fn test_filter_by_category() {
        let findings = vec![
            sample_finding(1, "PY001", MisuseCategory::Parameters, "test"),
            sample_finding(2, "PY003", MisuseCategory::Crypto, "test"),
        ];

        let filtered = filter_findings(findings, Some(&[MisuseCategory::Crypto]), None);

        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0].rule.category, MisuseCategory::Crypto);
    }

    #[test]
    fn test_build_summary() {
        let findings = vec![sample_finding(
            1,
            "PY001",
            MisuseCategory::Parameters,
            "requests.get",
        )];

        let summary = build_summary(&findings, 5);

        assert_eq!(summary.total_findings, 1);
        assert_eq!(summary.files_scanned, 5);
        assert!(summary.apis_checked.contains(&"requests.get".to_string()));
    }

    // --- file collection ---

    #[test]
    fn test_collect_files_includes_rust() {
        let temp = TempDir::new().unwrap();
        let fixtures = [
            ("a.py", "print('ok')"),
            ("b.rs", "fn main() {}"),
            ("c.go", "package main"),
            ("c.txt", "ignore"),
        ];
        for (name, contents) in fixtures {
            fs::write(temp.path().join(name), contents).unwrap();
        }

        let files = collect_files(temp.path()).unwrap();
        let collected = |name: &str| files.iter().any(|f| f.ends_with(name));

        assert!(collected("a.py"));
        assert!(collected("b.rs"));
        assert!(collected("c.go"));
        assert!(!collected("c.txt"));
    }

    // --- Rust line checks ---

    #[test]
    fn test_check_mutex_lock_unwrap() {
        let rule = &rust_rules()[0];
        let source_line = "let guard = shared.lock().unwrap();";

        let finding = check_mutex_lock_unwrap(rule, "lib.rs", 10, source_line);

        assert!(finding.is_some());
    }

    #[test]
    fn test_check_file_open_without_context() {
        let rule = &rust_rules()[1];

        let finding =
            check_file_open_without_context(rule, "lib.rs", 8, "let f = File::open(p)?;");
        assert!(finding.is_some());

        let contextual = check_file_open_without_context(
            rule,
            "lib.rs",
            9,
            "let f = File::open(p).with_context(|| \"open\".to_string())?;",
        );
        assert!(contextual.is_none());
    }

    #[test]
    fn test_check_unbounded_with_capacity() {
        let rule = &rust_rules()[2];

        let finding =
            check_unbounded_with_capacity(rule, "lib.rs", 12, "let v = Vec::with_capacity(len);");
        assert!(finding.is_some());

        let bounded =
            check_unbounded_with_capacity(rule, "lib.rs", 13, "let v = Vec::with_capacity(256);");
        assert!(bounded.is_none());
    }

    #[test]
    fn test_check_tokio_spawn_detached() {
        let rule = &rust_rules()[3];

        let detached = check_detached_tokio_spawn(
            rule,
            "lib.rs",
            3,
            "tokio::spawn(async move { work().await; });",
        );
        let tracked = check_detached_tokio_spawn(
            rule,
            "lib.rs",
            4,
            "let handle = tokio::spawn(async move { work().await; });",
        );

        assert!(detached.is_some());
        assert!(tracked.is_none());
    }

    #[test]
    fn test_check_hashmap_order_dependence() {
        let rule = &rust_rules()[4];
        let ctx = RustLineContext {
            file_has_hashmap: true,
            previous_line: "for (k, v) in map",
            previous_is_loop: true,
        };

        let finding = check_hashmap_order_dependence(rule, "lib.rs", 12, " .iter()", &ctx);

        assert!(finding.is_some());
    }

    #[test]
    fn test_check_clone_in_hot_loop() {
        let rule = &rust_rules()[5];
        let ctx = RustLineContext {
            file_has_hashmap: false,
            previous_line: "for item in items {",
            previous_is_loop: true,
        };

        let finding = check_clone_in_hot_loop(rule, "lib.rs", 20, "value.clone()", &ctx);

        assert!(finding.is_some());
    }

    // --- end-to-end detection per language ---

    #[test]
    fn test_extended_language_rule_detection() {
        let cases = [
            ("main.go", ApiLanguage::Go, "data, _ := ioutil.ReadFile(path)", "GO001"),
            ("Main.java", ApiLanguage::Java, "if (name == otherName) { }", "JV001"),
            ("app.js", ApiLanguage::JavaScript, "if (a == b) {}", "JS001"),
            ("app.ts", ApiLanguage::TypeScript, "if (a == b) {}", "TS001"),
            ("main.c", ApiLanguage::C, "gets(buffer);", "C001"),
            ("main.cpp", ApiLanguage::Cpp, "std::auto_ptr<Foo> p;", "CPP003"),
            ("app.rb", ApiLanguage::Ruby, "eval(params[:code])", "RB001"),
            ("index.php", ApiLanguage::Php, "unserialize($payload);", "PH005"),
            ("Main.kt", ApiLanguage::Kotlin, "val name = user!!", "KT001"),
            ("main.swift", ApiLanguage::Swift, "let name = value!", "SW003"),
            ("Program.cs", ApiLanguage::CSharp, "var x = task.Result;", "CS003"),
            (
                "Main.scala",
                ApiLanguage::Scala,
                "val casted = value.asInstanceOf[String]",
                "SC002",
            ),
            ("app.ex", ApiLanguage::Elixir, "String.to_atom(param)", "EX001"),
            ("main.lua", ApiLanguage::Lua, "value = 1", "LU001"),
            ("game.luau", ApiLanguage::Luau, "os.execute(cmd)", "LU003"),
            ("main.ml", ApiLanguage::Ocaml, "Obj.magic value", "OC004"),
        ];

        for (filename, language, source, expected_rule_id) in cases {
            assert_language_findings(filename, language, source, expected_rule_id);
        }
    }
}