Skip to main content

tldr_cli/commands/remaining/
api_check.rs

1//! API Check command - Detect API misuse patterns
2//!
//! Analyzes source code for common API misuse patterns. The Python checks include:
4//! - Timeout issues (requests.get without timeout)
5//! - Bare except clauses (catching all exceptions)
6//! - Weak crypto (MD5, SHA1 for security purposes)
7//! - Unclosed resources (files not using context managers)
8//!
9//! # Example
10//!
11//! ```bash
12//! tldr api-check src/
13//! tldr api-check src/main.py --category crypto
14//! tldr api-check src/ --severity high --format text
15//! ```
16
17use std::collections::HashMap;
18use std::fs;
19use std::path::{Path, PathBuf};
20
21use anyhow::Result;
22use clap::Args;
23use regex::Regex;
24use tldr_core::walker::walk_project;
25
26use super::error::RemainingError;
27use super::types::{
28    APICheckReport, APICheckSummary, APIRule, MisuseCategory, MisuseFinding, MisuseSeverity,
29};
30
31use crate::output::OutputWriter;
32
33// =============================================================================
34// Constants
35// =============================================================================
36
/// Upper bound on the number of files analyzed when the target is a directory.
const MAX_DIRECTORY_FILES: u32 = 1_000;
39
/// Largest file, in bytes, that is analyzed (10 MiB).
const MAX_FILE_SIZE: u64 = 10 << 20;
42
/// Programming languages the API check command distinguishes between.
///
/// The enum is a plain, field-less tag: it is `Copy` and comparable with `==`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ApiLanguage {
    /// Python sources.
    Python,
    /// Rust sources.
    Rust,
    /// Go sources.
    Go,
    /// Java sources.
    Java,
    /// JavaScript sources.
    JavaScript,
    /// TypeScript sources.
    TypeScript,
    /// C sources.
    C,
    /// C++ sources.
    Cpp,
    /// Ruby sources.
    Ruby,
    /// PHP sources.
    Php,
    /// Kotlin sources.
    Kotlin,
    /// Swift sources.
    Swift,
    /// C# sources.
    CSharp,
    /// Scala sources.
    Scala,
    /// Elixir sources.
    Elixir,
    /// Lua sources.
    Lua,
    /// Luau sources.
    Luau,
    /// OCaml sources.
    Ocaml,
}
64
65#[derive(Clone, Copy)]
66struct RegexRuleSpec {
67    id: &'static str,
68    name: &'static str,
69    category: MisuseCategory,
70    severity: MisuseSeverity,
71    description: &'static str,
72    correct_usage: &'static str,
73    pattern: &'static str,
74    api_call: &'static str,
75    message: &'static str,
76    fix_suggestion: &'static str,
77}
78
79impl RegexRuleSpec {
80    fn rule(self) -> APIRule {
81        APIRule {
82            id: self.id.to_string(),
83            name: self.name.to_string(),
84            category: self.category,
85            severity: self.severity,
86            description: self.description.to_string(),
87            correct_usage: self.correct_usage.to_string(),
88        }
89    }
90}
91
92const GO_RULE_SPECS: &[RegexRuleSpec] = &[
93    RegexRuleSpec {
94        id: "GO001",
95        name: "deprecated-ioutil-readfile",
96        category: MisuseCategory::Resources,
97        severity: MisuseSeverity::Low,
98        description: "ioutil.ReadFile is deprecated and encourages unbounded whole-file reads",
99        correct_usage: "Use os.ReadFile or stream with bufio.Scanner/Reader",
100        pattern: r"\bioutil\.ReadFile\s*\(",
101        api_call: "ioutil.ReadFile",
102        message: "ioutil.ReadFile is deprecated and can load unbounded content into memory",
103        fix_suggestion: "Use os.ReadFile for simple reads or bufio.Reader for bounded streaming",
104    },
105    RegexRuleSpec {
106        id: "GO002",
107        name: "http-get-without-timeout",
108        category: MisuseCategory::Parameters,
109        severity: MisuseSeverity::Medium,
110        description: "http.Get uses the default client and provides no call-specific timeout",
111        correct_usage: "Use an http.Client with Timeout or context-aware requests",
112        pattern: r"\bhttp\.Get\s*\(",
113        api_call: "http.Get",
114        message: "http.Get without an explicit timeout can hang indefinitely",
115        fix_suggestion: "Use an http.Client{Timeout: ...} or NewRequestWithContext",
116    },
117    RegexRuleSpec {
118        id: "GO003",
119        name: "exec-command",
120        category: MisuseCategory::Security,
121        severity: MisuseSeverity::High,
122        description: "exec.Command is risky when arguments or executable names come from input",
123        correct_usage: "Prefer direct library APIs or strictly validate allowed commands",
124        pattern: r"\bexec\.Command\s*\(",
125        api_call: "exec.Command",
126        message: "exec.Command can enable command injection when fed user-controlled values",
127        fix_suggestion: "Validate commands against an allowlist and avoid shell-like execution",
128    },
129    RegexRuleSpec {
130        id: "GO004",
131        name: "template-html-cast",
132        category: MisuseCategory::Security,
133        severity: MisuseSeverity::High,
134        description: "template.HTML bypasses html/template escaping guarantees",
135        correct_usage: "Pass plain strings to templates and let html/template escape them",
136        pattern: r"\btemplate\.HTML\s*\(",
137        api_call: "template.HTML",
138        message: "template.HTML disables escaping and can introduce XSS",
139        fix_suggestion: "Remove the cast and rely on html/template auto-escaping",
140    },
141    RegexRuleSpec {
142        id: "GO005",
143        name: "sql-query-without-context",
144        category: MisuseCategory::CallOrder,
145        severity: MisuseSeverity::Medium,
146        description:
147            "sql.DB.Query lacks cancellation and timeout propagation compared with QueryContext",
148        correct_usage: "Use db.QueryContext(ctx, query, args...)",
149        pattern: r"\bsql\.Query\s*\(",
150        api_call: "sql.Query",
151        message: "sql.Query omits context-driven cancellation and timeout handling",
152        fix_suggestion: "Use QueryContext/ExecContext with a bounded context",
153    },
154];
155
156const JAVA_RULE_SPECS: &[RegexRuleSpec] = &[
157    RegexRuleSpec {
158        id: "JV001",
159        name: "string-comparison-with-double-equals",
160        category: MisuseCategory::CallOrder,
161        severity: MisuseSeverity::Medium,
162        description: "Using == on strings compares references instead of values",
163        correct_usage: "Use value.equals(other) or Objects.equals(a, b)",
164        pattern: r#"(?:".*"|\b\w+\b)\s*==\s*(?:".*"|\b\w+\b)"#,
165        api_call: "==",
166        message: "String comparison with == checks reference identity, not value equality",
167        fix_suggestion: "Use .equals(...) or Objects.equals(...) for string values",
168    },
169    RegexRuleSpec {
170        id: "JV002",
171        name: "runtime-exec",
172        category: MisuseCategory::Security,
173        severity: MisuseSeverity::High,
174        description: "Runtime.exec is dangerous with dynamic input and hard to sandbox correctly",
175        correct_usage: "Use structured APIs or a ProcessBuilder with validated arguments",
176        pattern: r"\bRuntime\.getRuntime\(\)\.exec\s*\(",
177        api_call: "Runtime.exec",
178        message: "Runtime.exec is a common command injection footgun",
179        fix_suggestion: "Prefer library APIs or tightly validated ProcessBuilder arguments",
180    },
181    RegexRuleSpec {
182        id: "JV003",
183        name: "objectinputstream-deserialization",
184        category: MisuseCategory::Security,
185        severity: MisuseSeverity::High,
186        description:
187            "ObjectInputStream on untrusted data can trigger unsafe deserialization gadgets",
188        correct_usage: "Use safer formats like JSON with explicit schemas",
189        pattern: r"\bnew\s+ObjectInputStream\s*\(",
190        api_call: "ObjectInputStream",
191        message: "ObjectInputStream enables unsafe native Java deserialization",
192        fix_suggestion: "Replace native object deserialization with a schema-driven format",
193    },
194    RegexRuleSpec {
195        id: "JV004",
196        name: "create-statement",
197        category: MisuseCategory::Security,
198        severity: MisuseSeverity::Medium,
199        description:
200            "createStatement often leads to string-built SQL instead of prepared statements",
201        correct_usage: "Use prepareStatement with placeholders",
202        pattern: r"\bcreateStatement\s*\(",
203        api_call: "createStatement",
204        message: "createStatement encourages dynamic SQL and weak parameter handling",
205        fix_suggestion: "Use prepareStatement with bound parameters",
206    },
207    RegexRuleSpec {
208        id: "JV005",
209        name: "system-gc-call",
210        category: MisuseCategory::Resources,
211        severity: MisuseSeverity::Low,
212        description: "System.gc() is usually a performance smell and not a reliable memory fix",
213        correct_usage: "Remove manual GC triggers and profile allocations instead",
214        pattern: r"\bSystem\.gc\s*\(",
215        api_call: "System.gc",
216        message: "System.gc() is an unreliable manual GC hint and often harms latency",
217        fix_suggestion: "Remove the call and fix the underlying allocation or lifetime issue",
218    },
219];
220
221const JAVASCRIPT_RULE_SPECS: &[RegexRuleSpec] = &[
222    RegexRuleSpec {
223        id: "JS001",
224        name: "loose-equality",
225        category: MisuseCategory::CallOrder,
226        severity: MisuseSeverity::Medium,
227        description: "Loose equality allows coercions that frequently hide correctness bugs",
228        correct_usage: "Use === / !== except in deliberately reviewed coercion cases",
229        pattern: r"\s==\s|\s!=\s",
230        api_call: "==",
231        message: "Loose equality can coerce values unexpectedly",
232        fix_suggestion: "Use === or !== and handle explicit type conversion",
233    },
234    RegexRuleSpec {
235        id: "JS002",
236        name: "parseint-without-radix",
237        category: MisuseCategory::Parameters,
238        severity: MisuseSeverity::Low,
239        description: "parseInt without a radix is ambiguous and less explicit than required",
240        correct_usage: "Use parseInt(value, 10)",
241        pattern: r"\bparseInt\s*\(\s*[^,\)]+\)",
242        api_call: "parseInt",
243        message: "parseInt called without an explicit radix",
244        fix_suggestion: "Pass a radix explicitly, usually parseInt(value, 10)",
245    },
246    RegexRuleSpec {
247        id: "JS003",
248        name: "json-parse-without-guard",
249        category: MisuseCategory::ErrorHandling,
250        severity: MisuseSeverity::Low,
251        description: "JSON.parse throws on malformed input and should usually be guarded",
252        correct_usage: "Wrap JSON.parse in try/catch when input is not fully trusted",
253        pattern: r"\bJSON\.parse\s*\(",
254        api_call: "JSON.parse",
255        message: "JSON.parse can throw and should be guarded for untrusted input",
256        fix_suggestion: "Use try/catch or validated parsing for untrusted payloads",
257    },
258    RegexRuleSpec {
259        id: "JS004",
260        name: "document-write",
261        category: MisuseCategory::Security,
262        severity: MisuseSeverity::High,
263        description: "document.write is legacy, brittle, and can inject unsanitized HTML",
264        correct_usage: "Use DOM APIs like textContent/appendChild instead",
265        pattern: r"\bdocument\.write(?:ln)?\s*\(",
266        api_call: "document.write",
267        message: "document.write is unsafe and can enable XSS",
268        fix_suggestion: "Use safe DOM APIs instead of writing raw HTML strings",
269    },
270    RegexRuleSpec {
271        id: "JS005",
272        name: "eval-call",
273        category: MisuseCategory::Security,
274        severity: MisuseSeverity::High,
275        description: "eval executes dynamic code and should be avoided",
276        correct_usage: "Use structured data parsing or explicit dispatch tables",
277        pattern: r"\beval\s*\(",
278        api_call: "eval",
279        message: "eval executes dynamic code and creates major security risk",
280        fix_suggestion: "Replace eval with data parsing or explicit function dispatch",
281    },
282];
283
284const TYPESCRIPT_RULE_SPECS: &[RegexRuleSpec] = &[
285    RegexRuleSpec {
286        id: "TS001",
287        name: "loose-equality",
288        category: MisuseCategory::CallOrder,
289        severity: MisuseSeverity::Medium,
290        description: "Loose equality allows coercions that frequently hide correctness bugs",
291        correct_usage: "Use === / !== except in deliberately reviewed coercion cases",
292        pattern: r"\s==\s|\s!=\s",
293        api_call: "==",
294        message: "Loose equality can coerce values unexpectedly",
295        fix_suggestion: "Use === or !== and handle explicit type conversion",
296    },
297    RegexRuleSpec {
298        id: "TS002",
299        name: "parseint-without-radix",
300        category: MisuseCategory::Parameters,
301        severity: MisuseSeverity::Low,
302        description: "parseInt without a radix is ambiguous and less explicit than required",
303        correct_usage: "Use parseInt(value, 10)",
304        pattern: r"\bparseInt\s*\(\s*[^,\)]+\)",
305        api_call: "parseInt",
306        message: "parseInt called without an explicit radix",
307        fix_suggestion: "Pass a radix explicitly, usually parseInt(value, 10)",
308    },
309    RegexRuleSpec {
310        id: "TS003",
311        name: "json-parse-without-guard",
312        category: MisuseCategory::ErrorHandling,
313        severity: MisuseSeverity::Low,
314        description: "JSON.parse throws on malformed input and should usually be guarded",
315        correct_usage: "Wrap JSON.parse in try/catch when input is not fully trusted",
316        pattern: r"\bJSON\.parse\s*\(",
317        api_call: "JSON.parse",
318        message: "JSON.parse can throw and should be guarded for untrusted input",
319        fix_suggestion: "Use try/catch or validated parsing for untrusted payloads",
320    },
321    RegexRuleSpec {
322        id: "TS004",
323        name: "document-write",
324        category: MisuseCategory::Security,
325        severity: MisuseSeverity::High,
326        description: "document.write is legacy, brittle, and can inject unsanitized HTML",
327        correct_usage: "Use DOM APIs like textContent/appendChild instead",
328        pattern: r"\bdocument\.write(?:ln)?\s*\(",
329        api_call: "document.write",
330        message: "document.write is unsafe and can enable XSS",
331        fix_suggestion: "Use safe DOM APIs instead of writing raw HTML strings",
332    },
333    RegexRuleSpec {
334        id: "TS005",
335        name: "eval-call",
336        category: MisuseCategory::Security,
337        severity: MisuseSeverity::High,
338        description: "eval executes dynamic code and should be avoided",
339        correct_usage: "Use structured data parsing or explicit dispatch tables",
340        pattern: r"\beval\s*\(",
341        api_call: "eval",
342        message: "eval executes dynamic code and creates major security risk",
343        fix_suggestion: "Replace eval with data parsing or explicit function dispatch",
344    },
345];
346
347const C_RULE_SPECS: &[RegexRuleSpec] = &[
348    RegexRuleSpec {
349        id: "C001",
350        name: "gets-call",
351        category: MisuseCategory::Security,
352        severity: MisuseSeverity::High,
353        description: "gets cannot bound input and has been removed from the standard library",
354        correct_usage: "Use fgets with an explicit buffer length",
355        pattern: r"\bgets\s*\(",
356        api_call: "gets",
357        message: "gets is inherently unsafe and enables buffer overflows",
358        fix_suggestion: "Use fgets(buffer, size, stdin) or another bounded API",
359    },
360    RegexRuleSpec {
361        id: "C002",
362        name: "strcpy-call",
363        category: MisuseCategory::Security,
364        severity: MisuseSeverity::High,
365        description: "strcpy performs unbounded copies and easily overflows buffers",
366        correct_usage: "Use snprintf, strlcpy, or explicit bounds checks",
367        pattern: r"\bstrcpy\s*\(",
368        api_call: "strcpy",
369        message: "strcpy performs an unbounded copy",
370        fix_suggestion: "Replace strcpy with a bounded copy strategy",
371    },
372    RegexRuleSpec {
373        id: "C003",
374        name: "sprintf-call",
375        category: MisuseCategory::Security,
376        severity: MisuseSeverity::High,
377        description: "sprintf writes formatted data without a size bound",
378        correct_usage: "Use snprintf with the destination buffer size",
379        pattern: r"\bsprintf\s*\(",
380        api_call: "sprintf",
381        message: "sprintf can overflow fixed-size buffers",
382        fix_suggestion: "Use snprintf(buffer, size, ...) instead",
383    },
384    RegexRuleSpec {
385        id: "C004",
386        name: "scanf-string-without-width",
387        category: MisuseCategory::Security,
388        severity: MisuseSeverity::High,
389        description: "scanf with %s and no width limit can overflow the destination buffer",
390        correct_usage: "Provide a width specifier or use fgets",
391        pattern: r#"\bscanf\s*\(\s*"%s"#,
392        api_call: "scanf",
393        message: "scanf(\"%s\") reads unbounded input into a buffer",
394        fix_suggestion: "Add a width limit or use fgets plus parsing",
395    },
396    RegexRuleSpec {
397        id: "C005",
398        name: "system-call",
399        category: MisuseCategory::Security,
400        severity: MisuseSeverity::High,
401        description: "system executes a shell command and is dangerous with dynamic input",
402        correct_usage: "Use execve-family APIs with validated arguments where possible",
403        pattern: r"\bsystem\s*\(",
404        api_call: "system",
405        message: "system executes a shell and is a common command injection vector",
406        fix_suggestion: "Avoid shell execution or tightly validate the command source",
407    },
408];
409
410const CPP_RULE_SPECS: &[RegexRuleSpec] = &[
411    RegexRuleSpec {
412        id: "CPP001",
413        name: "strcpy-call",
414        category: MisuseCategory::Security,
415        severity: MisuseSeverity::High,
416        description: "strcpy performs unbounded copies and easily overflows buffers",
417        correct_usage: "Use std::string, snprintf, or another bounded copy strategy",
418        pattern: r"\bstrcpy\s*\(",
419        api_call: "strcpy",
420        message: "strcpy performs an unbounded copy",
421        fix_suggestion: "Use std::string or a bounded copy API instead",
422    },
423    RegexRuleSpec {
424        id: "CPP002",
425        name: "sprintf-call",
426        category: MisuseCategory::Security,
427        severity: MisuseSeverity::High,
428        description: "sprintf writes formatted data without a size bound",
429        correct_usage: "Use snprintf or std::format into a bounded container",
430        pattern: r"\bsprintf\s*\(",
431        api_call: "sprintf",
432        message: "sprintf can overflow fixed-size buffers",
433        fix_suggestion: "Use snprintf or a safer formatting abstraction",
434    },
435    RegexRuleSpec {
436        id: "CPP003",
437        name: "auto-ptr",
438        category: MisuseCategory::Resources,
439        severity: MisuseSeverity::Medium,
440        description: "std::auto_ptr is obsolete and has broken transfer semantics",
441        correct_usage: "Use std::unique_ptr or std::shared_ptr",
442        pattern: r"\bstd::auto_ptr\s*<",
443        api_call: "std::auto_ptr",
444        message: "std::auto_ptr is obsolete and unsafe by modern ownership standards",
445        fix_suggestion: "Replace std::auto_ptr with std::unique_ptr or std::shared_ptr",
446    },
447    RegexRuleSpec {
448        id: "CPP004",
449        name: "raw-new",
450        category: MisuseCategory::Resources,
451        severity: MisuseSeverity::Medium,
452        description: "Raw new often leads to leaks and exception-safety issues",
453        correct_usage: "Use std::make_unique or stack allocation where possible",
454        pattern: r"\bnew\s+\w",
455        api_call: "new",
456        message: "Raw new makes ownership and exception safety harder to reason about",
457        fix_suggestion: "Use std::make_unique, containers, or stack allocation",
458    },
459    RegexRuleSpec {
460        id: "CPP005",
461        name: "system-call",
462        category: MisuseCategory::Security,
463        severity: MisuseSeverity::High,
464        description: "system executes a shell command and is dangerous with dynamic input",
465        correct_usage: "Use direct process APIs with validated arguments when possible",
466        pattern: r"(?:\bstd::)?system\s*\(",
467        api_call: "system",
468        message: "system executes a shell and is a common command injection vector",
469        fix_suggestion: "Avoid shell execution or tightly validate all command components",
470    },
471];
472
473const RUBY_RULE_SPECS: &[RegexRuleSpec] = &[
474    RegexRuleSpec {
475        id: "RB001",
476        name: "eval-call",
477        category: MisuseCategory::Security,
478        severity: MisuseSeverity::High,
479        description: "eval executes dynamic Ruby code and should be avoided",
480        correct_usage: "Use explicit dispatch or data parsing instead of dynamic code execution",
481        pattern: r"\beval\s*\(",
482        api_call: "eval",
483        message: "eval executes dynamic code and creates major security risk",
484        fix_suggestion: "Replace eval with explicit dispatch or structured parsing",
485    },
486    RegexRuleSpec {
487        id: "RB002",
488        name: "dynamic-send",
489        category: MisuseCategory::Security,
490        severity: MisuseSeverity::Medium,
491        description: "send can invoke arbitrary methods when fed untrusted method names",
492        correct_usage: "Use public_send on a strict allowlist of method names",
493        pattern: r"\.send\s*\(",
494        api_call: "send",
495        message: "send can dispatch to unsafe or unexpected methods",
496        fix_suggestion: "Use public_send with a reviewed allowlist",
497    },
498    RegexRuleSpec {
499        id: "RB003",
500        name: "system-call",
501        category: MisuseCategory::Security,
502        severity: MisuseSeverity::High,
503        description: "system executes a shell command and is dangerous with interpolated input",
504        correct_usage: "Use array-form process APIs with validated arguments",
505        pattern: r"\bsystem\s*\(",
506        api_call: "system",
507        message: "system is a common command injection footgun",
508        fix_suggestion: "Avoid shell execution or pass validated argv-style arguments",
509    },
510    RegexRuleSpec {
511        id: "RB004",
512        name: "yaml-load",
513        category: MisuseCategory::Security,
514        severity: MisuseSeverity::High,
515        description: "YAML.load can instantiate arbitrary objects from untrusted input",
516        correct_usage: "Use YAML.safe_load with permitted classes",
517        pattern: r"\bYAML\.load\s*\(",
518        api_call: "YAML.load",
519        message: "YAML.load can deserialize unsafe objects",
520        fix_suggestion: "Use YAML.safe_load and restrict allowed classes",
521    },
522    RegexRuleSpec {
523        id: "RB005",
524        name: "marshal-load",
525        category: MisuseCategory::Security,
526        severity: MisuseSeverity::High,
527        description: "Marshal.load on untrusted data is unsafe deserialization",
528        correct_usage: "Use JSON or another safe, schema-checked format",
529        pattern: r"\bMarshal\.load\s*\(",
530        api_call: "Marshal.load",
531        message: "Marshal.load performs unsafe native deserialization",
532        fix_suggestion: "Replace Marshal.load with a safer serialization format",
533    },
534];
535
536const PHP_RULE_SPECS: &[RegexRuleSpec] = &[
537    RegexRuleSpec {
538        id: "PH001",
539        name: "deprecated-mysql-functions",
540        category: MisuseCategory::Security,
541        severity: MisuseSeverity::High,
542        description: "mysql_* APIs are removed and encourage unsafe query construction",
543        correct_usage: "Use PDO or mysqli with prepared statements",
544        pattern: r"\bmysql_[a-z_]+\s*\(",
545        api_call: "mysql_*",
546        message: "mysql_* functions are removed and unsafe by modern standards",
547        fix_suggestion: "Migrate to PDO or mysqli prepared statements",
548    },
549    RegexRuleSpec {
550        id: "PH002",
551        name: "extract-call",
552        category: MisuseCategory::Security,
553        severity: MisuseSeverity::Medium,
554        description: "extract pollutes local scope and can overwrite important variables",
555        correct_usage: "Read array keys explicitly instead of splatting them into scope",
556        pattern: r"\bextract\s*\(",
557        api_call: "extract",
558        message: "extract can overwrite local variables and hide data flow",
559        fix_suggestion: "Assign required keys explicitly instead of using extract",
560    },
561    RegexRuleSpec {
562        id: "PH003",
563        name: "eval-call",
564        category: MisuseCategory::Security,
565        severity: MisuseSeverity::High,
566        description: "eval executes dynamic PHP code and should be avoided",
567        correct_usage: "Use explicit dispatch or data parsing instead of dynamic code execution",
568        pattern: r"\beval\s*\(",
569        api_call: "eval",
570        message: "eval executes dynamic code and creates major security risk",
571        fix_suggestion: "Replace eval with explicit dispatch or structured parsing",
572    },
573    RegexRuleSpec {
574        id: "PH004",
575        name: "variable-variables",
576        category: MisuseCategory::Security,
577        severity: MisuseSeverity::Medium,
578        description: "Variable variables make scope mutation hard to reason about",
579        correct_usage: "Use associative arrays or explicit variables instead",
580        pattern: r"\$\$[A-Za-z_]",
581        api_call: "$$",
582        message: "Variable variables obscure data flow and can enable unsafe access patterns",
583        fix_suggestion: "Use an array/map or explicit variable names instead",
584    },
585    RegexRuleSpec {
586        id: "PH005",
587        name: "unserialize-call",
588        category: MisuseCategory::Security,
589        severity: MisuseSeverity::High,
590        description: "unserialize on untrusted data can trigger object injection chains",
591        correct_usage: "Use json_decode or a safer schema-checked format",
592        pattern: r"\bunserialize\s*\(",
593        api_call: "unserialize",
594        message: "unserialize enables unsafe object deserialization",
595        fix_suggestion: "Replace unserialize with json_decode or a safe serializer",
596    },
597];
598
599const KOTLIN_RULE_SPECS: &[RegexRuleSpec] = &[
600    RegexRuleSpec {
601        id: "KT001",
602        name: "force-unwrapped-null",
603        category: MisuseCategory::ErrorHandling,
604        severity: MisuseSeverity::Medium,
605        description: "!! converts nullable values into runtime crashes",
606        correct_usage: "Use safe calls, let, requireNotNull, or explicit branching",
607        pattern: r"!!",
608        api_call: "!!",
609        message: "!! will throw NullPointerException on null values",
610        fix_suggestion: "Use safe calls or explicit null handling instead of !!",
611    },
612    RegexRuleSpec {
613        id: "KT002",
614        name: "lateinit-var",
615        category: MisuseCategory::ErrorHandling,
616        severity: MisuseSeverity::Low,
617        description: "lateinit shifts initialization failures to runtime",
618        correct_usage: "Prefer constructor injection or nullable/state wrappers",
619        pattern: r"\blateinit\s+var\b",
620        api_call: "lateinit",
621        message: "lateinit can fail at runtime if the property is read before initialization",
622        fix_suggestion: "Prefer constructor injection or explicit nullable state",
623    },
624    RegexRuleSpec {
625        id: "KT003",
626        name: "globalscope-launch",
627        category: MisuseCategory::Concurrency,
628        severity: MisuseSeverity::Medium,
629        description: "GlobalScope.launch escapes structured concurrency and leaks work",
630        correct_usage: "Launch from a lifecycle-bound CoroutineScope",
631        pattern: r"\bGlobalScope\.launch\s*\(",
632        api_call: "GlobalScope.launch",
633        message: "GlobalScope.launch detaches work from structured concurrency",
634        fix_suggestion: "Use a lifecycle-bound CoroutineScope instead",
635    },
636    RegexRuleSpec {
637        id: "KT004",
638        name: "runtime-exec",
639        category: MisuseCategory::Security,
640        severity: MisuseSeverity::High,
641        description: "Runtime.exec is dangerous with dynamic input and hard to sandbox correctly",
642        correct_usage: "Use structured APIs or strictly validated ProcessBuilder arguments",
643        pattern: r"\bRuntime\.getRuntime\(\)\.exec\s*\(",
644        api_call: "Runtime.exec",
645        message: "Runtime.exec is a common command injection footgun",
646        fix_suggestion: "Prefer library APIs or tightly validated ProcessBuilder arguments",
647    },
648    RegexRuleSpec {
649        id: "KT005",
650        name: "thread-sleep",
651        category: MisuseCategory::Concurrency,
652        severity: MisuseSeverity::Low,
653        description:
654            "Thread.sleep blocks threads directly and is usually wrong in coroutine-based code",
655        correct_usage: "Use delay(...) in coroutines or higher-level scheduling",
656        pattern: r"\bThread\.sleep\s*\(",
657        api_call: "Thread.sleep",
658        message: "Thread.sleep blocks the current thread directly",
659        fix_suggestion: "Use delay(...) or a proper scheduler instead",
660    },
661];
662
663const SWIFT_RULE_SPECS: &[RegexRuleSpec] = &[
664    RegexRuleSpec {
665        id: "SW001",
666        name: "forced-cast",
667        category: MisuseCategory::ErrorHandling,
668        severity: MisuseSeverity::Medium,
669        description: "as! crashes at runtime when the cast fails",
670        correct_usage: "Use as? with conditional handling",
671        pattern: r"\bas!\b",
672        api_call: "as!",
673        message: "Forced casts crash when the runtime type is different",
674        fix_suggestion: "Use as? and handle the nil case explicitly",
675    },
676    RegexRuleSpec {
677        id: "SW002",
678        name: "forced-try",
679        category: MisuseCategory::ErrorHandling,
680        severity: MisuseSeverity::Medium,
681        description: "try! crashes when the call throws",
682        correct_usage: "Use do/catch or try? with explicit fallback",
683        pattern: r"\btry!\b",
684        api_call: "try!",
685        message: "try! crashes the process on thrown errors",
686        fix_suggestion: "Use do/catch or try? and handle failure explicitly",
687    },
688    RegexRuleSpec {
689        id: "SW003",
690        name: "force-unwrap",
691        category: MisuseCategory::ErrorHandling,
692        severity: MisuseSeverity::Medium,
693        description: "Force unwrapping optionals crashes at runtime on nil",
694        correct_usage: "Use if let, guard let, or nil-coalescing",
695        pattern: r"\b[A-Za-z_][A-Za-z0-9_]*!",
696        api_call: "!",
697        message: "Force unwraps crash when the optional is nil",
698        fix_suggestion: "Use optional binding or nil-coalescing instead of force unwraps",
699    },
700    RegexRuleSpec {
701        id: "SW004",
702        name: "nskeyedunarchiver",
703        category: MisuseCategory::Security,
704        severity: MisuseSeverity::High,
705        description: "Legacy NSKeyedUnarchiver APIs on untrusted data are unsafe",
706        correct_usage: "Use secure decoding APIs with requiresSecureCoding",
707        pattern: r"\bNSKeyedUnarchiver\.unarchiveObject",
708        api_call: "NSKeyedUnarchiver",
709        message: "Legacy unarchiving can deserialize unexpected object graphs",
710        fix_suggestion: "Use secure coding APIs and schema-checked decoding",
711    },
712    RegexRuleSpec {
713        id: "SW005",
714        name: "fatalerror-call",
715        category: MisuseCategory::ErrorHandling,
716        severity: MisuseSeverity::Low,
717        description:
718            "fatalError terminates the process and is risky outside clearly impossible states",
719        correct_usage: "Return/throw recoverable errors where possible",
720        pattern: r"\bfatalError\s*\(",
721        api_call: "fatalError",
722        message: "fatalError terminates the process immediately",
723        fix_suggestion: "Use recoverable error handling unless the state is truly unreachable",
724    },
725];
726
/// Regex-driven API misuse rules for C# sources (CS001-CS005).
///
/// `regex_rule_specs_for_language` returns this table for `ApiLanguage::CSharp`.
/// Each entry pairs the rule metadata (`id`, `name`, `category`, `severity`,
/// `description`, `correct_usage`) with the `pattern` that triggers it and the
/// `api_call` / `message` / `fix_suggestion` text attached to a finding.
const CSHARP_RULE_SPECS: &[RegexRuleSpec] = &[
    // CS001: matches the bare type name, so declarations, `new` expressions and
    // `using` aliases mentioning BinaryFormatter are all flagged.
    RegexRuleSpec {
        id: "CS001",
        name: "binaryformatter",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "BinaryFormatter is insecure and obsolete for untrusted data",
        correct_usage: "Use System.Text.Json or another safe serializer",
        pattern: r"\bBinaryFormatter\b",
        api_call: "BinaryFormatter",
        message: "BinaryFormatter is insecure and should not be used",
        fix_suggestion: "Use System.Text.Json or another safe serializer",
    },
    // CS002: only call sites (`GC.Collect` followed by an opening parenthesis).
    RegexRuleSpec {
        id: "CS002",
        name: "gc-collect",
        category: MisuseCategory::Resources,
        severity: MisuseSeverity::Low,
        description: "GC.Collect is rarely the right fix and often harms latency",
        correct_usage: "Remove manual GC triggers and profile the real allocation issue",
        pattern: r"\bGC\.Collect\s*\(",
        api_call: "GC.Collect",
        message: "GC.Collect is an unreliable manual GC hint and often harms performance",
        fix_suggestion: "Remove the call and fix the underlying allocation issue",
    },
    // CS003: the pattern is purely textual - any `.Result` member access matches,
    // whether or not the receiver is a Task.
    RegexRuleSpec {
        id: "CS003",
        name: "task-result",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Task.Result blocks synchronously and can deadlock async flows",
        correct_usage: "Use await instead of blocking on Task.Result",
        pattern: r"\.Result\b",
        api_call: "Task.Result",
        message: "Task.Result blocks synchronously and can deadlock async contexts",
        fix_suggestion: "Use await and keep the async chain asynchronous",
    },
    // CS004: likewise textual - any `.Wait(` call matches regardless of receiver type.
    RegexRuleSpec {
        id: "CS004",
        name: "task-wait",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Task.Wait blocks synchronously and can deadlock async flows",
        correct_usage: "Use await or WhenAll/WhenAny instead of blocking waits",
        pattern: r"\.Wait\s*\(",
        api_call: "Task.Wait",
        message: "Task.Wait blocks synchronously and can deadlock async contexts",
        fix_suggestion: "Use await or asynchronous coordination primitives instead",
    },
    // CS005: every `Process.Start(` call is flagged; the pattern cannot tell
    // whether the arguments are attacker-influenced.
    RegexRuleSpec {
        id: "CS005",
        name: "process-start",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Process.Start is dangerous with untrusted paths or arguments",
        correct_usage: "Use strict allowlists and avoid shell execution semantics",
        pattern: r"\bProcess\.Start\s*\(",
        api_call: "Process.Start",
        message: "Process.Start can enable command injection with untrusted inputs",
        fix_suggestion: "Validate executable and arguments against a strict allowlist",
    },
];
789
/// Regex-driven API misuse rules for Scala sources (SC001-SC005).
///
/// `regex_rule_specs_for_language` returns this table for `ApiLanguage::Scala`.
/// All patterns are plain text matches; none of them inspect types or scopes.
const SCALA_RULE_SPECS: &[RegexRuleSpec] = &[
    // SC001: matches the `null` token wherever it appears as a whole word,
    // which includes occurrences inside string literals.
    RegexRuleSpec {
        id: "SC001",
        name: "null-usage",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Low,
        description: "null bypasses Scala's stronger option-based absence modeling",
        correct_usage: "Use Option instead of null",
        pattern: r"\bnull\b",
        api_call: "null",
        message: "null reintroduces runtime absence bugs into Scala code",
        fix_suggestion: "Use Option and explicit pattern matching instead",
    },
    // SC002: requires the opening `[` of the type argument directly after the name.
    RegexRuleSpec {
        id: "SC002",
        name: "asinstanceof-cast",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Medium,
        description: "asInstanceOf crashes at runtime when the type assumption is wrong",
        correct_usage: "Use pattern matching or TypeTag/ClassTag-aware APIs",
        pattern: r"\basInstanceOf\[",
        api_call: "asInstanceOf",
        message: "asInstanceOf creates unchecked runtime casts",
        fix_suggestion: "Use pattern matching or safer typed abstractions",
    },
    // SC003: call sites of `Await.result(` only.
    RegexRuleSpec {
        id: "SC003",
        name: "await-result",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Await.result blocks threads and can collapse asynchronous throughput",
        correct_usage: "Compose futures asynchronously instead of blocking",
        pattern: r"\bAwait\.result\s*\(",
        api_call: "Await.result",
        message: "Await.result blocks threads and can create deadlocks or latency spikes",
        fix_suggestion: "Use map/flatMap/for-comprehensions instead of blocking",
    },
    // SC004: only fully qualified `scala.collection.mutable.` references match;
    // names brought in through an import and used unqualified do not.
    RegexRuleSpec {
        id: "SC004",
        name: "mutable-collection",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Low,
        description: "scala.collection.mutable structures are harder to reason about under concurrency",
        correct_usage: "Prefer immutable collections unless mutation is intentionally scoped",
        pattern: r"\bscala\.collection\.mutable\.",
        api_call: "scala.collection.mutable",
        message: "Mutable collections can hide shared-state bugs",
        fix_suggestion: "Prefer immutable collections or encapsulate mutation carefully",
    },
    // SC005: only the fully qualified `sys.process.Process(` form matches.
    RegexRuleSpec {
        id: "SC005",
        name: "sys-process",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "sys.process.Process executes external commands and is dangerous with input-derived values",
        correct_usage: "Use library APIs or validate commands and arguments against an allowlist",
        pattern: r"\bsys\.process\.Process\s*\(",
        api_call: "sys.process.Process",
        message: "sys.process.Process can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell-style execution or strictly validate all command parts",
    },
];
852
/// Regex-driven API misuse rules for Elixir sources (EX001-EX005).
///
/// `regex_rule_specs_for_language` returns this table for `ApiLanguage::Elixir`.
/// Every pattern requires an opening parenthesis after the function name, so
/// calls written without parentheses or through the pipe operator without `(`
/// are not matched.
const ELIXIR_RULE_SPECS: &[RegexRuleSpec] = &[
    RegexRuleSpec {
        id: "EX001",
        name: "string-to-atom",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "String.to_atom on untrusted input can exhaust the VM atom table",
        correct_usage: "Use String.to_existing_atom only for reviewed values or keep strings",
        pattern: r"\bString\.to_atom\s*\(",
        api_call: "String.to_atom",
        message: "String.to_atom can permanently grow the atom table from user input",
        fix_suggestion: "Keep values as strings or use a reviewed to_existing_atom path",
    },
    RegexRuleSpec {
        id: "EX002",
        name: "code-eval-string",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Code.eval_string executes dynamic Elixir code and should be avoided",
        correct_usage: "Use explicit dispatch or data parsing instead of dynamic evaluation",
        pattern: r"\bCode\.eval_string\s*\(",
        api_call: "Code.eval_string",
        message: "Code.eval_string executes dynamic code and is a major security risk",
        fix_suggestion: "Replace dynamic evaluation with explicit dispatch or parsing",
    },
    // EX003: no leading `\b` because the pattern starts with the `:` of the
    // Erlang module atom, which is not a word character.
    RegexRuleSpec {
        id: "EX003",
        name: "binary-to-term",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: ":erlang.binary_to_term on untrusted data is unsafe deserialization",
        correct_usage: "Use safe formats like JSON or term_to_binary only for trusted data",
        pattern: r":erlang\.binary_to_term\s*\(",
        api_call: ":erlang.binary_to_term",
        message: ":erlang.binary_to_term can deserialize unsafe terms from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // EX004: only `File.read!` is covered; other bang variants are not listed here.
    RegexRuleSpec {
        id: "EX004",
        name: "file-read-bang",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Low,
        description: "Bang file APIs raise instead of returning tagged tuples",
        correct_usage: "Prefer File.read/1 with explicit {:ok, data} / {:error, reason} handling",
        pattern: r"\bFile\.read!\s*\(",
        api_call: "File.read!",
        message: "File.read! raises on failure instead of returning a recoverable error",
        fix_suggestion: "Use File.read/1 and handle the returned tuple explicitly",
    },
    // EX005: matches only when `:infinity` is the argument right after the first
    // comma and the first argument itself contains no comma (`[^,]+`).
    RegexRuleSpec {
        id: "EX005",
        name: "task-await-infinity",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Task.await with :infinity can stall callers indefinitely",
        correct_usage: "Use bounded timeouts and supervised retry/cancellation behavior",
        pattern: r"\bTask\.await\s*\([^,]+,\s*:infinity\s*\)",
        api_call: "Task.await",
        message: "Task.await(..., :infinity) can block forever",
        fix_suggestion: "Use a bounded timeout and explicit failure handling",
    },
];
915
916const LUA_RULE_SPECS: &[RegexRuleSpec] = &[
917    RegexRuleSpec {
918        id: "LU001",
919        name: "implicit-global",
920        category: MisuseCategory::CallOrder,
921        severity: MisuseSeverity::Low,
922        description: "Assigning without local leaks mutable globals and creates hidden coupling",
923        correct_usage: "Declare locals explicitly with local name = ...",
924        pattern: r"^[A-Za-z_][A-Za-z0-9_]*\s*=",
925        api_call: "global assignment",
926        message: "Implicit global assignment leaks state outside local scope",
927        fix_suggestion: "Prefix the binding with local to keep scope explicit",
928    },
929    RegexRuleSpec {
930        id: "LU002",
931        name: "dynamic-load",
932        category: MisuseCategory::Security,
933        severity: MisuseSeverity::High,
934        description: "load/loadstring execute dynamic Lua code and should be avoided",
935        correct_usage: "Use structured parsing or explicit dispatch instead of dynamic evaluation",
936        pattern: r"\b(?:loadstring|load)\s*\(",
937        api_call: "load",
938        message: "Dynamic code loading executes attacker-controlled Lua if fed untrusted input",
939        fix_suggestion: "Replace dynamic evaluation with explicit dispatch or parsing",
940    },
941    RegexRuleSpec {
942        id: "LU003",
943        name: "os-execute",
944        category: MisuseCategory::Security,
945        severity: MisuseSeverity::High,
946        description: "os.execute shells out and is dangerous with dynamic input",
947        correct_usage: "Avoid shell execution or validate every command component",
948        pattern: r"\bos\.execute\s*\(",
949        api_call: "os.execute",
950        message: "os.execute can enable command injection with untrusted input",
951        fix_suggestion: "Avoid shelling out or strictly validate the command source",
952    },
953    RegexRuleSpec {
954        id: "LU004",
955        name: "io-popen",
956        category: MisuseCategory::Security,
957        severity: MisuseSeverity::High,
958        description: "io.popen launches shell commands and should be treated as high risk",
959        correct_usage: "Use safer process APIs or validate all command components",
960        pattern: r"\bio\.popen\s*\(",
961        api_call: "io.popen",
962        message: "io.popen can enable command injection with untrusted input",
963        fix_suggestion: "Avoid shell execution or validate every command component",
964    },
965    RegexRuleSpec {
966        id: "LU005",
967        name: "dofile-loadfile",
968        category: MisuseCategory::Security,
969        severity: MisuseSeverity::Medium,
970        description:
971            "dofile/loadfile execute external files and are risky with user-controlled paths",
972        correct_usage: "Validate file origins strictly before executing them",
973        pattern: r"\b(?:dofile|loadfile)\s*\(",
974        api_call: "dofile",
975        message: "Executing external files is dangerous when the path is not fully trusted",
976        fix_suggestion: "Avoid dynamic file execution or tightly validate trusted origins",
977    },
978];
979
/// Regex-driven API misuse rules for OCaml sources (OC001-OC005).
///
/// `regex_rule_specs_for_language` returns this table for `ApiLanguage::Ocaml`.
/// Unlike the call-oriented tables, these patterns end in `\b` rather than
/// `\s*\(`, so any whole-word mention of the identifier matches, with or
/// without a following argument.
const OCAML_RULE_SPECS: &[RegexRuleSpec] = &[
    RegexRuleSpec {
        id: "OC001",
        name: "marshal-from-string",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Marshal.from_string on untrusted data is unsafe native deserialization",
        correct_usage: "Use a safe, schema-checked serialization format",
        pattern: r"\bMarshal\.from_string\b",
        api_call: "Marshal.from_string",
        message: "Marshal.from_string can deserialize unsafe values from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    RegexRuleSpec {
        id: "OC002",
        name: "marshal-from-channel",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Marshal.from_channel on untrusted data is unsafe native deserialization",
        correct_usage: "Use a safe, schema-checked serialization format",
        pattern: r"\bMarshal\.from_channel\b",
        api_call: "Marshal.from_channel",
        message: "Marshal.from_channel can deserialize unsafe values from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    RegexRuleSpec {
        id: "OC003",
        name: "sys-command",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Sys.command executes a shell command and is dangerous with dynamic input",
        correct_usage: "Prefer direct library APIs or validate allowed commands strictly",
        pattern: r"\bSys\.command\b",
        api_call: "Sys.command",
        message: "Sys.command can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell execution or tightly validate the command source",
    },
    // OC004: filed under ErrorHandling although the messages describe a
    // type-safety problem.
    RegexRuleSpec {
        id: "OC004",
        name: "obj-magic",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::High,
        description: "Obj.magic bypasses the type system and can produce memory-unsound behavior",
        correct_usage: "Use typed abstractions or explicit variant handling",
        pattern: r"\bObj\.magic\b",
        api_call: "Obj.magic",
        message: "Obj.magic bypasses type safety and can create undefined behavior",
        fix_suggestion: "Refactor to a typed abstraction instead of coercing with Obj.magic",
    },
    // OC005: matches `open_in` and `open_out` as whole words only (`_` is a word
    // character, so `open_in_bin` is not matched). The api_call label is
    // "open_in" for either alternative.
    RegexRuleSpec {
        id: "OC005",
        name: "open-in-out",
        category: MisuseCategory::Resources,
        severity: MisuseSeverity::Low,
        description: "open_in/open_out require explicit close calls and are easy to leak",
        correct_usage: "Use In_channel.with_open_* or Out_channel.with_open_* helpers",
        pattern: r"\b(?:open_in|open_out)\b",
        api_call: "open_in",
        message: "open_in/open_out require explicit close handling and are easy to leak",
        fix_suggestion: "Use with_open_* helpers to scope the channel lifetime",
    },
];
1042
/// Every `ApiLanguage` variant, listed in the enum's declaration order.
///
/// Exposed through `all_api_languages()`; `ApiCheckArgs::run` iterates it to
/// total the number of rules across all languages for the report.
// Maintained by hand: nothing in this file forces the list to stay in sync with
// the enum, so a new variant must be appended here as well.
const ALL_API_LANGUAGES: &[ApiLanguage] = &[
    ApiLanguage::Python,
    ApiLanguage::Rust,
    ApiLanguage::Go,
    ApiLanguage::Java,
    ApiLanguage::JavaScript,
    ApiLanguage::TypeScript,
    ApiLanguage::C,
    ApiLanguage::Cpp,
    ApiLanguage::Ruby,
    ApiLanguage::Php,
    ApiLanguage::Kotlin,
    ApiLanguage::Swift,
    ApiLanguage::CSharp,
    ApiLanguage::Scala,
    ApiLanguage::Elixir,
    ApiLanguage::Lua,
    ApiLanguage::Luau,
    ApiLanguage::Ocaml,
];
1063
1064// =============================================================================
1065// Rule Definitions
1066// =============================================================================
1067
1068/// Built-in Python API misuse rules
1069fn python_rules() -> Vec<APIRule> {
1070    vec![
1071        APIRule {
1072            id: "PY001".to_string(),
1073            name: "missing-timeout".to_string(),
1074            category: MisuseCategory::Parameters,
1075            severity: MisuseSeverity::High,
1076            description: "requests.get/post/etc without timeout parameter can hang indefinitely"
1077                .to_string(),
1078            correct_usage: "requests.get(url, timeout=30)".to_string(),
1079        },
1080        APIRule {
1081            id: "PY002".to_string(),
1082            name: "bare-except".to_string(),
1083            category: MisuseCategory::ErrorHandling,
1084            severity: MisuseSeverity::Medium,
1085            description: "Bare except clause catches all exceptions including KeyboardInterrupt"
1086                .to_string(),
1087            correct_usage: "except Exception as e:".to_string(),
1088        },
1089        APIRule {
1090            id: "PY003".to_string(),
1091            name: "weak-hash-md5".to_string(),
1092            category: MisuseCategory::Crypto,
1093            severity: MisuseSeverity::High,
1094            description: "MD5 is cryptographically broken, don't use for security purposes"
1095                .to_string(),
1096            correct_usage: "hashlib.sha256() or bcrypt for passwords".to_string(),
1097        },
1098        APIRule {
1099            id: "PY004".to_string(),
1100            name: "weak-hash-sha1".to_string(),
1101            category: MisuseCategory::Crypto,
1102            severity: MisuseSeverity::High,
1103            description: "SHA1 is cryptographically weak, don't use for security purposes"
1104                .to_string(),
1105            correct_usage: "hashlib.sha256() or stronger".to_string(),
1106        },
1107        APIRule {
1108            id: "PY005".to_string(),
1109            name: "unclosed-file".to_string(),
1110            category: MisuseCategory::Resources,
1111            severity: MisuseSeverity::Medium,
1112            description: "File opened without context manager may not be properly closed"
1113                .to_string(),
1114            correct_usage: "with open(path) as f:".to_string(),
1115        },
1116        APIRule {
1117            id: "PY006".to_string(),
1118            name: "insecure-random".to_string(),
1119            category: MisuseCategory::Security,
1120            severity: MisuseSeverity::High,
1121            description: "random module is not cryptographically secure".to_string(),
1122            correct_usage: "secrets.token_bytes() or secrets.token_hex()".to_string(),
1123        },
1124    ]
1125}
1126
1127/// Built-in Rust API misuse rules
1128fn rust_rules() -> Vec<APIRule> {
1129    vec![
1130        APIRule {
1131            id: "RS001".to_string(),
1132            name: "mutex-lock-unwrap".to_string(),
1133            category: MisuseCategory::Concurrency,
1134            severity: MisuseSeverity::Medium,
1135            description: "Mutex::lock().unwrap() can panic and amplify lock contention (CWE-833)"
1136                .to_string(),
1137            correct_usage:
1138                "Prefer try_lock()/error handling or explicit poison recovery instead of unwrap()"
1139                    .to_string(),
1140        },
1141        APIRule {
1142            id: "RS002".to_string(),
1143            name: "file-open-without-context".to_string(),
1144            category: MisuseCategory::ErrorHandling,
1145            severity: MisuseSeverity::Low,
1146            description:
1147                "File::open without contextual error mapping makes failures hard to triage"
1148                    .to_string(),
1149            correct_usage:
1150                "File::open(path).with_context(|| format!(\"opening {}\", path.display()))?"
1151                    .to_string(),
1152        },
1153        APIRule {
1154            id: "RS003".to_string(),
1155            name: "unbounded-with-capacity".to_string(),
1156            category: MisuseCategory::Resources,
1157            severity: MisuseSeverity::High,
1158            description:
1159                "Vec::with_capacity fed from unbounded input can cause memory exhaustion (CWE-770)"
1160                    .to_string(),
1161            correct_usage: "Clamp capacity input before allocation (e.g. min(user_len, MAX))"
1162                .to_string(),
1163        },
1164        APIRule {
1165            id: "RS004".to_string(),
1166            name: "detached-tokio-spawn".to_string(),
1167            category: MisuseCategory::Concurrency,
1168            severity: MisuseSeverity::Medium,
1169            description: "tokio::spawn without retaining JoinHandle risks silent task failures"
1170                .to_string(),
1171            correct_usage: "Store JoinHandle values and await/join them".to_string(),
1172        },
1173        APIRule {
1174            id: "RS005".to_string(),
1175            name: "hashmap-order-dependence".to_string(),
1176            category: MisuseCategory::CallOrder,
1177            severity: MisuseSeverity::Low,
1178            description:
1179                "HashMap iteration order is non-deterministic; relying on it can break logic"
1180                    .to_string(),
1181            correct_usage:
1182                "Collect keys and sort them, or use BTreeMap/IndexMap when stable order is required"
1183                    .to_string(),
1184        },
1185        APIRule {
1186            id: "RS006".to_string(),
1187            name: "clone-in-hot-loop".to_string(),
1188            category: MisuseCategory::Resources,
1189            severity: MisuseSeverity::Low,
1190            description: "clone() inside loop bodies can create avoidable allocation pressure"
1191                .to_string(),
1192            correct_usage: "Borrow or move values instead of cloning in tight loops".to_string(),
1193        },
1194    ]
1195}
1196
/// Return the static regex rule table that belongs to `language`.
///
/// Python and Rust map to an empty slice: `rules_for_language` builds their
/// rules from `python_rules()` / `rust_rules()` instead of a regex table.
/// Lua and Luau share `LUA_RULE_SPECS`.
fn regex_rule_specs_for_language(language: ApiLanguage) -> &'static [RegexRuleSpec] {
    // Deliberately exhaustive (no `_` arm): adding an `ApiLanguage` variant
    // fails to compile here until a table is chosen for it.
    match language {
        ApiLanguage::Python | ApiLanguage::Rust => &[],
        ApiLanguage::Go => GO_RULE_SPECS,
        ApiLanguage::Java => JAVA_RULE_SPECS,
        ApiLanguage::JavaScript => JAVASCRIPT_RULE_SPECS,
        ApiLanguage::TypeScript => TYPESCRIPT_RULE_SPECS,
        ApiLanguage::C => C_RULE_SPECS,
        ApiLanguage::Cpp => CPP_RULE_SPECS,
        ApiLanguage::Ruby => RUBY_RULE_SPECS,
        ApiLanguage::Php => PHP_RULE_SPECS,
        ApiLanguage::Kotlin => KOTLIN_RULE_SPECS,
        ApiLanguage::Swift => SWIFT_RULE_SPECS,
        ApiLanguage::CSharp => CSHARP_RULE_SPECS,
        ApiLanguage::Scala => SCALA_RULE_SPECS,
        ApiLanguage::Elixir => ELIXIR_RULE_SPECS,
        ApiLanguage::Lua | ApiLanguage::Luau => LUA_RULE_SPECS,
        ApiLanguage::Ocaml => OCAML_RULE_SPECS,
    }
}
1217
/// Return the full list of languages the API checker knows about
/// (the `ALL_API_LANGUAGES` table).
fn all_api_languages() -> &'static [ApiLanguage] {
    ALL_API_LANGUAGES
}
1221
1222// =============================================================================
1223// CLI Arguments
1224// =============================================================================
1225
/// Detect API misuse patterns in code
///
/// Analyzes code for common API misuse patterns like missing timeouts,
/// bare except clauses, weak crypto usage, and unclosed resources.
///
/// # Example
///
/// ```bash
/// tldr api-check src/
/// tldr api-check src/main.py --category crypto
/// tldr api-check src/ --severity high
/// ```
// NOTE: with clap's derive API the `///` comments in this block are rendered
// as `--help` text, so they are user-facing strings. Implementation notes are
// therefore kept in plain `//` comments.
#[derive(Debug, Args)]
pub struct ApiCheckArgs {
    /// File or directory to analyze (path to file or directory)
    // Positional argument. `run` returns a file-not-found error when the path
    // does not exist, before any file is collected.
    #[arg(value_name = "path")]
    pub path: PathBuf,

    /// Filter by misuse category
    // Accepts a comma-separated list (`value_delimiter = ','`); `run` forwards
    // it to `filter_findings` as an optional slice.
    #[arg(long, value_delimiter = ',')]
    pub category: Option<Vec<MisuseCategory>>,

    /// Filter by minimum severity
    // NOTE(review): the help text says "minimum", but the argument is parsed as
    // a comma-separated list of severities. Whether `filter_findings` treats it
    // as a threshold or as a set is not visible here - confirm and align the wording.
    #[arg(long, value_delimiter = ',')]
    pub severity: Option<Vec<MisuseSeverity>>,

    /// Output file (optional, stdout if not specified)
    // Short flag is an upper-case `-O`. When set, `run` writes the text report
    // or pretty-printed JSON to this path with `fs::write`.
    #[arg(long, short = 'O')]
    pub output: Option<PathBuf>,
}
1256
1257impl ApiCheckArgs {
1258    /// Run the api-check command
1259    pub fn run(&self, format: crate::output::OutputFormat, quiet: bool) -> Result<()> {
1260        let writer = OutputWriter::new(format, quiet);
1261
1262        writer.progress(&format!(
1263            "Checking {} for API misuse patterns...",
1264            self.path.display()
1265        ));
1266
1267        // Validate path exists
1268        if !self.path.exists() {
1269            return Err(RemainingError::file_not_found(&self.path).into());
1270        }
1271
1272        let all_rules_count = all_api_languages()
1273            .iter()
1274            .map(|language| rules_for_language(*language).len() as u32)
1275            .sum();
1276
1277        // Collect files to analyze
1278        let files = collect_files(&self.path)?;
1279        writer.progress(&format!("Found {} files to analyze", files.len()));
1280
1281        // Analyze each file
1282        let mut all_findings: Vec<MisuseFinding> = Vec::new();
1283        let mut files_scanned = 0u32;
1284
1285        for file_path in &files {
1286            let Some(language) = detect_language(file_path) else {
1287                continue;
1288            };
1289            let rules = rules_for_language(language);
1290            match analyze_file(file_path, &rules, language) {
1291                Ok(findings) => {
1292                    all_findings.extend(findings);
1293                    files_scanned += 1;
1294                }
1295                Err(e) => {
1296                    writer.progress(&format!(
1297                        "Warning: Failed to analyze {}: {}",
1298                        file_path.display(),
1299                        e
1300                    ));
1301                }
1302            }
1303        }
1304
1305        // Apply filters
1306        let filtered_findings = filter_findings(
1307            all_findings,
1308            self.category.as_deref(),
1309            self.severity.as_deref(),
1310        );
1311
1312        // Build summary
1313        let summary = build_summary(&filtered_findings, files_scanned);
1314
1315        // Build report
1316        let report = APICheckReport {
1317            findings: filtered_findings,
1318            summary,
1319            rules_applied: all_rules_count,
1320        };
1321
1322        // Write output
1323        if let Some(ref output_path) = self.output {
1324            if writer.is_text() {
1325                let text = format_api_check_text(&report);
1326                fs::write(output_path, text)?;
1327            } else {
1328                let json = serde_json::to_string_pretty(&report)?;
1329                fs::write(output_path, json)?;
1330            }
1331        } else if writer.is_text() {
1332            let text = format_api_check_text(&report);
1333            writer.write_text(&text)?;
1334        } else {
1335            writer.write(&report)?;
1336        }
1337
1338        Ok(())
1339    }
1340}
1341
1342// =============================================================================
1343// File Collection
1344// =============================================================================
1345
1346/// Collect supported source files from a path
1347fn collect_files(path: &Path) -> Result<Vec<PathBuf>> {
1348    let mut files = Vec::new();
1349
1350    if path.is_file() {
1351        if is_supported_file(path) {
1352            files.push(path.to_path_buf());
1353        }
1354    } else if path.is_dir() {
1355        for entry in walk_project(path) {
1356            if files.len() >= MAX_DIRECTORY_FILES as usize {
1357                break;
1358            }
1359
1360            let entry_path = entry.path();
1361            if entry_path.is_file() && is_supported_file(entry_path) {
1362                // Check file size
1363                if let Ok(metadata) = fs::metadata(entry_path) {
1364                    if metadata.len() <= MAX_FILE_SIZE {
1365                        files.push(entry_path.to_path_buf());
1366                    }
1367                }
1368            }
1369        }
1370    }
1371
1372    Ok(files)
1373}
1374
1375/// Check if a path has a supported extension.
1376fn is_supported_file(path: &Path) -> bool {
1377    detect_language(path).is_some()
1378}
1379
1380pub(crate) fn detect_language(path: &Path) -> Option<ApiLanguage> {
1381    match path.extension().and_then(|e| e.to_str()) {
1382        Some("py") => Some(ApiLanguage::Python),
1383        Some("rs") => Some(ApiLanguage::Rust),
1384        Some("go") => Some(ApiLanguage::Go),
1385        Some("java") => Some(ApiLanguage::Java),
1386        Some("js") | Some("jsx") | Some("mjs") | Some("cjs") => Some(ApiLanguage::JavaScript),
1387        Some("ts") | Some("tsx") => Some(ApiLanguage::TypeScript),
1388        Some("c") | Some("h") => Some(ApiLanguage::C),
1389        Some("cpp") | Some("hpp") | Some("cc") | Some("cxx") => Some(ApiLanguage::Cpp),
1390        Some("rb") => Some(ApiLanguage::Ruby),
1391        Some("php") => Some(ApiLanguage::Php),
1392        Some("kt") | Some("kts") => Some(ApiLanguage::Kotlin),
1393        Some("swift") => Some(ApiLanguage::Swift),
1394        Some("cs") => Some(ApiLanguage::CSharp),
1395        Some("scala") => Some(ApiLanguage::Scala),
1396        Some("ex") | Some("exs") => Some(ApiLanguage::Elixir),
1397        Some("lua") => Some(ApiLanguage::Lua),
1398        Some("luau") => Some(ApiLanguage::Luau),
1399        Some("ml") | Some("mli") => Some(ApiLanguage::Ocaml),
1400        _ => None,
1401    }
1402}
1403
1404pub(crate) fn rules_for_language(language: ApiLanguage) -> Vec<APIRule> {
1405    match language {
1406        ApiLanguage::Python => python_rules(),
1407        ApiLanguage::Rust => rust_rules(),
1408        _ => regex_rule_specs_for_language(language)
1409            .iter()
1410            .copied()
1411            .map(RegexRuleSpec::rule)
1412            .collect(),
1413    }
1414}
1415
1416// =============================================================================
1417// Analysis Engine
1418// =============================================================================
1419
1420/// Analyze a single file for API misuse
1421pub(crate) fn analyze_file(
1422    path: &Path,
1423    rules: &[APIRule],
1424    language: ApiLanguage,
1425) -> Result<Vec<MisuseFinding>> {
1426    let content = fs::read_to_string(path)?;
1427    let file_str = path.display().to_string();
1428    let mut findings = Vec::new();
1429    let mut prev_trimmed = String::new();
1430    let file_has_hashmap = matches!(language, ApiLanguage::Rust) && content.contains("HashMap");
1431
1432    for (line_num, line) in content.lines().enumerate() {
1433        let line_number = (line_num + 1) as u32;
1434        let trimmed = line.trim();
1435        let rust_ctx = RustLineContext {
1436            file_has_hashmap,
1437            previous_line: prev_trimmed.as_str(),
1438            previous_is_loop: prev_trimmed.starts_with("for ")
1439                || prev_trimmed.starts_with("while "),
1440        };
1441
1442        // Check each rule
1443        for rule in rules {
1444            if let Some(finding) =
1445                check_rule(rule, &file_str, line_number, line, language, &rust_ctx)
1446            {
1447                findings.push(finding);
1448            }
1449        }
1450        prev_trimmed = trimmed.to_string();
1451    }
1452
1453    Ok(findings)
1454}
1455
/// Context about the surrounding file and the preceding line, passed to the
/// checks that need more than the current line's text.
struct RustLineContext<'a> {
    /// Trimmed text of the line immediately before the one being checked
    /// (empty for the first line).
    previous_line: &'a str,
    /// `true` when the previous trimmed line starts with `for ` or `while `.
    previous_is_loop: bool,
    /// `true` when the file is Rust and its text contains `HashMap`.
    file_has_hashmap: bool,
}
1461
1462/// Check a single rule against a line of code
1463fn check_rule(
1464    rule: &APIRule,
1465    file: &str,
1466    line: u32,
1467    line_text: &str,
1468    language: ApiLanguage,
1469    rust_ctx: &RustLineContext<'_>,
1470) -> Option<MisuseFinding> {
1471    let trimmed = line_text.trim();
1472
1473    // Skip comments
1474    if is_comment_line(trimmed, language) {
1475        return None;
1476    }
1477
1478    match rule.id.as_str() {
1479        "PY001" => check_missing_timeout(rule, file, line, trimmed),
1480        "PY002" => check_bare_except(rule, file, line, trimmed),
1481        "PY003" => check_md5_usage(rule, file, line, trimmed),
1482        "PY004" => check_sha1_usage(rule, file, line, trimmed),
1483        "PY005" => check_unclosed_file(rule, file, line, trimmed),
1484        "PY006" => check_insecure_random(rule, file, line, trimmed),
1485        "RS001" => check_mutex_lock_unwrap(rule, file, line, trimmed),
1486        "RS002" => check_file_open_without_context(rule, file, line, trimmed),
1487        "RS003" => check_unbounded_with_capacity(rule, file, line, trimmed),
1488        "RS004" => check_detached_tokio_spawn(rule, file, line, trimmed),
1489        "RS005" => check_hashmap_order_dependence(rule, file, line, trimmed, rust_ctx),
1490        "RS006" => check_clone_in_hot_loop(rule, file, line, trimmed, rust_ctx),
1491        _ => check_regex_rule(rule, file, line, trimmed, language),
1492    }
1493}
1494
1495fn is_comment_line(trimmed: &str, language: ApiLanguage) -> bool {
1496    match language {
1497        ApiLanguage::Python | ApiLanguage::Ruby | ApiLanguage::Elixir => trimmed.starts_with('#'),
1498        ApiLanguage::Rust
1499        | ApiLanguage::Go
1500        | ApiLanguage::Java
1501        | ApiLanguage::JavaScript
1502        | ApiLanguage::TypeScript
1503        | ApiLanguage::C
1504        | ApiLanguage::Cpp
1505        | ApiLanguage::Kotlin
1506        | ApiLanguage::Swift
1507        | ApiLanguage::CSharp
1508        | ApiLanguage::Scala => trimmed.starts_with("//"),
1509        ApiLanguage::Php => trimmed.starts_with("//") || trimmed.starts_with('#'),
1510        ApiLanguage::Lua | ApiLanguage::Luau => trimmed.starts_with("--"),
1511        ApiLanguage::Ocaml => trimmed.starts_with("(*"),
1512    }
1513}
1514
1515fn check_regex_rule(
1516    rule: &APIRule,
1517    file: &str,
1518    line: u32,
1519    line_text: &str,
1520    language: ApiLanguage,
1521) -> Option<MisuseFinding> {
1522    let spec = regex_rule_specs_for_language(language)
1523        .iter()
1524        .find(|spec| spec.id == rule.id)?;
1525    let regex = Regex::new(spec.pattern).ok()?;
1526    if !regex.is_match(line_text) {
1527        return None;
1528    }
1529
1530    let column = regex.find(line_text).map(|m| m.start()).unwrap_or(0) as u32;
1531    Some(MisuseFinding {
1532        file: file.to_string(),
1533        line,
1534        column,
1535        rule: rule.clone(),
1536        api_call: spec.api_call.to_string(),
1537        message: spec.message.to_string(),
1538        fix_suggestion: spec.fix_suggestion.to_string(),
1539        code_context: line_text.to_string(),
1540    })
1541}
1542
1543/// Check for requests without timeout
1544fn check_missing_timeout(
1545    rule: &APIRule,
1546    file: &str,
1547    line: u32,
1548    line_text: &str,
1549) -> Option<MisuseFinding> {
1550    // Look for requests.get/post/put/delete/patch without timeout
1551    let request_patterns = [
1552        "requests.get(",
1553        "requests.post(",
1554        "requests.put(",
1555        "requests.delete(",
1556        "requests.patch(",
1557        "requests.head(",
1558        "requests.options(",
1559    ];
1560
1561    for pattern in &request_patterns {
1562        if line_text.contains(pattern) && !line_text.contains("timeout") {
1563            let column = line_text.find(pattern).unwrap_or(0) as u32;
1564            return Some(MisuseFinding {
1565                file: file.to_string(),
1566                line,
1567                column,
1568                rule: rule.clone(),
1569                api_call: pattern.trim_end_matches('(').to_string(),
1570                message: format!(
1571                    "{} called without timeout parameter",
1572                    pattern.trim_end_matches('(')
1573                ),
1574                fix_suggestion: format!("Add timeout parameter: {}url, timeout=30)", pattern),
1575                code_context: line_text.to_string(),
1576            });
1577        }
1578    }
1579
1580    None
1581}
1582
1583/// Check for bare except clause
1584fn check_bare_except(
1585    rule: &APIRule,
1586    file: &str,
1587    line: u32,
1588    line_text: &str,
1589) -> Option<MisuseFinding> {
1590    // Look for "except:" without an exception type
1591    // Match "except:" but not "except SomeException:" or "except Exception as e:"
1592    if line_text.starts_with("except:") || line_text.contains(" except:") {
1593        let column = line_text.find("except:").unwrap_or(0) as u32;
1594        return Some(MisuseFinding {
1595            file: file.to_string(),
1596            line,
1597            column,
1598            rule: rule.clone(),
1599            api_call: "except".to_string(),
1600            message: "Bare except clause catches all exceptions including KeyboardInterrupt and SystemExit".to_string(),
1601            fix_suggestion: "Use 'except Exception as e:' to catch only program exceptions".to_string(),
1602            code_context: line_text.to_string(),
1603        });
1604    }
1605
1606    None
1607}
1608
1609/// Check for MD5 usage
1610fn check_md5_usage(
1611    rule: &APIRule,
1612    file: &str,
1613    line: u32,
1614    line_text: &str,
1615) -> Option<MisuseFinding> {
1616    // Look for hashlib.md5 usage
1617    if line_text.contains("hashlib.md5") || line_text.contains("md5(") {
1618        let column = line_text
1619            .find("hashlib.md5")
1620            .or_else(|| line_text.find("md5("))
1621            .unwrap_or(0) as u32;
1622        return Some(MisuseFinding {
1623            file: file.to_string(),
1624            line,
1625            column,
1626            rule: rule.clone(),
1627            api_call: "hashlib.md5".to_string(),
1628            message: "MD5 is cryptographically broken and should not be used for security purposes"
1629                .to_string(),
1630            fix_suggestion: "Use hashlib.sha256() or stronger. For passwords, use bcrypt or argon2"
1631                .to_string(),
1632            code_context: line_text.to_string(),
1633        });
1634    }
1635
1636    None
1637}
1638
1639/// Check for SHA1 usage
1640fn check_sha1_usage(
1641    rule: &APIRule,
1642    file: &str,
1643    line: u32,
1644    line_text: &str,
1645) -> Option<MisuseFinding> {
1646    // Look for hashlib.sha1 usage
1647    if line_text.contains("hashlib.sha1") || line_text.contains("sha1(") {
1648        let column = line_text
1649            .find("hashlib.sha1")
1650            .or_else(|| line_text.find("sha1("))
1651            .unwrap_or(0) as u32;
1652        return Some(MisuseFinding {
1653            file: file.to_string(),
1654            line,
1655            column,
1656            rule: rule.clone(),
1657            api_call: "hashlib.sha1".to_string(),
1658            message: "SHA1 is cryptographically weak and should not be used for security purposes"
1659                .to_string(),
1660            fix_suggestion: "Use hashlib.sha256() or stronger".to_string(),
1661            code_context: line_text.to_string(),
1662        });
1663    }
1664
1665    None
1666}
1667
1668/// Check for unclosed file
1669fn check_unclosed_file(
1670    rule: &APIRule,
1671    file: &str,
1672    line: u32,
1673    line_text: &str,
1674) -> Option<MisuseFinding> {
1675    // Look for "open(" that's not after "with "
1676    // This is a simplified check - a proper implementation would use AST
1677    if line_text.contains("open(")
1678        && !line_text.contains("with ")
1679        && !line_text.starts_with("with ")
1680    {
1681        // Check if it's an assignment (f = open(...))
1682        if line_text.contains("= open(") || line_text.contains("=open(") {
1683            let column = line_text.find("open(").unwrap_or(0) as u32;
1684            return Some(MisuseFinding {
1685                file: file.to_string(),
1686                line,
1687                column,
1688                rule: rule.clone(),
1689                api_call: "open".to_string(),
1690                message: "File opened without context manager may not be properly closed"
1691                    .to_string(),
1692                fix_suggestion: "Use 'with open(path) as f:' to ensure file is closed".to_string(),
1693                code_context: line_text.to_string(),
1694            });
1695        }
1696    }
1697
1698    None
1699}
1700
1701/// Check for insecure random usage
1702fn check_insecure_random(
1703    rule: &APIRule,
1704    file: &str,
1705    line: u32,
1706    line_text: &str,
1707) -> Option<MisuseFinding> {
1708    // Look for random.* usage that might be for security
1709    let insecure_patterns = [
1710        "random.randint(",
1711        "random.random(",
1712        "random.choice(",
1713        "random.randrange(",
1714    ];
1715
1716    // Only flag if it looks like it's being used for security
1717    // (contains words like token, secret, password, key)
1718    let security_indicators = ["token", "secret", "password", "key", "auth", "session"];
1719
1720    for pattern in &insecure_patterns {
1721        if line_text.contains(pattern) {
1722            // Check if the line or nearby context suggests security use
1723            let line_lower = line_text.to_lowercase();
1724            for indicator in &security_indicators {
1725                if line_lower.contains(indicator) {
1726                    let column = line_text.find(pattern).unwrap_or(0) as u32;
1727                    return Some(MisuseFinding {
1728                        file: file.to_string(),
1729                        line,
1730                        column,
1731                        rule: rule.clone(),
1732                        api_call: pattern.trim_end_matches('(').to_string(),
1733                        message: format!(
1734                            "{} is not cryptographically secure, don't use for security purposes",
1735                            pattern.trim_end_matches('(')
1736                        ),
1737                        fix_suggestion:
1738                            "Use secrets.token_bytes() or secrets.token_hex() for security"
1739                                .to_string(),
1740                        code_context: line_text.to_string(),
1741                    });
1742                }
1743            }
1744        }
1745    }
1746
1747    None
1748}
1749
1750/// Check for poisoned mutex lock unwrap.
1751fn check_mutex_lock_unwrap(
1752    rule: &APIRule,
1753    file: &str,
1754    line: u32,
1755    line_text: &str,
1756) -> Option<MisuseFinding> {
1757    if line_text.contains(".lock().unwrap()") {
1758        let column = line_text.find(".lock().unwrap()").unwrap_or(0) as u32;
1759        return Some(MisuseFinding {
1760            file: file.to_string(),
1761            line,
1762            column,
1763            rule: rule.clone(),
1764            api_call: "Mutex::lock".to_string(),
1765            message:
1766                "Mutex::lock().unwrap() can panic on poisoned locks and hide deadlock behavior"
1767                    .to_string(),
1768            fix_suggestion:
1769                "Handle lock errors explicitly (match/if let), or use try_lock with backoff"
1770                    .to_string(),
1771            code_context: line_text.to_string(),
1772        });
1773    }
1774    None
1775}
1776
1777/// Check for File::open without context propagation.
1778fn check_file_open_without_context(
1779    rule: &APIRule,
1780    file: &str,
1781    line: u32,
1782    line_text: &str,
1783) -> Option<MisuseFinding> {
1784    if line_text.contains("File::open(")
1785        && !line_text.contains(".context(")
1786        && !line_text.contains(".with_context(")
1787        && !line_text.contains("map_err(")
1788    {
1789        let column = line_text.find("File::open(").unwrap_or(0) as u32;
1790        return Some(MisuseFinding {
1791            file: file.to_string(),
1792            line,
1793            column,
1794            rule: rule.clone(),
1795            api_call: "File::open".to_string(),
1796            message: "File::open used without contextual error mapping".to_string(),
1797            fix_suggestion:
1798                "Wrap errors with context (with_context/context/map_err) before propagating"
1799                    .to_string(),
1800            code_context: line_text.to_string(),
1801        });
1802    }
1803    None
1804}
1805
1806/// Check for capacity allocations sourced from unbounded input.
1807fn check_unbounded_with_capacity(
1808    rule: &APIRule,
1809    file: &str,
1810    line: u32,
1811    line_text: &str,
1812) -> Option<MisuseFinding> {
1813    if line_text.contains("Vec::with_capacity(") {
1814        let line_lower = line_text.to_lowercase();
1815        let user_input_markers = ["input", "args", "user", "request", "len", "size"];
1816        if user_input_markers.iter().any(|m| line_lower.contains(m)) {
1817            let column = line_text.find("Vec::with_capacity(").unwrap_or(0) as u32;
1818            return Some(MisuseFinding {
1819                file: file.to_string(),
1820                line,
1821                column,
1822                rule: rule.clone(),
1823                api_call: "Vec::with_capacity".to_string(),
1824                message: "Vec::with_capacity appears to use unbounded external input".to_string(),
1825                fix_suggestion:
1826                    "Clamp requested capacity with a hard upper bound before allocation".to_string(),
1827                code_context: line_text.to_string(),
1828            });
1829        }
1830    }
1831    None
1832}
1833
1834/// Check for detached tokio tasks.
1835fn check_detached_tokio_spawn(
1836    rule: &APIRule,
1837    file: &str,
1838    line: u32,
1839    line_text: &str,
1840) -> Option<MisuseFinding> {
1841    if line_text.contains("tokio::spawn(")
1842        && !line_text.contains('=')
1843        && !line_text.contains("handles.push")
1844    {
1845        let column = line_text.find("tokio::spawn(").unwrap_or(0) as u32;
1846        return Some(MisuseFinding {
1847            file: file.to_string(),
1848            line,
1849            column,
1850            rule: rule.clone(),
1851            api_call: "tokio::spawn".to_string(),
1852            message: "tokio::spawn used without keeping JoinHandle".to_string(),
1853            fix_suggestion: "Store JoinHandle values and await them to surface task errors"
1854                .to_string(),
1855            code_context: line_text.to_string(),
1856        });
1857    }
1858    None
1859}
1860
1861/// Check for map iteration order assumptions.
1862fn check_hashmap_order_dependence(
1863    rule: &APIRule,
1864    file: &str,
1865    line: u32,
1866    line_text: &str,
1867    rust_ctx: &RustLineContext<'_>,
1868) -> Option<MisuseFinding> {
1869    let looks_like_hashmap_iteration = line_text.contains(".iter()")
1870        && (line_text.contains("for ") || rust_ctx.previous_line.starts_with("for "))
1871        && rust_ctx.file_has_hashmap;
1872    if looks_like_hashmap_iteration {
1873        let column = line_text.find(".iter()").unwrap_or(0) as u32;
1874        return Some(MisuseFinding {
1875            file: file.to_string(),
1876            line,
1877            column,
1878            rule: rule.clone(),
1879            api_call: "HashMap::iter".to_string(),
1880            message: "Potential logic dependence on HashMap iteration order".to_string(),
1881            fix_suggestion: "Use BTreeMap/IndexMap or sort keys before ordered operations"
1882                .to_string(),
1883            code_context: line_text.to_string(),
1884        });
1885    }
1886    None
1887}
1888
1889/// Check for clone usage in loop bodies.
1890fn check_clone_in_hot_loop(
1891    rule: &APIRule,
1892    file: &str,
1893    line: u32,
1894    line_text: &str,
1895    rust_ctx: &RustLineContext<'_>,
1896) -> Option<MisuseFinding> {
1897    if line_text.contains(".clone()")
1898        && (line_text.contains("for ") || line_text.contains("while ") || rust_ctx.previous_is_loop)
1899    {
1900        let column = line_text.find(".clone()").unwrap_or(0) as u32;
1901        return Some(MisuseFinding {
1902            file: file.to_string(),
1903            line,
1904            column,
1905            rule: rule.clone(),
1906            api_call: "clone".to_string(),
1907            message: "clone() in loop context may create avoidable allocation overhead".to_string(),
1908            fix_suggestion: "Prefer borrowing/references or move semantics inside hot loops"
1909                .to_string(),
1910            code_context: line_text.to_string(),
1911        });
1912    }
1913    None
1914}
1915
1916// =============================================================================
1917// Filtering
1918// =============================================================================
1919
1920/// Filter findings by category and severity
1921fn filter_findings(
1922    findings: Vec<MisuseFinding>,
1923    categories: Option<&[MisuseCategory]>,
1924    severities: Option<&[MisuseSeverity]>,
1925) -> Vec<MisuseFinding> {
1926    findings
1927        .into_iter()
1928        .filter(|f| {
1929            // Category filter
1930            if let Some(cats) = categories {
1931                if !cats.contains(&f.rule.category) {
1932                    return false;
1933                }
1934            }
1935
1936            // Severity filter
1937            if let Some(sevs) = severities {
1938                if !sevs.contains(&f.rule.severity) {
1939                    return false;
1940                }
1941            }
1942
1943            true
1944        })
1945        .collect()
1946}
1947
1948// =============================================================================
1949// Summary Building
1950// =============================================================================
1951
1952/// Build summary from findings
1953fn build_summary(findings: &[MisuseFinding], files_scanned: u32) -> APICheckSummary {
1954    let mut by_category: HashMap<String, u32> = HashMap::new();
1955    let mut by_severity: HashMap<String, u32> = HashMap::new();
1956    let mut apis_checked: Vec<String> = Vec::new();
1957
1958    for finding in findings {
1959        // Count by category
1960        let cat_str = format!("{:?}", finding.rule.category).to_lowercase();
1961        *by_category.entry(cat_str).or_insert(0) += 1;
1962
1963        // Count by severity
1964        let sev_str = format!("{:?}", finding.rule.severity).to_lowercase();
1965        *by_severity.entry(sev_str).or_insert(0) += 1;
1966
1967        // Track APIs
1968        if !apis_checked.contains(&finding.api_call) {
1969            apis_checked.push(finding.api_call.clone());
1970        }
1971    }
1972
1973    APICheckSummary {
1974        total_findings: findings.len() as u32,
1975        by_category,
1976        by_severity,
1977        apis_checked,
1978        files_scanned,
1979    }
1980}
1981
1982// =============================================================================
1983// Output Formatting
1984// =============================================================================
1985
1986/// Format report as human-readable text
1987fn format_api_check_text(report: &APICheckReport) -> String {
1988    let mut output = String::new();
1989
1990    output.push_str("=== API Check Report ===\n\n");
1991
1992    // Summary
1993    output.push_str(&format!(
1994        "Files scanned: {}\n",
1995        report.summary.files_scanned
1996    ));
1997    output.push_str(&format!("Rules applied: {}\n", report.rules_applied));
1998    output.push_str(&format!(
1999        "Total findings: {}\n\n",
2000        report.summary.total_findings
2001    ));
2002
2003    // By severity
2004    if !report.summary.by_severity.is_empty() {
2005        output.push_str("By Severity:\n");
2006        for (severity, count) in &report.summary.by_severity {
2007            output.push_str(&format!("  {}: {}\n", severity, count));
2008        }
2009        output.push('\n');
2010    }
2011
2012    // By category
2013    if !report.summary.by_category.is_empty() {
2014        output.push_str("By Category:\n");
2015        for (category, count) in &report.summary.by_category {
2016            output.push_str(&format!("  {}: {}\n", category, count));
2017        }
2018        output.push('\n');
2019    }
2020
2021    // Findings
2022    if !report.findings.is_empty() {
2023        output.push_str("Findings:\n");
2024        output.push_str(&"-".repeat(60));
2025        output.push('\n');
2026
2027        for finding in &report.findings {
2028            output.push_str(&format!(
2029                "[{:?}] {} ({})\n",
2030                finding.rule.severity, finding.rule.name, finding.rule.id
2031            ));
2032            output.push_str(&format!(
2033                "  Location: {}:{}:{}\n",
2034                finding.file, finding.line, finding.column
2035            ));
2036            output.push_str(&format!("  API: {}\n", finding.api_call));
2037            output.push_str(&format!("  Message: {}\n", finding.message));
2038            output.push_str(&format!("  Fix: {}\n", finding.fix_suggestion));
2039            if !finding.code_context.is_empty() {
2040                output.push_str(&format!("  Context: {}\n", finding.code_context.trim()));
2041            }
2042            output.push('\n');
2043        }
2044    } else {
2045        output.push_str("No API misuse patterns detected.\n");
2046    }
2047
2048    output
2049}
2050
2051// =============================================================================
2052// Tests
2053// =============================================================================
2054
2055#[cfg(test)]
2056mod tests {
2057    use super::*;
2058    use tempfile::TempDir;
2059
2060    #[test]
2061    fn test_python_rules_defined() {
2062        let rules = python_rules();
2063        assert!(!rules.is_empty());
2064        assert!(rules.iter().any(|r| r.id == "PY001")); // missing-timeout
2065        assert!(rules.iter().any(|r| r.id == "PY002")); // bare-except
2066        assert!(rules.iter().any(|r| r.id == "PY003")); // weak-hash-md5
2067        assert!(rules.iter().any(|r| r.id == "PY005")); // unclosed-file
2068    }
2069
2070    #[test]
2071    fn test_rust_rules_defined() {
2072        let rules = rust_rules();
2073        assert!(!rules.is_empty());
2074        assert!(rules.iter().any(|r| r.id == "RS001"));
2075        assert!(rules.iter().any(|r| r.id == "RS002"));
2076        assert!(rules.iter().any(|r| r.id == "RS003"));
2077        assert!(rules.iter().any(|r| r.id == "RS004"));
2078        assert!(rules.iter().any(|r| r.id == "RS005"));
2079        assert!(rules.iter().any(|r| r.id == "RS006"));
2080    }
2081
2082    #[test]
2083    fn test_all_supported_languages_have_rules() {
2084        for language in all_api_languages() {
2085            let rules = rules_for_language(*language);
2086            assert!(
2087                !rules.is_empty(),
2088                "expected at least one api-check rule for {:?}",
2089                language
2090            );
2091        }
2092    }
2093
2094    #[test]
2095    fn test_detect_language_extended_extensions() {
2096        let cases = [
2097            ("main.go", ApiLanguage::Go),
2098            ("Main.java", ApiLanguage::Java),
2099            ("app.js", ApiLanguage::JavaScript),
2100            ("component.tsx", ApiLanguage::TypeScript),
2101            ("main.c", ApiLanguage::C),
2102            ("main.cpp", ApiLanguage::Cpp),
2103            ("app.rb", ApiLanguage::Ruby),
2104            ("index.php", ApiLanguage::Php),
2105            ("Main.kt", ApiLanguage::Kotlin),
2106            ("main.swift", ApiLanguage::Swift),
2107            ("Program.cs", ApiLanguage::CSharp),
2108            ("Main.scala", ApiLanguage::Scala),
2109            ("app.ex", ApiLanguage::Elixir),
2110            ("main.lua", ApiLanguage::Lua),
2111            ("game.luau", ApiLanguage::Luau),
2112            ("main.ml", ApiLanguage::Ocaml),
2113        ];
2114
2115        for (path, expected) in cases {
2116            assert_eq!(detect_language(Path::new(path)), Some(expected), "{path}");
2117        }
2118    }
2119
2120    #[test]
2121    fn test_check_missing_timeout() {
2122        let rule = &python_rules()[0]; // PY001
2123
2124        // Should detect
2125        let finding = check_missing_timeout(rule, "test.py", 1, "response = requests.get(url)");
2126        assert!(finding.is_some());
2127
2128        // Should not detect (has timeout)
2129        let finding = check_missing_timeout(
2130            rule,
2131            "test.py",
2132            1,
2133            "response = requests.get(url, timeout=30)",
2134        );
2135        assert!(finding.is_none());
2136    }
2137
2138    #[test]
2139    fn test_check_bare_except() {
2140        let rule = &python_rules()[1]; // PY002
2141
2142        // Should detect
2143        let finding = check_bare_except(rule, "test.py", 1, "except:");
2144        assert!(finding.is_some());
2145
2146        // Should not detect (has exception type)
2147        let finding = check_bare_except(rule, "test.py", 1, "except Exception:");
2148        assert!(finding.is_none());
2149    }
2150
2151    #[test]
2152    fn test_check_md5_usage() {
2153        let rule = &python_rules()[2]; // PY003
2154
2155        // Should detect
2156        let finding = check_md5_usage(rule, "test.py", 1, "hash = hashlib.md5(data)");
2157        assert!(finding.is_some());
2158
2159        // Should not detect
2160        let finding = check_md5_usage(rule, "test.py", 1, "hash = hashlib.sha256(data)");
2161        assert!(finding.is_none());
2162    }
2163
2164    #[test]
2165    fn test_check_unclosed_file() {
2166        let rule = &python_rules()[4]; // PY005
2167
2168        // Should detect
2169        let finding = check_unclosed_file(rule, "test.py", 1, "f = open('data.txt')");
2170        assert!(finding.is_some());
2171
2172        // Should not detect (using context manager)
2173        let finding = check_unclosed_file(rule, "test.py", 1, "with open('data.txt') as f:");
2174        assert!(finding.is_none());
2175    }
2176
2177    #[test]
2178    fn test_filter_by_category() {
2179        let findings = vec![
2180            MisuseFinding {
2181                file: "test.py".to_string(),
2182                line: 1,
2183                column: 0,
2184                rule: APIRule {
2185                    id: "PY001".to_string(),
2186                    name: "test".to_string(),
2187                    category: MisuseCategory::Parameters,
2188                    severity: MisuseSeverity::High,
2189                    description: "test".to_string(),
2190                    correct_usage: "test".to_string(),
2191                },
2192                api_call: "test".to_string(),
2193                message: "test".to_string(),
2194                fix_suggestion: "test".to_string(),
2195                code_context: "test".to_string(),
2196            },
2197            MisuseFinding {
2198                file: "test.py".to_string(),
2199                line: 2,
2200                column: 0,
2201                rule: APIRule {
2202                    id: "PY003".to_string(),
2203                    name: "test".to_string(),
2204                    category: MisuseCategory::Crypto,
2205                    severity: MisuseSeverity::High,
2206                    description: "test".to_string(),
2207                    correct_usage: "test".to_string(),
2208                },
2209                api_call: "test".to_string(),
2210                message: "test".to_string(),
2211                fix_suggestion: "test".to_string(),
2212                code_context: "test".to_string(),
2213            },
2214        ];
2215
2216        let filtered = filter_findings(findings, Some(&[MisuseCategory::Crypto]), None);
2217        assert_eq!(filtered.len(), 1);
2218        assert_eq!(filtered[0].rule.category, MisuseCategory::Crypto);
2219    }
2220
2221    #[test]
2222    fn test_build_summary() {
2223        let findings = vec![MisuseFinding {
2224            file: "test.py".to_string(),
2225            line: 1,
2226            column: 0,
2227            rule: APIRule {
2228                id: "PY001".to_string(),
2229                name: "test".to_string(),
2230                category: MisuseCategory::Parameters,
2231                severity: MisuseSeverity::High,
2232                description: "test".to_string(),
2233                correct_usage: "test".to_string(),
2234            },
2235            api_call: "requests.get".to_string(),
2236            message: "test".to_string(),
2237            fix_suggestion: "test".to_string(),
2238            code_context: "test".to_string(),
2239        }];
2240
2241        let summary = build_summary(&findings, 5);
2242        assert_eq!(summary.total_findings, 1);
2243        assert_eq!(summary.files_scanned, 5);
2244        assert!(summary.apis_checked.contains(&"requests.get".to_string()));
2245    }
2246
2247    #[test]
2248    fn test_collect_files_includes_rust() {
2249        let temp = TempDir::new().unwrap();
2250        let py = temp.path().join("a.py");
2251        let rs = temp.path().join("b.rs");
2252        let go = temp.path().join("c.go");
2253        let txt = temp.path().join("c.txt");
2254        fs::write(&py, "print('ok')").unwrap();
2255        fs::write(&rs, "fn main() {}").unwrap();
2256        fs::write(&go, "package main").unwrap();
2257        fs::write(&txt, "ignore").unwrap();
2258
2259        let files = collect_files(temp.path()).unwrap();
2260        assert!(files.iter().any(|f| f.ends_with("a.py")));
2261        assert!(files.iter().any(|f| f.ends_with("b.rs")));
2262        assert!(files.iter().any(|f| f.ends_with("c.go")));
2263        assert!(!files.iter().any(|f| f.ends_with("c.txt")));
2264    }
2265
2266    #[test]
2267    fn test_check_mutex_lock_unwrap() {
2268        let rule = &rust_rules()[0];
2269        let finding =
2270            check_mutex_lock_unwrap(rule, "lib.rs", 10, "let guard = shared.lock().unwrap();");
2271        assert!(finding.is_some());
2272    }
2273
2274    #[test]
2275    fn test_check_file_open_without_context() {
2276        let rule = &rust_rules()[1];
2277        let finding = check_file_open_without_context(rule, "lib.rs", 8, "let f = File::open(p)?;");
2278        assert!(finding.is_some());
2279
2280        let contextual = check_file_open_without_context(
2281            rule,
2282            "lib.rs",
2283            9,
2284            "let f = File::open(p).with_context(|| \"open\".to_string())?;",
2285        );
2286        assert!(contextual.is_none());
2287    }
2288
2289    #[test]
2290    fn test_check_unbounded_with_capacity() {
2291        let rule = &rust_rules()[2];
2292        let finding =
2293            check_unbounded_with_capacity(rule, "lib.rs", 12, "let v = Vec::with_capacity(len);");
2294        assert!(finding.is_some());
2295
2296        let bounded =
2297            check_unbounded_with_capacity(rule, "lib.rs", 13, "let v = Vec::with_capacity(256);");
2298        assert!(bounded.is_none());
2299    }
2300
2301    #[test]
2302    fn test_check_tokio_spawn_detached() {
2303        let rule = &rust_rules()[3];
2304        let detached = check_detached_tokio_spawn(
2305            rule,
2306            "lib.rs",
2307            3,
2308            "tokio::spawn(async move { work().await; });",
2309        );
2310        let tracked = check_detached_tokio_spawn(
2311            rule,
2312            "lib.rs",
2313            4,
2314            "let handle = tokio::spawn(async move { work().await; });",
2315        );
2316        assert!(detached.is_some());
2317        assert!(tracked.is_none());
2318    }
2319
2320    #[test]
2321    fn test_check_hashmap_order_dependence() {
2322        let rule = &rust_rules()[4];
2323        let ctx = RustLineContext {
2324            file_has_hashmap: true,
2325            previous_line: "for (k, v) in map",
2326            previous_is_loop: true,
2327        };
2328        let finding = check_hashmap_order_dependence(rule, "lib.rs", 12, "    .iter()", &ctx);
2329        assert!(finding.is_some());
2330    }
2331
2332    #[test]
2333    fn test_check_clone_in_hot_loop() {
2334        let rule = &rust_rules()[5];
2335        let ctx = RustLineContext {
2336            file_has_hashmap: false,
2337            previous_line: "for item in items {",
2338            previous_is_loop: true,
2339        };
2340        let finding = check_clone_in_hot_loop(rule, "lib.rs", 20, "value.clone()", &ctx);
2341        assert!(finding.is_some());
2342    }
2343
2344    fn assert_language_findings(
2345        filename: &str,
2346        language: ApiLanguage,
2347        source: &str,
2348        expected_rule_id: &str,
2349    ) {
2350        let temp = TempDir::new().unwrap();
2351        let path = temp.path().join(filename);
2352        fs::write(&path, source).unwrap();
2353        let rules = rules_for_language(language);
2354        let findings = analyze_file(&path, &rules, language).unwrap();
2355        assert!(
2356            findings
2357                .iter()
2358                .any(|finding| finding.rule.id == expected_rule_id),
2359            "expected {expected_rule_id} for {filename}, got {:?}",
2360            findings
2361                .iter()
2362                .map(|f| f.rule.id.clone())
2363                .collect::<Vec<_>>()
2364        );
2365    }
2366
2367    #[test]
2368    fn test_extended_language_rule_detection() {
2369        let cases = [
2370            (
2371                "main.go",
2372                ApiLanguage::Go,
2373                "data, _ := ioutil.ReadFile(path)",
2374                "GO001",
2375            ),
2376            (
2377                "Main.java",
2378                ApiLanguage::Java,
2379                "if (name == otherName) { }",
2380                "JV001",
2381            ),
2382            ("app.js", ApiLanguage::JavaScript, "if (a == b) {}", "JS001"),
2383            ("app.ts", ApiLanguage::TypeScript, "if (a == b) {}", "TS001"),
2384            ("main.c", ApiLanguage::C, "gets(buffer);", "C001"),
2385            (
2386                "main.cpp",
2387                ApiLanguage::Cpp,
2388                "std::auto_ptr<Foo> p;",
2389                "CPP003",
2390            ),
2391            ("app.rb", ApiLanguage::Ruby, "eval(params[:code])", "RB001"),
2392            (
2393                "index.php",
2394                ApiLanguage::Php,
2395                "unserialize($payload);",
2396                "PH005",
2397            ),
2398            ("Main.kt", ApiLanguage::Kotlin, "val name = user!!", "KT001"),
2399            (
2400                "main.swift",
2401                ApiLanguage::Swift,
2402                "let name = value!",
2403                "SW003",
2404            ),
2405            (
2406                "Program.cs",
2407                ApiLanguage::CSharp,
2408                "var x = task.Result;",
2409                "CS003",
2410            ),
2411            (
2412                "Main.scala",
2413                ApiLanguage::Scala,
2414                "val casted = value.asInstanceOf[String]",
2415                "SC002",
2416            ),
2417            (
2418                "app.ex",
2419                ApiLanguage::Elixir,
2420                "String.to_atom(param)",
2421                "EX001",
2422            ),
2423            ("main.lua", ApiLanguage::Lua, "value = 1", "LU001"),
2424            ("game.luau", ApiLanguage::Luau, "os.execute(cmd)", "LU003"),
2425            ("main.ml", ApiLanguage::Ocaml, "Obj.magic value", "OC004"),
2426        ];
2427
2428        for (filename, language, source, expected_rule_id) in cases {
2429            assert_language_findings(filename, language, source, expected_rule_id);
2430        }
2431    }
2432}