tldr_cli/commands/remaining/
api_check.rs

1//! API Check command - Detect API misuse patterns
2//!
3//! Analyzes source code for common API misuse patterns. Python checks include:
4//! - Timeout issues (requests.get without timeout)
5//! - Bare except clauses (catching all exceptions)
6//! - Weak crypto (MD5, SHA1 for security purposes)
7//! - Unclosed resources (files not using context managers)
8//!
9//! # Example
10//!
11//! ```bash
12//! tldr api-check src/
13//! tldr api-check src/main.py --category crypto
14//! tldr api-check src/ --severity high --format text
15//! ```
16
17use std::collections::HashMap;
18use std::fs;
19use std::path::{Path, PathBuf};
20
21use anyhow::Result;
22use clap::Args;
23use regex::Regex;
24use walkdir::WalkDir;
25
26use super::error::RemainingError;
27use super::types::{
28    APICheckReport, APICheckSummary, APIRule, MisuseCategory, MisuseFinding, MisuseSeverity,
29};
30
31use crate::output::OutputWriter;
32
33// =============================================================================
34// Constants
35// =============================================================================
36
/// Upper bound on the number of files analyzed in a directory.
const MAX_DIRECTORY_FILES: u32 = 1_000;
39
/// Largest file size, in bytes, that will be analyzed (10 MiB).
const MAX_FILE_SIZE: u64 = 10 << 20;
42
/// Source languages known to the API check.
///
/// The variant order is kept stable; do not reorder without checking callers
/// that may rely on the discriminant values.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ApiLanguage {
    Python,
    Rust,
    Go,
    Java,
    JavaScript,
    TypeScript,
    C,
    Cpp,
    Ruby,
    Php,
    Kotlin,
    Swift,
    CSharp,
    Scala,
    Elixir,
    Lua,
    Luau,
    Ocaml,
}
64
65#[derive(Clone, Copy)]
66struct RegexRuleSpec {
67    id: &'static str,
68    name: &'static str,
69    category: MisuseCategory,
70    severity: MisuseSeverity,
71    description: &'static str,
72    correct_usage: &'static str,
73    pattern: &'static str,
74    api_call: &'static str,
75    message: &'static str,
76    fix_suggestion: &'static str,
77}
78
79impl RegexRuleSpec {
80    fn rule(self) -> APIRule {
81        APIRule {
82            id: self.id.to_string(),
83            name: self.name.to_string(),
84            category: self.category,
85            severity: self.severity,
86            description: self.description.to_string(),
87            correct_usage: self.correct_usage.to_string(),
88        }
89    }
90}
91
92const GO_RULE_SPECS: &[RegexRuleSpec] = &[
93    RegexRuleSpec {
94        id: "GO001",
95        name: "deprecated-ioutil-readfile",
96        category: MisuseCategory::Resources,
97        severity: MisuseSeverity::Low,
98        description: "ioutil.ReadFile is deprecated and encourages unbounded whole-file reads",
99        correct_usage: "Use os.ReadFile or stream with bufio.Scanner/Reader",
100        pattern: r"\bioutil\.ReadFile\s*\(",
101        api_call: "ioutil.ReadFile",
102        message: "ioutil.ReadFile is deprecated and can load unbounded content into memory",
103        fix_suggestion: "Use os.ReadFile for simple reads or bufio.Reader for bounded streaming",
104    },
105    RegexRuleSpec {
106        id: "GO002",
107        name: "http-get-without-timeout",
108        category: MisuseCategory::Parameters,
109        severity: MisuseSeverity::Medium,
110        description: "http.Get uses the default client and provides no call-specific timeout",
111        correct_usage: "Use an http.Client with Timeout or context-aware requests",
112        pattern: r"\bhttp\.Get\s*\(",
113        api_call: "http.Get",
114        message: "http.Get without an explicit timeout can hang indefinitely",
115        fix_suggestion: "Use an http.Client{Timeout: ...} or NewRequestWithContext",
116    },
117    RegexRuleSpec {
118        id: "GO003",
119        name: "exec-command",
120        category: MisuseCategory::Security,
121        severity: MisuseSeverity::High,
122        description: "exec.Command is risky when arguments or executable names come from input",
123        correct_usage: "Prefer direct library APIs or strictly validate allowed commands",
124        pattern: r"\bexec\.Command\s*\(",
125        api_call: "exec.Command",
126        message: "exec.Command can enable command injection when fed user-controlled values",
127        fix_suggestion: "Validate commands against an allowlist and avoid shell-like execution",
128    },
129    RegexRuleSpec {
130        id: "GO004",
131        name: "template-html-cast",
132        category: MisuseCategory::Security,
133        severity: MisuseSeverity::High,
134        description: "template.HTML bypasses html/template escaping guarantees",
135        correct_usage: "Pass plain strings to templates and let html/template escape them",
136        pattern: r"\btemplate\.HTML\s*\(",
137        api_call: "template.HTML",
138        message: "template.HTML disables escaping and can introduce XSS",
139        fix_suggestion: "Remove the cast and rely on html/template auto-escaping",
140    },
141    RegexRuleSpec {
142        id: "GO005",
143        name: "sql-query-without-context",
144        category: MisuseCategory::CallOrder,
145        severity: MisuseSeverity::Medium,
146        description: "sql.DB.Query lacks cancellation and timeout propagation compared with QueryContext",
147        correct_usage: "Use db.QueryContext(ctx, query, args...)",
148        pattern: r"\bsql\.Query\s*\(",
149        api_call: "sql.Query",
150        message: "sql.Query omits context-driven cancellation and timeout handling",
151        fix_suggestion: "Use QueryContext/ExecContext with a bounded context",
152    },
153];
154
155const JAVA_RULE_SPECS: &[RegexRuleSpec] = &[
156    RegexRuleSpec {
157        id: "JV001",
158        name: "string-comparison-with-double-equals",
159        category: MisuseCategory::CallOrder,
160        severity: MisuseSeverity::Medium,
161        description: "Using == on strings compares references instead of values",
162        correct_usage: "Use value.equals(other) or Objects.equals(a, b)",
163        pattern: r#"(?:".*"|\b\w+\b)\s*==\s*(?:".*"|\b\w+\b)"#,
164        api_call: "==",
165        message: "String comparison with == checks reference identity, not value equality",
166        fix_suggestion: "Use .equals(...) or Objects.equals(...) for string values",
167    },
168    RegexRuleSpec {
169        id: "JV002",
170        name: "runtime-exec",
171        category: MisuseCategory::Security,
172        severity: MisuseSeverity::High,
173        description: "Runtime.exec is dangerous with dynamic input and hard to sandbox correctly",
174        correct_usage: "Use structured APIs or a ProcessBuilder with validated arguments",
175        pattern: r"\bRuntime\.getRuntime\(\)\.exec\s*\(",
176        api_call: "Runtime.exec",
177        message: "Runtime.exec is a common command injection footgun",
178        fix_suggestion: "Prefer library APIs or tightly validated ProcessBuilder arguments",
179    },
180    RegexRuleSpec {
181        id: "JV003",
182        name: "objectinputstream-deserialization",
183        category: MisuseCategory::Security,
184        severity: MisuseSeverity::High,
185        description: "ObjectInputStream on untrusted data can trigger unsafe deserialization gadgets",
186        correct_usage: "Use safer formats like JSON with explicit schemas",
187        pattern: r"\bnew\s+ObjectInputStream\s*\(",
188        api_call: "ObjectInputStream",
189        message: "ObjectInputStream enables unsafe native Java deserialization",
190        fix_suggestion: "Replace native object deserialization with a schema-driven format",
191    },
192    RegexRuleSpec {
193        id: "JV004",
194        name: "create-statement",
195        category: MisuseCategory::Security,
196        severity: MisuseSeverity::Medium,
197        description: "createStatement often leads to string-built SQL instead of prepared statements",
198        correct_usage: "Use prepareStatement with placeholders",
199        pattern: r"\bcreateStatement\s*\(",
200        api_call: "createStatement",
201        message: "createStatement encourages dynamic SQL and weak parameter handling",
202        fix_suggestion: "Use prepareStatement with bound parameters",
203    },
204    RegexRuleSpec {
205        id: "JV005",
206        name: "system-gc-call",
207        category: MisuseCategory::Resources,
208        severity: MisuseSeverity::Low,
209        description: "System.gc() is usually a performance smell and not a reliable memory fix",
210        correct_usage: "Remove manual GC triggers and profile allocations instead",
211        pattern: r"\bSystem\.gc\s*\(",
212        api_call: "System.gc",
213        message: "System.gc() is an unreliable manual GC hint and often harms latency",
214        fix_suggestion: "Remove the call and fix the underlying allocation or lifetime issue",
215    },
216];
217
218const JAVASCRIPT_RULE_SPECS: &[RegexRuleSpec] = &[
219    RegexRuleSpec {
220        id: "JS001",
221        name: "loose-equality",
222        category: MisuseCategory::CallOrder,
223        severity: MisuseSeverity::Medium,
224        description: "Loose equality allows coercions that frequently hide correctness bugs",
225        correct_usage: "Use === / !== except in deliberately reviewed coercion cases",
226        pattern: r"\s==\s|\s!=\s",
227        api_call: "==",
228        message: "Loose equality can coerce values unexpectedly",
229        fix_suggestion: "Use === or !== and handle explicit type conversion",
230    },
231    RegexRuleSpec {
232        id: "JS002",
233        name: "parseint-without-radix",
234        category: MisuseCategory::Parameters,
235        severity: MisuseSeverity::Low,
236        description: "parseInt without a radix is ambiguous and less explicit than required",
237        correct_usage: "Use parseInt(value, 10)",
238        pattern: r"\bparseInt\s*\(\s*[^,\)]+\)",
239        api_call: "parseInt",
240        message: "parseInt called without an explicit radix",
241        fix_suggestion: "Pass a radix explicitly, usually parseInt(value, 10)",
242    },
243    RegexRuleSpec {
244        id: "JS003",
245        name: "json-parse-without-guard",
246        category: MisuseCategory::ErrorHandling,
247        severity: MisuseSeverity::Low,
248        description: "JSON.parse throws on malformed input and should usually be guarded",
249        correct_usage: "Wrap JSON.parse in try/catch when input is not fully trusted",
250        pattern: r"\bJSON\.parse\s*\(",
251        api_call: "JSON.parse",
252        message: "JSON.parse can throw and should be guarded for untrusted input",
253        fix_suggestion: "Use try/catch or validated parsing for untrusted payloads",
254    },
255    RegexRuleSpec {
256        id: "JS004",
257        name: "document-write",
258        category: MisuseCategory::Security,
259        severity: MisuseSeverity::High,
260        description: "document.write is legacy, brittle, and can inject unsanitized HTML",
261        correct_usage: "Use DOM APIs like textContent/appendChild instead",
262        pattern: r"\bdocument\.write(?:ln)?\s*\(",
263        api_call: "document.write",
264        message: "document.write is unsafe and can enable XSS",
265        fix_suggestion: "Use safe DOM APIs instead of writing raw HTML strings",
266    },
267    RegexRuleSpec {
268        id: "JS005",
269        name: "eval-call",
270        category: MisuseCategory::Security,
271        severity: MisuseSeverity::High,
272        description: "eval executes dynamic code and should be avoided",
273        correct_usage: "Use structured data parsing or explicit dispatch tables",
274        pattern: r"\beval\s*\(",
275        api_call: "eval",
276        message: "eval executes dynamic code and creates major security risk",
277        fix_suggestion: "Replace eval with data parsing or explicit function dispatch",
278    },
279];
280
281const TYPESCRIPT_RULE_SPECS: &[RegexRuleSpec] = &[
282    RegexRuleSpec {
283        id: "TS001",
284        name: "loose-equality",
285        category: MisuseCategory::CallOrder,
286        severity: MisuseSeverity::Medium,
287        description: "Loose equality allows coercions that frequently hide correctness bugs",
288        correct_usage: "Use === / !== except in deliberately reviewed coercion cases",
289        pattern: r"\s==\s|\s!=\s",
290        api_call: "==",
291        message: "Loose equality can coerce values unexpectedly",
292        fix_suggestion: "Use === or !== and handle explicit type conversion",
293    },
294    RegexRuleSpec {
295        id: "TS002",
296        name: "parseint-without-radix",
297        category: MisuseCategory::Parameters,
298        severity: MisuseSeverity::Low,
299        description: "parseInt without a radix is ambiguous and less explicit than required",
300        correct_usage: "Use parseInt(value, 10)",
301        pattern: r"\bparseInt\s*\(\s*[^,\)]+\)",
302        api_call: "parseInt",
303        message: "parseInt called without an explicit radix",
304        fix_suggestion: "Pass a radix explicitly, usually parseInt(value, 10)",
305    },
306    RegexRuleSpec {
307        id: "TS003",
308        name: "json-parse-without-guard",
309        category: MisuseCategory::ErrorHandling,
310        severity: MisuseSeverity::Low,
311        description: "JSON.parse throws on malformed input and should usually be guarded",
312        correct_usage: "Wrap JSON.parse in try/catch when input is not fully trusted",
313        pattern: r"\bJSON\.parse\s*\(",
314        api_call: "JSON.parse",
315        message: "JSON.parse can throw and should be guarded for untrusted input",
316        fix_suggestion: "Use try/catch or validated parsing for untrusted payloads",
317    },
318    RegexRuleSpec {
319        id: "TS004",
320        name: "document-write",
321        category: MisuseCategory::Security,
322        severity: MisuseSeverity::High,
323        description: "document.write is legacy, brittle, and can inject unsanitized HTML",
324        correct_usage: "Use DOM APIs like textContent/appendChild instead",
325        pattern: r"\bdocument\.write(?:ln)?\s*\(",
326        api_call: "document.write",
327        message: "document.write is unsafe and can enable XSS",
328        fix_suggestion: "Use safe DOM APIs instead of writing raw HTML strings",
329    },
330    RegexRuleSpec {
331        id: "TS005",
332        name: "eval-call",
333        category: MisuseCategory::Security,
334        severity: MisuseSeverity::High,
335        description: "eval executes dynamic code and should be avoided",
336        correct_usage: "Use structured data parsing or explicit dispatch tables",
337        pattern: r"\beval\s*\(",
338        api_call: "eval",
339        message: "eval executes dynamic code and creates major security risk",
340        fix_suggestion: "Replace eval with data parsing or explicit function dispatch",
341    },
342];
343
344const C_RULE_SPECS: &[RegexRuleSpec] = &[
345    RegexRuleSpec {
346        id: "C001",
347        name: "gets-call",
348        category: MisuseCategory::Security,
349        severity: MisuseSeverity::High,
350        description: "gets cannot bound input and has been removed from the standard library",
351        correct_usage: "Use fgets with an explicit buffer length",
352        pattern: r"\bgets\s*\(",
353        api_call: "gets",
354        message: "gets is inherently unsafe and enables buffer overflows",
355        fix_suggestion: "Use fgets(buffer, size, stdin) or another bounded API",
356    },
357    RegexRuleSpec {
358        id: "C002",
359        name: "strcpy-call",
360        category: MisuseCategory::Security,
361        severity: MisuseSeverity::High,
362        description: "strcpy performs unbounded copies and easily overflows buffers",
363        correct_usage: "Use snprintf, strlcpy, or explicit bounds checks",
364        pattern: r"\bstrcpy\s*\(",
365        api_call: "strcpy",
366        message: "strcpy performs an unbounded copy",
367        fix_suggestion: "Replace strcpy with a bounded copy strategy",
368    },
369    RegexRuleSpec {
370        id: "C003",
371        name: "sprintf-call",
372        category: MisuseCategory::Security,
373        severity: MisuseSeverity::High,
374        description: "sprintf writes formatted data without a size bound",
375        correct_usage: "Use snprintf with the destination buffer size",
376        pattern: r"\bsprintf\s*\(",
377        api_call: "sprintf",
378        message: "sprintf can overflow fixed-size buffers",
379        fix_suggestion: "Use snprintf(buffer, size, ...) instead",
380    },
381    RegexRuleSpec {
382        id: "C004",
383        name: "scanf-string-without-width",
384        category: MisuseCategory::Security,
385        severity: MisuseSeverity::High,
386        description: "scanf with %s and no width limit can overflow the destination buffer",
387        correct_usage: "Provide a width specifier or use fgets",
388        pattern: r#"\bscanf\s*\(\s*"%s"#,
389        api_call: "scanf",
390        message: "scanf(\"%s\") reads unbounded input into a buffer",
391        fix_suggestion: "Add a width limit or use fgets plus parsing",
392    },
393    RegexRuleSpec {
394        id: "C005",
395        name: "system-call",
396        category: MisuseCategory::Security,
397        severity: MisuseSeverity::High,
398        description: "system executes a shell command and is dangerous with dynamic input",
399        correct_usage: "Use execve-family APIs with validated arguments where possible",
400        pattern: r"\bsystem\s*\(",
401        api_call: "system",
402        message: "system executes a shell and is a common command injection vector",
403        fix_suggestion: "Avoid shell execution or tightly validate the command source",
404    },
405];
406
407const CPP_RULE_SPECS: &[RegexRuleSpec] = &[
408    RegexRuleSpec {
409        id: "CPP001",
410        name: "strcpy-call",
411        category: MisuseCategory::Security,
412        severity: MisuseSeverity::High,
413        description: "strcpy performs unbounded copies and easily overflows buffers",
414        correct_usage: "Use std::string, snprintf, or another bounded copy strategy",
415        pattern: r"\bstrcpy\s*\(",
416        api_call: "strcpy",
417        message: "strcpy performs an unbounded copy",
418        fix_suggestion: "Use std::string or a bounded copy API instead",
419    },
420    RegexRuleSpec {
421        id: "CPP002",
422        name: "sprintf-call",
423        category: MisuseCategory::Security,
424        severity: MisuseSeverity::High,
425        description: "sprintf writes formatted data without a size bound",
426        correct_usage: "Use snprintf or std::format into a bounded container",
427        pattern: r"\bsprintf\s*\(",
428        api_call: "sprintf",
429        message: "sprintf can overflow fixed-size buffers",
430        fix_suggestion: "Use snprintf or a safer formatting abstraction",
431    },
432    RegexRuleSpec {
433        id: "CPP003",
434        name: "auto-ptr",
435        category: MisuseCategory::Resources,
436        severity: MisuseSeverity::Medium,
437        description: "std::auto_ptr is obsolete and has broken transfer semantics",
438        correct_usage: "Use std::unique_ptr or std::shared_ptr",
439        pattern: r"\bstd::auto_ptr\s*<",
440        api_call: "std::auto_ptr",
441        message: "std::auto_ptr is obsolete and unsafe by modern ownership standards",
442        fix_suggestion: "Replace std::auto_ptr with std::unique_ptr or std::shared_ptr",
443    },
444    RegexRuleSpec {
445        id: "CPP004",
446        name: "raw-new",
447        category: MisuseCategory::Resources,
448        severity: MisuseSeverity::Medium,
449        description: "Raw new often leads to leaks and exception-safety issues",
450        correct_usage: "Use std::make_unique or stack allocation where possible",
451        pattern: r"\bnew\s+\w",
452        api_call: "new",
453        message: "Raw new makes ownership and exception safety harder to reason about",
454        fix_suggestion: "Use std::make_unique, containers, or stack allocation",
455    },
456    RegexRuleSpec {
457        id: "CPP005",
458        name: "system-call",
459        category: MisuseCategory::Security,
460        severity: MisuseSeverity::High,
461        description: "system executes a shell command and is dangerous with dynamic input",
462        correct_usage: "Use direct process APIs with validated arguments when possible",
463        pattern: r"(?:\bstd::)?system\s*\(",
464        api_call: "system",
465        message: "system executes a shell and is a common command injection vector",
466        fix_suggestion: "Avoid shell execution or tightly validate all command components",
467    },
468];
469
470const RUBY_RULE_SPECS: &[RegexRuleSpec] = &[
471    RegexRuleSpec {
472        id: "RB001",
473        name: "eval-call",
474        category: MisuseCategory::Security,
475        severity: MisuseSeverity::High,
476        description: "eval executes dynamic Ruby code and should be avoided",
477        correct_usage: "Use explicit dispatch or data parsing instead of dynamic code execution",
478        pattern: r"\beval\s*\(",
479        api_call: "eval",
480        message: "eval executes dynamic code and creates major security risk",
481        fix_suggestion: "Replace eval with explicit dispatch or structured parsing",
482    },
483    RegexRuleSpec {
484        id: "RB002",
485        name: "dynamic-send",
486        category: MisuseCategory::Security,
487        severity: MisuseSeverity::Medium,
488        description: "send can invoke arbitrary methods when fed untrusted method names",
489        correct_usage: "Use public_send on a strict allowlist of method names",
490        pattern: r"\.send\s*\(",
491        api_call: "send",
492        message: "send can dispatch to unsafe or unexpected methods",
493        fix_suggestion: "Use public_send with a reviewed allowlist",
494    },
495    RegexRuleSpec {
496        id: "RB003",
497        name: "system-call",
498        category: MisuseCategory::Security,
499        severity: MisuseSeverity::High,
500        description: "system executes a shell command and is dangerous with interpolated input",
501        correct_usage: "Use array-form process APIs with validated arguments",
502        pattern: r"\bsystem\s*\(",
503        api_call: "system",
504        message: "system is a common command injection footgun",
505        fix_suggestion: "Avoid shell execution or pass validated argv-style arguments",
506    },
507    RegexRuleSpec {
508        id: "RB004",
509        name: "yaml-load",
510        category: MisuseCategory::Security,
511        severity: MisuseSeverity::High,
512        description: "YAML.load can instantiate arbitrary objects from untrusted input",
513        correct_usage: "Use YAML.safe_load with permitted classes",
514        pattern: r"\bYAML\.load\s*\(",
515        api_call: "YAML.load",
516        message: "YAML.load can deserialize unsafe objects",
517        fix_suggestion: "Use YAML.safe_load and restrict allowed classes",
518    },
519    RegexRuleSpec {
520        id: "RB005",
521        name: "marshal-load",
522        category: MisuseCategory::Security,
523        severity: MisuseSeverity::High,
524        description: "Marshal.load on untrusted data is unsafe deserialization",
525        correct_usage: "Use JSON or another safe, schema-checked format",
526        pattern: r"\bMarshal\.load\s*\(",
527        api_call: "Marshal.load",
528        message: "Marshal.load performs unsafe native deserialization",
529        fix_suggestion: "Replace Marshal.load with a safer serialization format",
530    },
531];
532
533const PHP_RULE_SPECS: &[RegexRuleSpec] = &[
534    RegexRuleSpec {
535        id: "PH001",
536        name: "deprecated-mysql-functions",
537        category: MisuseCategory::Security,
538        severity: MisuseSeverity::High,
539        description: "mysql_* APIs are removed and encourage unsafe query construction",
540        correct_usage: "Use PDO or mysqli with prepared statements",
541        pattern: r"\bmysql_[a-z_]+\s*\(",
542        api_call: "mysql_*",
543        message: "mysql_* functions are removed and unsafe by modern standards",
544        fix_suggestion: "Migrate to PDO or mysqli prepared statements",
545    },
546    RegexRuleSpec {
547        id: "PH002",
548        name: "extract-call",
549        category: MisuseCategory::Security,
550        severity: MisuseSeverity::Medium,
551        description: "extract pollutes local scope and can overwrite important variables",
552        correct_usage: "Read array keys explicitly instead of splatting them into scope",
553        pattern: r"\bextract\s*\(",
554        api_call: "extract",
555        message: "extract can overwrite local variables and hide data flow",
556        fix_suggestion: "Assign required keys explicitly instead of using extract",
557    },
558    RegexRuleSpec {
559        id: "PH003",
560        name: "eval-call",
561        category: MisuseCategory::Security,
562        severity: MisuseSeverity::High,
563        description: "eval executes dynamic PHP code and should be avoided",
564        correct_usage: "Use explicit dispatch or data parsing instead of dynamic code execution",
565        pattern: r"\beval\s*\(",
566        api_call: "eval",
567        message: "eval executes dynamic code and creates major security risk",
568        fix_suggestion: "Replace eval with explicit dispatch or structured parsing",
569    },
570    RegexRuleSpec {
571        id: "PH004",
572        name: "variable-variables",
573        category: MisuseCategory::Security,
574        severity: MisuseSeverity::Medium,
575        description: "Variable variables make scope mutation hard to reason about",
576        correct_usage: "Use associative arrays or explicit variables instead",
577        pattern: r"\$\$[A-Za-z_]",
578        api_call: "$$",
579        message: "Variable variables obscure data flow and can enable unsafe access patterns",
580        fix_suggestion: "Use an array/map or explicit variable names instead",
581    },
582    RegexRuleSpec {
583        id: "PH005",
584        name: "unserialize-call",
585        category: MisuseCategory::Security,
586        severity: MisuseSeverity::High,
587        description: "unserialize on untrusted data can trigger object injection chains",
588        correct_usage: "Use json_decode or a safer schema-checked format",
589        pattern: r"\bunserialize\s*\(",
590        api_call: "unserialize",
591        message: "unserialize enables unsafe object deserialization",
592        fix_suggestion: "Replace unserialize with json_decode or a safe serializer",
593    },
594];
595
596const KOTLIN_RULE_SPECS: &[RegexRuleSpec] = &[
597    RegexRuleSpec {
598        id: "KT001",
599        name: "force-unwrapped-null",
600        category: MisuseCategory::ErrorHandling,
601        severity: MisuseSeverity::Medium,
602        description: "!! converts nullable values into runtime crashes",
603        correct_usage: "Use safe calls, let, requireNotNull, or explicit branching",
604        pattern: r"!!",
605        api_call: "!!",
606        message: "!! will throw NullPointerException on null values",
607        fix_suggestion: "Use safe calls or explicit null handling instead of !!",
608    },
609    RegexRuleSpec {
610        id: "KT002",
611        name: "lateinit-var",
612        category: MisuseCategory::ErrorHandling,
613        severity: MisuseSeverity::Low,
614        description: "lateinit shifts initialization failures to runtime",
615        correct_usage: "Prefer constructor injection or nullable/state wrappers",
616        pattern: r"\blateinit\s+var\b",
617        api_call: "lateinit",
618        message: "lateinit can fail at runtime if the property is read before initialization",
619        fix_suggestion: "Prefer constructor injection or explicit nullable state",
620    },
621    RegexRuleSpec {
622        id: "KT003",
623        name: "globalscope-launch",
624        category: MisuseCategory::Concurrency,
625        severity: MisuseSeverity::Medium,
626        description: "GlobalScope.launch escapes structured concurrency and leaks work",
627        correct_usage: "Launch from a lifecycle-bound CoroutineScope",
628        pattern: r"\bGlobalScope\.launch\s*\(",
629        api_call: "GlobalScope.launch",
630        message: "GlobalScope.launch detaches work from structured concurrency",
631        fix_suggestion: "Use a lifecycle-bound CoroutineScope instead",
632    },
633    RegexRuleSpec {
634        id: "KT004",
635        name: "runtime-exec",
636        category: MisuseCategory::Security,
637        severity: MisuseSeverity::High,
638        description: "Runtime.exec is dangerous with dynamic input and hard to sandbox correctly",
639        correct_usage: "Use structured APIs or strictly validated ProcessBuilder arguments",
640        pattern: r"\bRuntime\.getRuntime\(\)\.exec\s*\(",
641        api_call: "Runtime.exec",
642        message: "Runtime.exec is a common command injection footgun",
643        fix_suggestion: "Prefer library APIs or tightly validated ProcessBuilder arguments",
644    },
645    RegexRuleSpec {
646        id: "KT005",
647        name: "thread-sleep",
648        category: MisuseCategory::Concurrency,
649        severity: MisuseSeverity::Low,
650        description: "Thread.sleep blocks threads directly and is usually wrong in coroutine-based code",
651        correct_usage: "Use delay(...) in coroutines or higher-level scheduling",
652        pattern: r"\bThread\.sleep\s*\(",
653        api_call: "Thread.sleep",
654        message: "Thread.sleep blocks the current thread directly",
655        fix_suggestion: "Use delay(...) or a proper scheduler instead",
656    },
657];
658
659const SWIFT_RULE_SPECS: &[RegexRuleSpec] = &[
660    RegexRuleSpec {
661        id: "SW001",
662        name: "forced-cast",
663        category: MisuseCategory::ErrorHandling,
664        severity: MisuseSeverity::Medium,
665        description: "as! crashes at runtime when the cast fails",
666        correct_usage: "Use as? with conditional handling",
667        pattern: r"\bas!\b",
668        api_call: "as!",
669        message: "Forced casts crash when the runtime type is different",
670        fix_suggestion: "Use as? and handle the nil case explicitly",
671    },
672    RegexRuleSpec {
673        id: "SW002",
674        name: "forced-try",
675        category: MisuseCategory::ErrorHandling,
676        severity: MisuseSeverity::Medium,
677        description: "try! crashes when the call throws",
678        correct_usage: "Use do/catch or try? with explicit fallback",
679        pattern: r"\btry!\b",
680        api_call: "try!",
681        message: "try! crashes the process on thrown errors",
682        fix_suggestion: "Use do/catch or try? and handle failure explicitly",
683    },
684    RegexRuleSpec {
685        id: "SW003",
686        name: "force-unwrap",
687        category: MisuseCategory::ErrorHandling,
688        severity: MisuseSeverity::Medium,
689        description: "Force unwrapping optionals crashes at runtime on nil",
690        correct_usage: "Use if let, guard let, or nil-coalescing",
691        pattern: r"\b[A-Za-z_][A-Za-z0-9_]*!",
692        api_call: "!",
693        message: "Force unwraps crash when the optional is nil",
694        fix_suggestion: "Use optional binding or nil-coalescing instead of force unwraps",
695    },
696    RegexRuleSpec {
697        id: "SW004",
698        name: "nskeyedunarchiver",
699        category: MisuseCategory::Security,
700        severity: MisuseSeverity::High,
701        description: "Legacy NSKeyedUnarchiver APIs on untrusted data are unsafe",
702        correct_usage: "Use secure decoding APIs with requiresSecureCoding",
703        pattern: r"\bNSKeyedUnarchiver\.unarchiveObject",
704        api_call: "NSKeyedUnarchiver",
705        message: "Legacy unarchiving can deserialize unexpected object graphs",
706        fix_suggestion: "Use secure coding APIs and schema-checked decoding",
707    },
708    RegexRuleSpec {
709        id: "SW005",
710        name: "fatalerror-call",
711        category: MisuseCategory::ErrorHandling,
712        severity: MisuseSeverity::Low,
713        description: "fatalError terminates the process and is risky outside clearly impossible states",
714        correct_usage: "Return/throw recoverable errors where possible",
715        pattern: r"\bfatalError\s*\(",
716        api_call: "fatalError",
717        message: "fatalError terminates the process immediately",
718        fix_suggestion: "Use recoverable error handling unless the state is truly unreachable",
719    },
720];
721
/// Regex-driven API misuse rules for C# sources (`CS001`-`CS005`).
///
/// `regex_rule_specs_for_language` returns this table for `ApiLanguage::CSharp`.
/// Each entry pairs the rule metadata with the regex `pattern` that triggers it
/// and the message/fix text attached to a finding.
const CSHARP_RULE_SPECS: &[RegexRuleSpec] = &[
    // CS001: fires on the identifier `BinaryFormatter` as a whole word, wherever it appears.
    RegexRuleSpec {
        id: "CS001",
        name: "binaryformatter",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "BinaryFormatter is insecure and obsolete for untrusted data",
        correct_usage: "Use System.Text.Json or another safe serializer",
        pattern: r"\bBinaryFormatter\b",
        api_call: "BinaryFormatter",
        message: "BinaryFormatter is insecure and should not be used",
        fix_suggestion: "Use System.Text.Json or another safe serializer",
    },
    // CS002: fires on `GC.Collect` followed by optional whitespace and `(`.
    RegexRuleSpec {
        id: "CS002",
        name: "gc-collect",
        category: MisuseCategory::Resources,
        severity: MisuseSeverity::Low,
        description: "GC.Collect is rarely the right fix and often harms latency",
        correct_usage: "Remove manual GC triggers and profile the real allocation issue",
        pattern: r"\bGC\.Collect\s*\(",
        api_call: "GC.Collect",
        message: "GC.Collect is an unreliable manual GC hint and often harms performance",
        fix_suggestion: "Remove the call and fix the underlying allocation issue",
    },
    // CS003: fires on any `.Result` member access. The regex does not look at the
    // receiver, so a non-Task property named `Result` matches as well.
    RegexRuleSpec {
        id: "CS003",
        name: "task-result",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Task.Result blocks synchronously and can deadlock async flows",
        correct_usage: "Use await instead of blocking on Task.Result",
        pattern: r"\.Result\b",
        api_call: "Task.Result",
        message: "Task.Result blocks synchronously and can deadlock async contexts",
        fix_suggestion: "Use await and keep the async chain asynchronous",
    },
    // CS004: fires on any `.Wait(` call; as with CS003 the receiver type is not inspected.
    RegexRuleSpec {
        id: "CS004",
        name: "task-wait",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Task.Wait blocks synchronously and can deadlock async flows",
        correct_usage: "Use await or WhenAll/WhenAny instead of blocking waits",
        pattern: r"\.Wait\s*\(",
        api_call: "Task.Wait",
        message: "Task.Wait blocks synchronously and can deadlock async contexts",
        fix_suggestion: "Use await or asynchronous coordination primitives instead",
    },
    // CS005: fires on `Process.Start` followed by optional whitespace and `(`.
    RegexRuleSpec {
        id: "CS005",
        name: "process-start",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Process.Start is dangerous with untrusted paths or arguments",
        correct_usage: "Use strict allowlists and avoid shell execution semantics",
        pattern: r"\bProcess\.Start\s*\(",
        api_call: "Process.Start",
        message: "Process.Start can enable command injection with untrusted inputs",
        fix_suggestion: "Validate executable and arguments against a strict allowlist",
    },
];
784
/// Regex-driven API misuse rules for Scala sources (`SC001`-`SC005`).
///
/// `regex_rule_specs_for_language` returns this table for `ApiLanguage::Scala`.
/// Each entry pairs the rule metadata with the regex `pattern` that triggers it
/// and the message/fix text attached to a finding.
const SCALA_RULE_SPECS: &[RegexRuleSpec] = &[
    // SC001: fires on the bare word `null` anywhere in the matched text.
    RegexRuleSpec {
        id: "SC001",
        name: "null-usage",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Low,
        description: "null bypasses Scala's stronger option-based absence modeling",
        correct_usage: "Use Option instead of null",
        pattern: r"\bnull\b",
        api_call: "null",
        message: "null reintroduces runtime absence bugs into Scala code",
        fix_suggestion: "Use Option and explicit pattern matching instead",
    },
    // SC002: fires on `asInstanceOf[` (the opening bracket of the type argument is required).
    RegexRuleSpec {
        id: "SC002",
        name: "asinstanceof-cast",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Medium,
        description: "asInstanceOf crashes at runtime when the type assumption is wrong",
        correct_usage: "Use pattern matching or TypeTag/ClassTag-aware APIs",
        pattern: r"\basInstanceOf\[",
        api_call: "asInstanceOf",
        message: "asInstanceOf creates unchecked runtime casts",
        fix_suggestion: "Use pattern matching or safer typed abstractions",
    },
    // SC003: fires on `Await.result` followed by optional whitespace and `(`.
    RegexRuleSpec {
        id: "SC003",
        name: "await-result",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Await.result blocks threads and can collapse asynchronous throughput",
        correct_usage: "Compose futures asynchronously instead of blocking",
        pattern: r"\bAwait\.result\s*\(",
        api_call: "Await.result",
        message: "Await.result blocks threads and can create deadlocks or latency spikes",
        fix_suggestion: "Use map/flatMap/for-comprehensions instead of blocking",
    },
    // SC004: fires only on the fully qualified `scala.collection.mutable.` prefix.
    RegexRuleSpec {
        id: "SC004",
        name: "mutable-collection",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Low,
        description: "scala.collection.mutable structures are harder to reason about under concurrency",
        correct_usage: "Prefer immutable collections unless mutation is intentionally scoped",
        pattern: r"\bscala\.collection\.mutable\.",
        api_call: "scala.collection.mutable",
        message: "Mutable collections can hide shared-state bugs",
        fix_suggestion: "Prefer immutable collections or encapsulate mutation carefully",
    },
    // SC005: fires on `sys.process.Process` followed by optional whitespace and `(`.
    RegexRuleSpec {
        id: "SC005",
        name: "sys-process",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "sys.process.Process executes external commands and is dangerous with input-derived values",
        correct_usage: "Use library APIs or validate commands and arguments against an allowlist",
        pattern: r"\bsys\.process\.Process\s*\(",
        api_call: "sys.process.Process",
        message: "sys.process.Process can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell-style execution or strictly validate all command parts",
    },
];
847
/// Regex-driven API misuse rules for Elixir sources (`EX001`-`EX005`).
///
/// `regex_rule_specs_for_language` returns this table for `ApiLanguage::Elixir`.
/// Every pattern except EX005 stops at the opening parenthesis of the call, so
/// only parenthesized call forms are matched.
const ELIXIR_RULE_SPECS: &[RegexRuleSpec] = &[
    // EX001: fires on `String.to_atom(`.
    RegexRuleSpec {
        id: "EX001",
        name: "string-to-atom",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "String.to_atom on untrusted input can exhaust the VM atom table",
        correct_usage: "Use String.to_existing_atom only for reviewed values or keep strings",
        pattern: r"\bString\.to_atom\s*\(",
        api_call: "String.to_atom",
        message: "String.to_atom can permanently grow the atom table from user input",
        fix_suggestion: "Keep values as strings or use a reviewed to_existing_atom path",
    },
    // EX002: fires on `Code.eval_string(`.
    RegexRuleSpec {
        id: "EX002",
        name: "code-eval-string",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Code.eval_string executes dynamic Elixir code and should be avoided",
        correct_usage: "Use explicit dispatch or data parsing instead of dynamic evaluation",
        pattern: r"\bCode\.eval_string\s*\(",
        api_call: "Code.eval_string",
        message: "Code.eval_string executes dynamic code and is a major security risk",
        fix_suggestion: "Replace dynamic evaluation with explicit dispatch or parsing",
    },
    // EX003: fires on `:erlang.binary_to_term(`; the leading `:` is part of the pattern.
    RegexRuleSpec {
        id: "EX003",
        name: "binary-to-term",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: ":erlang.binary_to_term on untrusted data is unsafe deserialization",
        correct_usage: "Use safe formats like JSON or term_to_binary only for trusted data",
        pattern: r":erlang\.binary_to_term\s*\(",
        api_call: ":erlang.binary_to_term",
        message: ":erlang.binary_to_term can deserialize unsafe terms from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // EX004: fires on `File.read!(`.
    RegexRuleSpec {
        id: "EX004",
        name: "file-read-bang",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::Low,
        description: "Bang file APIs raise instead of returning tagged tuples",
        correct_usage: "Prefer File.read/1 with explicit {:ok, data} / {:error, reason} handling",
        pattern: r"\bFile\.read!\s*\(",
        api_call: "File.read!",
        message: "File.read! raises on failure instead of returning a recoverable error",
        fix_suggestion: "Use File.read/1 and handle the returned tuple explicitly",
    },
    // EX005: fires on `Task.await(<first arg>, :infinity)`. The first argument is
    // matched with `[^,]+`, so a first argument that itself contains a comma
    // (e.g. a nested call with two arguments) does not match.
    RegexRuleSpec {
        id: "EX005",
        name: "task-await-infinity",
        category: MisuseCategory::Concurrency,
        severity: MisuseSeverity::Medium,
        description: "Task.await with :infinity can stall callers indefinitely",
        correct_usage: "Use bounded timeouts and supervised retry/cancellation behavior",
        pattern: r"\bTask\.await\s*\([^,]+,\s*:infinity\s*\)",
        api_call: "Task.await",
        message: "Task.await(..., :infinity) can block forever",
        fix_suggestion: "Use a bounded timeout and explicit failure handling",
    },
];
910
/// Regex-driven API misuse rules for Lua sources (`LU001`-`LU005`).
///
/// `regex_rule_specs_for_language` returns this same table for both
/// `ApiLanguage::Lua` and `ApiLanguage::Luau`.
const LUA_RULE_SPECS: &[RegexRuleSpec] = &[
    // LU001: anchored with `^`, so it fires only when an identifier followed by
    // `=` sits at the very start of the matched text.
    // NOTE(review): whether the matcher is handed the raw or the trimmed line is
    // not visible here, so it is unclear whether indented assignments are covered;
    // the pattern also accepts `==` because only the first `=` is required. Confirm.
    RegexRuleSpec {
        id: "LU001",
        name: "implicit-global",
        category: MisuseCategory::CallOrder,
        severity: MisuseSeverity::Low,
        description: "Assigning without local leaks mutable globals and creates hidden coupling",
        correct_usage: "Declare locals explicitly with local name = ...",
        pattern: r"^[A-Za-z_][A-Za-z0-9_]*\s*=",
        api_call: "global assignment",
        message: "Implicit global assignment leaks state outside local scope",
        fix_suggestion: "Prefix the binding with local to keep scope explicit",
    },
    // LU002: fires on `load(` or `loadstring(`. The leading `\b` also accepts a
    // method-style call such as `obj.load(` because `.` is a non-word character.
    RegexRuleSpec {
        id: "LU002",
        name: "dynamic-load",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "load/loadstring execute dynamic Lua code and should be avoided",
        correct_usage: "Use structured parsing or explicit dispatch instead of dynamic evaluation",
        pattern: r"\b(?:loadstring|load)\s*\(",
        api_call: "load",
        message: "Dynamic code loading executes attacker-controlled Lua if fed untrusted input",
        fix_suggestion: "Replace dynamic evaluation with explicit dispatch or parsing",
    },
    // LU003: fires on `os.execute(`.
    RegexRuleSpec {
        id: "LU003",
        name: "os-execute",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "os.execute shells out and is dangerous with dynamic input",
        correct_usage: "Avoid shell execution or validate every command component",
        pattern: r"\bos\.execute\s*\(",
        api_call: "os.execute",
        message: "os.execute can enable command injection with untrusted input",
        fix_suggestion: "Avoid shelling out or strictly validate the command source",
    },
    // LU004: fires on `io.popen(`.
    RegexRuleSpec {
        id: "LU004",
        name: "io-popen",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "io.popen launches shell commands and should be treated as high risk",
        correct_usage: "Use safer process APIs or validate all command components",
        pattern: r"\bio\.popen\s*\(",
        api_call: "io.popen",
        message: "io.popen can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell execution or validate every command component",
    },
    // LU005: fires on `dofile(` or `loadfile(`; findings always report `dofile` as the API call.
    RegexRuleSpec {
        id: "LU005",
        name: "dofile-loadfile",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::Medium,
        description: "dofile/loadfile execute external files and are risky with user-controlled paths",
        correct_usage: "Validate file origins strictly before executing them",
        pattern: r"\b(?:dofile|loadfile)\s*\(",
        api_call: "dofile",
        message: "Executing external files is dangerous when the path is not fully trusted",
        fix_suggestion: "Avoid dynamic file execution or tightly validate trusted origins",
    },
];
973
/// Regex-driven API misuse rules for OCaml sources (`OC001`-`OC005`).
///
/// `regex_rule_specs_for_language` returns this table for `ApiLanguage::Ocaml`.
/// Unlike the tables whose patterns end in `\(`, every pattern here is delimited
/// by word boundaries only, so a bare mention of the qualified name is enough to
/// trigger the rule.
const OCAML_RULE_SPECS: &[RegexRuleSpec] = &[
    // OC001: fires on the qualified name `Marshal.from_string`.
    RegexRuleSpec {
        id: "OC001",
        name: "marshal-from-string",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Marshal.from_string on untrusted data is unsafe native deserialization",
        correct_usage: "Use a safe, schema-checked serialization format",
        pattern: r"\bMarshal\.from_string\b",
        api_call: "Marshal.from_string",
        message: "Marshal.from_string can deserialize unsafe values from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // OC002: fires on the qualified name `Marshal.from_channel`.
    RegexRuleSpec {
        id: "OC002",
        name: "marshal-from-channel",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Marshal.from_channel on untrusted data is unsafe native deserialization",
        correct_usage: "Use a safe, schema-checked serialization format",
        pattern: r"\bMarshal\.from_channel\b",
        api_call: "Marshal.from_channel",
        message: "Marshal.from_channel can deserialize unsafe values from untrusted input",
        fix_suggestion: "Use a safer serialization format for external input",
    },
    // OC003: fires on the qualified name `Sys.command`.
    RegexRuleSpec {
        id: "OC003",
        name: "sys-command",
        category: MisuseCategory::Security,
        severity: MisuseSeverity::High,
        description: "Sys.command executes a shell command and is dangerous with dynamic input",
        correct_usage: "Prefer direct library APIs or validate allowed commands strictly",
        pattern: r"\bSys\.command\b",
        api_call: "Sys.command",
        message: "Sys.command can enable command injection with untrusted input",
        fix_suggestion: "Avoid shell execution or tightly validate the command source",
    },
    // OC004: fires on the qualified name `Obj.magic`.
    RegexRuleSpec {
        id: "OC004",
        name: "obj-magic",
        category: MisuseCategory::ErrorHandling,
        severity: MisuseSeverity::High,
        description: "Obj.magic bypasses the type system and can produce memory-unsound behavior",
        correct_usage: "Use typed abstractions or explicit variant handling",
        pattern: r"\bObj\.magic\b",
        api_call: "Obj.magic",
        message: "Obj.magic bypasses type safety and can create undefined behavior",
        fix_suggestion: "Refactor to a typed abstraction instead of coercing with Obj.magic",
    },
    // OC005: fires on `open_in` / `open_out` as whole words. Longer names such as
    // `open_in_bin` do not match because `_` is a word character and defeats the
    // trailing `\b`. Findings always report `open_in` as the API call.
    RegexRuleSpec {
        id: "OC005",
        name: "open-in-out",
        category: MisuseCategory::Resources,
        severity: MisuseSeverity::Low,
        description: "open_in/open_out require explicit close calls and are easy to leak",
        correct_usage: "Use In_channel.with_open_* or Out_channel.with_open_* helpers",
        pattern: r"\b(?:open_in|open_out)\b",
        api_call: "open_in",
        message: "open_in/open_out require explicit close handling and are easy to leak",
        fix_suggestion: "Use with_open_* helpers to scope the channel lifetime",
    },
];
1036
/// Every `ApiLanguage` variant, listed in declaration order.
///
/// Exposed through `all_api_languages()`. This list is written by hand, so a
/// variant added to `ApiLanguage` must be appended here as well; nothing in the
/// compiler enforces that the two stay in sync.
const ALL_API_LANGUAGES: &[ApiLanguage] = &[
    ApiLanguage::Python,
    ApiLanguage::Rust,
    ApiLanguage::Go,
    ApiLanguage::Java,
    ApiLanguage::JavaScript,
    ApiLanguage::TypeScript,
    ApiLanguage::C,
    ApiLanguage::Cpp,
    ApiLanguage::Ruby,
    ApiLanguage::Php,
    ApiLanguage::Kotlin,
    ApiLanguage::Swift,
    ApiLanguage::CSharp,
    ApiLanguage::Scala,
    ApiLanguage::Elixir,
    ApiLanguage::Lua,
    ApiLanguage::Luau,
    ApiLanguage::Ocaml,
];
1057
1058// =============================================================================
1059// Rule Definitions
1060// =============================================================================
1061
1062/// Built-in Python API misuse rules
1063fn python_rules() -> Vec<APIRule> {
1064    vec![
1065        APIRule {
1066            id: "PY001".to_string(),
1067            name: "missing-timeout".to_string(),
1068            category: MisuseCategory::Parameters,
1069            severity: MisuseSeverity::High,
1070            description: "requests.get/post/etc without timeout parameter can hang indefinitely"
1071                .to_string(),
1072            correct_usage: "requests.get(url, timeout=30)".to_string(),
1073        },
1074        APIRule {
1075            id: "PY002".to_string(),
1076            name: "bare-except".to_string(),
1077            category: MisuseCategory::ErrorHandling,
1078            severity: MisuseSeverity::Medium,
1079            description: "Bare except clause catches all exceptions including KeyboardInterrupt"
1080                .to_string(),
1081            correct_usage: "except Exception as e:".to_string(),
1082        },
1083        APIRule {
1084            id: "PY003".to_string(),
1085            name: "weak-hash-md5".to_string(),
1086            category: MisuseCategory::Crypto,
1087            severity: MisuseSeverity::High,
1088            description: "MD5 is cryptographically broken, don't use for security purposes"
1089                .to_string(),
1090            correct_usage: "hashlib.sha256() or bcrypt for passwords".to_string(),
1091        },
1092        APIRule {
1093            id: "PY004".to_string(),
1094            name: "weak-hash-sha1".to_string(),
1095            category: MisuseCategory::Crypto,
1096            severity: MisuseSeverity::High,
1097            description: "SHA1 is cryptographically weak, don't use for security purposes"
1098                .to_string(),
1099            correct_usage: "hashlib.sha256() or stronger".to_string(),
1100        },
1101        APIRule {
1102            id: "PY005".to_string(),
1103            name: "unclosed-file".to_string(),
1104            category: MisuseCategory::Resources,
1105            severity: MisuseSeverity::Medium,
1106            description: "File opened without context manager may not be properly closed"
1107                .to_string(),
1108            correct_usage: "with open(path) as f:".to_string(),
1109        },
1110        APIRule {
1111            id: "PY006".to_string(),
1112            name: "insecure-random".to_string(),
1113            category: MisuseCategory::Security,
1114            severity: MisuseSeverity::High,
1115            description: "random module is not cryptographically secure".to_string(),
1116            correct_usage: "secrets.token_bytes() or secrets.token_hex()".to_string(),
1117        },
1118    ]
1119}
1120
1121/// Built-in Rust API misuse rules
1122fn rust_rules() -> Vec<APIRule> {
1123    vec![
1124        APIRule {
1125            id: "RS001".to_string(),
1126            name: "mutex-lock-unwrap".to_string(),
1127            category: MisuseCategory::Concurrency,
1128            severity: MisuseSeverity::Medium,
1129            description: "Mutex::lock().unwrap() can panic and amplify lock contention (CWE-833)"
1130                .to_string(),
1131            correct_usage:
1132                "Prefer try_lock()/error handling or explicit poison recovery instead of unwrap()"
1133                    .to_string(),
1134        },
1135        APIRule {
1136            id: "RS002".to_string(),
1137            name: "file-open-without-context".to_string(),
1138            category: MisuseCategory::ErrorHandling,
1139            severity: MisuseSeverity::Low,
1140            description:
1141                "File::open without contextual error mapping makes failures hard to triage"
1142                    .to_string(),
1143            correct_usage:
1144                "File::open(path).with_context(|| format!(\"opening {}\", path.display()))?"
1145                    .to_string(),
1146        },
1147        APIRule {
1148            id: "RS003".to_string(),
1149            name: "unbounded-with-capacity".to_string(),
1150            category: MisuseCategory::Resources,
1151            severity: MisuseSeverity::High,
1152            description:
1153                "Vec::with_capacity fed from unbounded input can cause memory exhaustion (CWE-770)"
1154                    .to_string(),
1155            correct_usage: "Clamp capacity input before allocation (e.g. min(user_len, MAX))"
1156                .to_string(),
1157        },
1158        APIRule {
1159            id: "RS004".to_string(),
1160            name: "detached-tokio-spawn".to_string(),
1161            category: MisuseCategory::Concurrency,
1162            severity: MisuseSeverity::Medium,
1163            description: "tokio::spawn without retaining JoinHandle risks silent task failures"
1164                .to_string(),
1165            correct_usage: "Store JoinHandle values and await/join them".to_string(),
1166        },
1167        APIRule {
1168            id: "RS005".to_string(),
1169            name: "hashmap-order-dependence".to_string(),
1170            category: MisuseCategory::CallOrder,
1171            severity: MisuseSeverity::Low,
1172            description:
1173                "HashMap iteration order is non-deterministic; relying on it can break logic"
1174                    .to_string(),
1175            correct_usage:
1176                "Collect keys and sort them, or use BTreeMap/IndexMap when stable order is required"
1177                    .to_string(),
1178        },
1179        APIRule {
1180            id: "RS006".to_string(),
1181            name: "clone-in-hot-loop".to_string(),
1182            category: MisuseCategory::Resources,
1183            severity: MisuseSeverity::Low,
1184            description: "clone() inside loop bodies can create avoidable allocation pressure"
1185                .to_string(),
1186            correct_usage: "Borrow or move values instead of cloning in tight loops".to_string(),
1187        },
1188    ]
1189}
1190
1191fn regex_rule_specs_for_language(language: ApiLanguage) -> &'static [RegexRuleSpec] {
1192    match language {
1193        ApiLanguage::Python | ApiLanguage::Rust => &[],
1194        ApiLanguage::Go => GO_RULE_SPECS,
1195        ApiLanguage::Java => JAVA_RULE_SPECS,
1196        ApiLanguage::JavaScript => JAVASCRIPT_RULE_SPECS,
1197        ApiLanguage::TypeScript => TYPESCRIPT_RULE_SPECS,
1198        ApiLanguage::C => C_RULE_SPECS,
1199        ApiLanguage::Cpp => CPP_RULE_SPECS,
1200        ApiLanguage::Ruby => RUBY_RULE_SPECS,
1201        ApiLanguage::Php => PHP_RULE_SPECS,
1202        ApiLanguage::Kotlin => KOTLIN_RULE_SPECS,
1203        ApiLanguage::Swift => SWIFT_RULE_SPECS,
1204        ApiLanguage::CSharp => CSHARP_RULE_SPECS,
1205        ApiLanguage::Scala => SCALA_RULE_SPECS,
1206        ApiLanguage::Elixir => ELIXIR_RULE_SPECS,
1207        ApiLanguage::Lua | ApiLanguage::Luau => LUA_RULE_SPECS,
1208        ApiLanguage::Ocaml => OCAML_RULE_SPECS,
1209    }
1210}
1211
/// Return the static list of every language the API checker knows about.
///
/// Thin accessor over `ALL_API_LANGUAGES`; the slice is `'static`, so callers
/// can iterate it without allocating.
fn all_api_languages() -> &'static [ApiLanguage] {
    ALL_API_LANGUAGES
}
1215
1216// =============================================================================
1217// CLI Arguments
1218// =============================================================================
1219
// Maintainer note: this struct derives `clap::Args`, and clap's derive turns the
// `///` comments on the struct and on each field into the command's help text.
// Editing those doc comments therefore changes user-visible CLI output; notes
// that are meant only for maintainers are written as plain `//` comments.
/// Detect API misuse patterns in code
///
/// Analyzes code for common API misuse patterns like missing timeouts,
/// bare except clauses, weak crypto usage, and unclosed resources.
///
/// # Example
///
/// ```bash
/// tldr api-check src/
/// tldr api-check src/main.py --category crypto
/// tldr api-check src/ --severity high
/// ```
#[derive(Debug, Args)]
pub struct ApiCheckArgs {
    // Positional argument; shown as `path` in usage output.
    /// File or directory to analyze (path to file or directory)
    #[arg(value_name = "path")]
    pub path: PathBuf,

    // `--category a,b`: comma-separated list; `None` when the flag is absent.
    /// Filter by misuse category
    #[arg(long, value_delimiter = ',')]
    pub category: Option<Vec<MisuseCategory>>,

    // `--severity a,b`: comma-separated list; `None` when the flag is absent.
    // NOTE(review): the help text says "minimum severity" but the flag parses a
    // list of severities; how the list is interpreted is decided by
    // `filter_findings`, which is not visible here. Confirm the wording matches.
    /// Filter by minimum severity
    #[arg(long, value_delimiter = ',')]
    pub severity: Option<Vec<MisuseSeverity>>,

    // `--output <file>` / `-O <file>`; when absent the report goes through the
    // `OutputWriter` instead of being written to a file.
    /// Output file (optional, stdout if not specified)
    #[arg(long, short = 'O')]
    pub output: Option<PathBuf>,
}
1250
1251impl ApiCheckArgs {
1252    /// Run the api-check command
1253    pub fn run(&self, format: crate::output::OutputFormat, quiet: bool) -> Result<()> {
1254        let writer = OutputWriter::new(format, quiet);
1255
1256        writer.progress(&format!(
1257            "Checking {} for API misuse patterns...",
1258            self.path.display()
1259        ));
1260
1261        // Validate path exists
1262        if !self.path.exists() {
1263            return Err(RemainingError::file_not_found(&self.path).into());
1264        }
1265
1266        let all_rules_count = all_api_languages()
1267            .iter()
1268            .map(|language| rules_for_language(*language).len() as u32)
1269            .sum();
1270
1271        // Collect files to analyze
1272        let files = collect_files(&self.path)?;
1273        writer.progress(&format!("Found {} files to analyze", files.len()));
1274
1275        // Analyze each file
1276        let mut all_findings: Vec<MisuseFinding> = Vec::new();
1277        let mut files_scanned = 0u32;
1278
1279        for file_path in &files {
1280            let Some(language) = detect_language(file_path) else {
1281                continue;
1282            };
1283            let rules = rules_for_language(language);
1284            match analyze_file(file_path, &rules, language) {
1285                Ok(findings) => {
1286                    all_findings.extend(findings);
1287                    files_scanned += 1;
1288                }
1289                Err(e) => {
1290                    writer.progress(&format!(
1291                        "Warning: Failed to analyze {}: {}",
1292                        file_path.display(),
1293                        e
1294                    ));
1295                }
1296            }
1297        }
1298
1299        // Apply filters
1300        let filtered_findings = filter_findings(
1301            all_findings,
1302            self.category.as_deref(),
1303            self.severity.as_deref(),
1304        );
1305
1306        // Build summary
1307        let summary = build_summary(&filtered_findings, files_scanned);
1308
1309        // Build report
1310        let report = APICheckReport {
1311            findings: filtered_findings,
1312            summary,
1313            rules_applied: all_rules_count,
1314        };
1315
1316        // Write output
1317        if let Some(ref output_path) = self.output {
1318            if writer.is_text() {
1319                let text = format_api_check_text(&report);
1320                fs::write(output_path, text)?;
1321            } else {
1322                let json = serde_json::to_string_pretty(&report)?;
1323                fs::write(output_path, json)?;
1324            }
1325        } else if writer.is_text() {
1326            let text = format_api_check_text(&report);
1327            writer.write_text(&text)?;
1328        } else {
1329            writer.write(&report)?;
1330        }
1331
1332        Ok(())
1333    }
1334}
1335
1336// =============================================================================
1337// File Collection
1338// =============================================================================
1339
1340/// Collect supported source files from a path
1341fn collect_files(path: &Path) -> Result<Vec<PathBuf>> {
1342    let mut files = Vec::new();
1343
1344    if path.is_file() {
1345        if is_supported_file(path) {
1346            files.push(path.to_path_buf());
1347        }
1348    } else if path.is_dir() {
1349        for entry in WalkDir::new(path)
1350            .follow_links(false)
1351            .into_iter()
1352            .filter_map(|e| e.ok())
1353        {
1354            if files.len() >= MAX_DIRECTORY_FILES as usize {
1355                break;
1356            }
1357
1358            let entry_path = entry.path();
1359            if entry_path.is_file() && is_supported_file(entry_path) {
1360                // Check file size
1361                if let Ok(metadata) = fs::metadata(entry_path) {
1362                    if metadata.len() <= MAX_FILE_SIZE {
1363                        files.push(entry_path.to_path_buf());
1364                    }
1365                }
1366            }
1367        }
1368    }
1369
1370    Ok(files)
1371}
1372
/// Report whether `path` has an extension that maps to a known `ApiLanguage`.
///
/// Delegates to `detect_language`, so only the extension is inspected; the
/// file is neither opened nor required to exist.
fn is_supported_file(path: &Path) -> bool {
    detect_language(path).is_some()
}
1377
1378pub(crate) fn detect_language(path: &Path) -> Option<ApiLanguage> {
1379    match path.extension().and_then(|e| e.to_str()) {
1380        Some("py") => Some(ApiLanguage::Python),
1381        Some("rs") => Some(ApiLanguage::Rust),
1382        Some("go") => Some(ApiLanguage::Go),
1383        Some("java") => Some(ApiLanguage::Java),
1384        Some("js") | Some("jsx") | Some("mjs") | Some("cjs") => Some(ApiLanguage::JavaScript),
1385        Some("ts") | Some("tsx") => Some(ApiLanguage::TypeScript),
1386        Some("c") | Some("h") => Some(ApiLanguage::C),
1387        Some("cpp") | Some("hpp") | Some("cc") | Some("cxx") => Some(ApiLanguage::Cpp),
1388        Some("rb") => Some(ApiLanguage::Ruby),
1389        Some("php") => Some(ApiLanguage::Php),
1390        Some("kt") | Some("kts") => Some(ApiLanguage::Kotlin),
1391        Some("swift") => Some(ApiLanguage::Swift),
1392        Some("cs") => Some(ApiLanguage::CSharp),
1393        Some("scala") => Some(ApiLanguage::Scala),
1394        Some("ex") | Some("exs") => Some(ApiLanguage::Elixir),
1395        Some("lua") => Some(ApiLanguage::Lua),
1396        Some("luau") => Some(ApiLanguage::Luau),
1397        Some("ml") | Some("mli") => Some(ApiLanguage::Ocaml),
1398        _ => None,
1399    }
1400}
1401
1402pub(crate) fn rules_for_language(language: ApiLanguage) -> Vec<APIRule> {
1403    match language {
1404        ApiLanguage::Python => python_rules(),
1405        ApiLanguage::Rust => rust_rules(),
1406        _ => regex_rule_specs_for_language(language)
1407            .iter()
1408            .copied()
1409            .map(RegexRuleSpec::rule)
1410            .collect(),
1411    }
1412}
1413
1414// =============================================================================
1415// Analysis Engine
1416// =============================================================================
1417
1418/// Analyze a single file for API misuse
1419pub(crate) fn analyze_file(
1420    path: &Path,
1421    rules: &[APIRule],
1422    language: ApiLanguage,
1423) -> Result<Vec<MisuseFinding>> {
1424    let content = fs::read_to_string(path)?;
1425    let file_str = path.display().to_string();
1426    let mut findings = Vec::new();
1427    let mut prev_trimmed = String::new();
1428    let file_has_hashmap = matches!(language, ApiLanguage::Rust) && content.contains("HashMap");
1429
1430    for (line_num, line) in content.lines().enumerate() {
1431        let line_number = (line_num + 1) as u32;
1432        let trimmed = line.trim();
1433        let rust_ctx = RustLineContext {
1434            file_has_hashmap,
1435            previous_line: prev_trimmed.as_str(),
1436            previous_is_loop: prev_trimmed.starts_with("for ")
1437                || prev_trimmed.starts_with("while "),
1438        };
1439
1440        // Check each rule
1441        for rule in rules {
1442            if let Some(finding) =
1443                check_rule(rule, &file_str, line_number, line, language, &rust_ctx)
1444            {
1445                findings.push(finding);
1446            }
1447        }
1448        prev_trimmed = trimmed.to_string();
1449    }
1450
1451    Ok(findings)
1452}
1453
/// Per-line context passed alongside each line to the rule checks; the
/// Rust-specific checks read it to look slightly beyond the current line.
struct RustLineContext<'a> {
    /// `true` when the file is being analyzed as Rust and its text contains
    /// the substring `HashMap` anywhere.
    file_has_hashmap: bool,
    /// The previous line with surrounding whitespace trimmed (empty for the
    /// first line of the file).
    previous_line: &'a str,
    /// `true` when the previous trimmed line starts with `for ` or `while `.
    previous_is_loop: bool,
}
1459
1460/// Check a single rule against a line of code
1461fn check_rule(
1462    rule: &APIRule,
1463    file: &str,
1464    line: u32,
1465    line_text: &str,
1466    language: ApiLanguage,
1467    rust_ctx: &RustLineContext<'_>,
1468) -> Option<MisuseFinding> {
1469    let trimmed = line_text.trim();
1470
1471    // Skip comments
1472    if is_comment_line(trimmed, language) {
1473        return None;
1474    }
1475
1476    match rule.id.as_str() {
1477        "PY001" => check_missing_timeout(rule, file, line, trimmed),
1478        "PY002" => check_bare_except(rule, file, line, trimmed),
1479        "PY003" => check_md5_usage(rule, file, line, trimmed),
1480        "PY004" => check_sha1_usage(rule, file, line, trimmed),
1481        "PY005" => check_unclosed_file(rule, file, line, trimmed),
1482        "PY006" => check_insecure_random(rule, file, line, trimmed),
1483        "RS001" => check_mutex_lock_unwrap(rule, file, line, trimmed),
1484        "RS002" => check_file_open_without_context(rule, file, line, trimmed),
1485        "RS003" => check_unbounded_with_capacity(rule, file, line, trimmed),
1486        "RS004" => check_detached_tokio_spawn(rule, file, line, trimmed),
1487        "RS005" => check_hashmap_order_dependence(rule, file, line, trimmed, rust_ctx),
1488        "RS006" => check_clone_in_hot_loop(rule, file, line, trimmed, rust_ctx),
1489        _ => check_regex_rule(rule, file, line, trimmed, language),
1490    }
1491}
1492
1493fn is_comment_line(trimmed: &str, language: ApiLanguage) -> bool {
1494    match language {
1495        ApiLanguage::Python | ApiLanguage::Ruby | ApiLanguage::Elixir => trimmed.starts_with('#'),
1496        ApiLanguage::Rust
1497        | ApiLanguage::Go
1498        | ApiLanguage::Java
1499        | ApiLanguage::JavaScript
1500        | ApiLanguage::TypeScript
1501        | ApiLanguage::C
1502        | ApiLanguage::Cpp
1503        | ApiLanguage::Kotlin
1504        | ApiLanguage::Swift
1505        | ApiLanguage::CSharp
1506        | ApiLanguage::Scala => trimmed.starts_with("//"),
1507        ApiLanguage::Php => trimmed.starts_with("//") || trimmed.starts_with('#'),
1508        ApiLanguage::Lua | ApiLanguage::Luau => trimmed.starts_with("--"),
1509        ApiLanguage::Ocaml => trimmed.starts_with("(*"),
1510    }
1511}
1512
1513fn check_regex_rule(
1514    rule: &APIRule,
1515    file: &str,
1516    line: u32,
1517    line_text: &str,
1518    language: ApiLanguage,
1519) -> Option<MisuseFinding> {
1520    let spec = regex_rule_specs_for_language(language)
1521        .iter()
1522        .find(|spec| spec.id == rule.id)?;
1523    let regex = Regex::new(spec.pattern).ok()?;
1524    if !regex.is_match(line_text) {
1525        return None;
1526    }
1527
1528    let column = regex.find(line_text).map(|m| m.start()).unwrap_or(0) as u32;
1529    Some(MisuseFinding {
1530        file: file.to_string(),
1531        line,
1532        column,
1533        rule: rule.clone(),
1534        api_call: spec.api_call.to_string(),
1535        message: spec.message.to_string(),
1536        fix_suggestion: spec.fix_suggestion.to_string(),
1537        code_context: line_text.to_string(),
1538    })
1539}
1540
1541/// Check for requests without timeout
1542fn check_missing_timeout(
1543    rule: &APIRule,
1544    file: &str,
1545    line: u32,
1546    line_text: &str,
1547) -> Option<MisuseFinding> {
1548    // Look for requests.get/post/put/delete/patch without timeout
1549    let request_patterns = [
1550        "requests.get(",
1551        "requests.post(",
1552        "requests.put(",
1553        "requests.delete(",
1554        "requests.patch(",
1555        "requests.head(",
1556        "requests.options(",
1557    ];
1558
1559    for pattern in &request_patterns {
1560        if line_text.contains(pattern) && !line_text.contains("timeout") {
1561            let column = line_text.find(pattern).unwrap_or(0) as u32;
1562            return Some(MisuseFinding {
1563                file: file.to_string(),
1564                line,
1565                column,
1566                rule: rule.clone(),
1567                api_call: pattern.trim_end_matches('(').to_string(),
1568                message: format!(
1569                    "{} called without timeout parameter",
1570                    pattern.trim_end_matches('(')
1571                ),
1572                fix_suggestion: format!("Add timeout parameter: {}url, timeout=30)", pattern),
1573                code_context: line_text.to_string(),
1574            });
1575        }
1576    }
1577
1578    None
1579}
1580
1581/// Check for bare except clause
1582fn check_bare_except(
1583    rule: &APIRule,
1584    file: &str,
1585    line: u32,
1586    line_text: &str,
1587) -> Option<MisuseFinding> {
1588    // Look for "except:" without an exception type
1589    // Match "except:" but not "except SomeException:" or "except Exception as e:"
1590    if line_text.starts_with("except:") || line_text.contains(" except:") {
1591        let column = line_text.find("except:").unwrap_or(0) as u32;
1592        return Some(MisuseFinding {
1593            file: file.to_string(),
1594            line,
1595            column,
1596            rule: rule.clone(),
1597            api_call: "except".to_string(),
1598            message: "Bare except clause catches all exceptions including KeyboardInterrupt and SystemExit".to_string(),
1599            fix_suggestion: "Use 'except Exception as e:' to catch only program exceptions".to_string(),
1600            code_context: line_text.to_string(),
1601        });
1602    }
1603
1604    None
1605}
1606
1607/// Check for MD5 usage
1608fn check_md5_usage(
1609    rule: &APIRule,
1610    file: &str,
1611    line: u32,
1612    line_text: &str,
1613) -> Option<MisuseFinding> {
1614    // Look for hashlib.md5 usage
1615    if line_text.contains("hashlib.md5") || line_text.contains("md5(") {
1616        let column = line_text
1617            .find("hashlib.md5")
1618            .or_else(|| line_text.find("md5("))
1619            .unwrap_or(0) as u32;
1620        return Some(MisuseFinding {
1621            file: file.to_string(),
1622            line,
1623            column,
1624            rule: rule.clone(),
1625            api_call: "hashlib.md5".to_string(),
1626            message: "MD5 is cryptographically broken and should not be used for security purposes"
1627                .to_string(),
1628            fix_suggestion: "Use hashlib.sha256() or stronger. For passwords, use bcrypt or argon2"
1629                .to_string(),
1630            code_context: line_text.to_string(),
1631        });
1632    }
1633
1634    None
1635}
1636
1637/// Check for SHA1 usage
1638fn check_sha1_usage(
1639    rule: &APIRule,
1640    file: &str,
1641    line: u32,
1642    line_text: &str,
1643) -> Option<MisuseFinding> {
1644    // Look for hashlib.sha1 usage
1645    if line_text.contains("hashlib.sha1") || line_text.contains("sha1(") {
1646        let column = line_text
1647            .find("hashlib.sha1")
1648            .or_else(|| line_text.find("sha1("))
1649            .unwrap_or(0) as u32;
1650        return Some(MisuseFinding {
1651            file: file.to_string(),
1652            line,
1653            column,
1654            rule: rule.clone(),
1655            api_call: "hashlib.sha1".to_string(),
1656            message: "SHA1 is cryptographically weak and should not be used for security purposes"
1657                .to_string(),
1658            fix_suggestion: "Use hashlib.sha256() or stronger".to_string(),
1659            code_context: line_text.to_string(),
1660        });
1661    }
1662
1663    None
1664}
1665
1666/// Check for unclosed file
1667fn check_unclosed_file(
1668    rule: &APIRule,
1669    file: &str,
1670    line: u32,
1671    line_text: &str,
1672) -> Option<MisuseFinding> {
1673    // Look for "open(" that's not after "with "
1674    // This is a simplified check - a proper implementation would use AST
1675    if line_text.contains("open(")
1676        && !line_text.contains("with ")
1677        && !line_text.starts_with("with ")
1678    {
1679        // Check if it's an assignment (f = open(...))
1680        if line_text.contains("= open(") || line_text.contains("=open(") {
1681            let column = line_text.find("open(").unwrap_or(0) as u32;
1682            return Some(MisuseFinding {
1683                file: file.to_string(),
1684                line,
1685                column,
1686                rule: rule.clone(),
1687                api_call: "open".to_string(),
1688                message: "File opened without context manager may not be properly closed"
1689                    .to_string(),
1690                fix_suggestion: "Use 'with open(path) as f:' to ensure file is closed".to_string(),
1691                code_context: line_text.to_string(),
1692            });
1693        }
1694    }
1695
1696    None
1697}
1698
1699/// Check for insecure random usage
1700fn check_insecure_random(
1701    rule: &APIRule,
1702    file: &str,
1703    line: u32,
1704    line_text: &str,
1705) -> Option<MisuseFinding> {
1706    // Look for random.* usage that might be for security
1707    let insecure_patterns = [
1708        "random.randint(",
1709        "random.random(",
1710        "random.choice(",
1711        "random.randrange(",
1712    ];
1713
1714    // Only flag if it looks like it's being used for security
1715    // (contains words like token, secret, password, key)
1716    let security_indicators = ["token", "secret", "password", "key", "auth", "session"];
1717
1718    for pattern in &insecure_patterns {
1719        if line_text.contains(pattern) {
1720            // Check if the line or nearby context suggests security use
1721            let line_lower = line_text.to_lowercase();
1722            for indicator in &security_indicators {
1723                if line_lower.contains(indicator) {
1724                    let column = line_text.find(pattern).unwrap_or(0) as u32;
1725                    return Some(MisuseFinding {
1726                        file: file.to_string(),
1727                        line,
1728                        column,
1729                        rule: rule.clone(),
1730                        api_call: pattern.trim_end_matches('(').to_string(),
1731                        message: format!(
1732                            "{} is not cryptographically secure, don't use for security purposes",
1733                            pattern.trim_end_matches('(')
1734                        ),
1735                        fix_suggestion:
1736                            "Use secrets.token_bytes() or secrets.token_hex() for security"
1737                                .to_string(),
1738                        code_context: line_text.to_string(),
1739                    });
1740                }
1741            }
1742        }
1743    }
1744
1745    None
1746}
1747
1748/// Check for poisoned mutex lock unwrap.
1749fn check_mutex_lock_unwrap(
1750    rule: &APIRule,
1751    file: &str,
1752    line: u32,
1753    line_text: &str,
1754) -> Option<MisuseFinding> {
1755    if line_text.contains(".lock().unwrap()") {
1756        let column = line_text.find(".lock().unwrap()").unwrap_or(0) as u32;
1757        return Some(MisuseFinding {
1758            file: file.to_string(),
1759            line,
1760            column,
1761            rule: rule.clone(),
1762            api_call: "Mutex::lock".to_string(),
1763            message:
1764                "Mutex::lock().unwrap() can panic on poisoned locks and hide deadlock behavior"
1765                    .to_string(),
1766            fix_suggestion:
1767                "Handle lock errors explicitly (match/if let), or use try_lock with backoff"
1768                    .to_string(),
1769            code_context: line_text.to_string(),
1770        });
1771    }
1772    None
1773}
1774
1775/// Check for File::open without context propagation.
1776fn check_file_open_without_context(
1777    rule: &APIRule,
1778    file: &str,
1779    line: u32,
1780    line_text: &str,
1781) -> Option<MisuseFinding> {
1782    if line_text.contains("File::open(")
1783        && !line_text.contains(".context(")
1784        && !line_text.contains(".with_context(")
1785        && !line_text.contains("map_err(")
1786    {
1787        let column = line_text.find("File::open(").unwrap_or(0) as u32;
1788        return Some(MisuseFinding {
1789            file: file.to_string(),
1790            line,
1791            column,
1792            rule: rule.clone(),
1793            api_call: "File::open".to_string(),
1794            message: "File::open used without contextual error mapping".to_string(),
1795            fix_suggestion:
1796                "Wrap errors with context (with_context/context/map_err) before propagating"
1797                    .to_string(),
1798            code_context: line_text.to_string(),
1799        });
1800    }
1801    None
1802}
1803
1804/// Check for capacity allocations sourced from unbounded input.
1805fn check_unbounded_with_capacity(
1806    rule: &APIRule,
1807    file: &str,
1808    line: u32,
1809    line_text: &str,
1810) -> Option<MisuseFinding> {
1811    if line_text.contains("Vec::with_capacity(") {
1812        let line_lower = line_text.to_lowercase();
1813        let user_input_markers = [
1814            "input", "args", "user", "request", "len", "size",
1815        ];
1816        if user_input_markers.iter().any(|m| line_lower.contains(m)) {
1817            let column = line_text.find("Vec::with_capacity(").unwrap_or(0) as u32;
1818            return Some(MisuseFinding {
1819                file: file.to_string(),
1820                line,
1821                column,
1822                rule: rule.clone(),
1823                api_call: "Vec::with_capacity".to_string(),
1824                message: "Vec::with_capacity appears to use unbounded external input".to_string(),
1825                fix_suggestion:
1826                    "Clamp requested capacity with a hard upper bound before allocation".to_string(),
1827                code_context: line_text.to_string(),
1828            });
1829        }
1830    }
1831    None
1832}
1833
1834/// Check for detached tokio tasks.
1835fn check_detached_tokio_spawn(
1836    rule: &APIRule,
1837    file: &str,
1838    line: u32,
1839    line_text: &str,
1840) -> Option<MisuseFinding> {
1841    if line_text.contains("tokio::spawn(")
1842        && !line_text.contains('=')
1843        && !line_text.contains("handles.push")
1844    {
1845        let column = line_text.find("tokio::spawn(").unwrap_or(0) as u32;
1846        return Some(MisuseFinding {
1847            file: file.to_string(),
1848            line,
1849            column,
1850            rule: rule.clone(),
1851            api_call: "tokio::spawn".to_string(),
1852            message: "tokio::spawn used without keeping JoinHandle".to_string(),
1853            fix_suggestion: "Store JoinHandle values and await them to surface task errors"
1854                .to_string(),
1855            code_context: line_text.to_string(),
1856        });
1857    }
1858    None
1859}
1860
1861/// Check for map iteration order assumptions.
1862fn check_hashmap_order_dependence(
1863    rule: &APIRule,
1864    file: &str,
1865    line: u32,
1866    line_text: &str,
1867    rust_ctx: &RustLineContext<'_>,
1868) -> Option<MisuseFinding> {
1869    let looks_like_hashmap_iteration = line_text.contains(".iter()")
1870        && (line_text.contains("for ") || rust_ctx.previous_line.starts_with("for "))
1871        && rust_ctx.file_has_hashmap;
1872    if looks_like_hashmap_iteration {
1873        let column = line_text.find(".iter()").unwrap_or(0) as u32;
1874        return Some(MisuseFinding {
1875            file: file.to_string(),
1876            line,
1877            column,
1878            rule: rule.clone(),
1879            api_call: "HashMap::iter".to_string(),
1880            message: "Potential logic dependence on HashMap iteration order".to_string(),
1881            fix_suggestion: "Use BTreeMap/IndexMap or sort keys before ordered operations"
1882                .to_string(),
1883            code_context: line_text.to_string(),
1884        });
1885    }
1886    None
1887}
1888
1889/// Check for clone usage in loop bodies.
1890fn check_clone_in_hot_loop(
1891    rule: &APIRule,
1892    file: &str,
1893    line: u32,
1894    line_text: &str,
1895    rust_ctx: &RustLineContext<'_>,
1896) -> Option<MisuseFinding> {
1897    if line_text.contains(".clone()")
1898        && (line_text.contains("for ") || line_text.contains("while ") || rust_ctx.previous_is_loop)
1899    {
1900        let column = line_text.find(".clone()").unwrap_or(0) as u32;
1901        return Some(MisuseFinding {
1902            file: file.to_string(),
1903            line,
1904            column,
1905            rule: rule.clone(),
1906            api_call: "clone".to_string(),
1907            message: "clone() in loop context may create avoidable allocation overhead".to_string(),
1908            fix_suggestion: "Prefer borrowing/references or move semantics inside hot loops"
1909                .to_string(),
1910            code_context: line_text.to_string(),
1911        });
1912    }
1913    None
1914}
1915
1916// =============================================================================
1917// Filtering
1918// =============================================================================
1919
1920/// Filter findings by category and severity
1921fn filter_findings(
1922    findings: Vec<MisuseFinding>,
1923    categories: Option<&[MisuseCategory]>,
1924    severities: Option<&[MisuseSeverity]>,
1925) -> Vec<MisuseFinding> {
1926    findings
1927        .into_iter()
1928        .filter(|f| {
1929            // Category filter
1930            if let Some(cats) = categories {
1931                if !cats.contains(&f.rule.category) {
1932                    return false;
1933                }
1934            }
1935
1936            // Severity filter
1937            if let Some(sevs) = severities {
1938                if !sevs.contains(&f.rule.severity) {
1939                    return false;
1940                }
1941            }
1942
1943            true
1944        })
1945        .collect()
1946}
1947
1948// =============================================================================
1949// Summary Building
1950// =============================================================================
1951
1952/// Build summary from findings
1953fn build_summary(findings: &[MisuseFinding], files_scanned: u32) -> APICheckSummary {
1954    let mut by_category: HashMap<String, u32> = HashMap::new();
1955    let mut by_severity: HashMap<String, u32> = HashMap::new();
1956    let mut apis_checked: Vec<String> = Vec::new();
1957
1958    for finding in findings {
1959        // Count by category
1960        let cat_str = format!("{:?}", finding.rule.category).to_lowercase();
1961        *by_category.entry(cat_str).or_insert(0) += 1;
1962
1963        // Count by severity
1964        let sev_str = format!("{:?}", finding.rule.severity).to_lowercase();
1965        *by_severity.entry(sev_str).or_insert(0) += 1;
1966
1967        // Track APIs
1968        if !apis_checked.contains(&finding.api_call) {
1969            apis_checked.push(finding.api_call.clone());
1970        }
1971    }
1972
1973    APICheckSummary {
1974        total_findings: findings.len() as u32,
1975        by_category,
1976        by_severity,
1977        apis_checked,
1978        files_scanned,
1979    }
1980}
1981
1982// =============================================================================
1983// Output Formatting
1984// =============================================================================
1985
1986/// Format report as human-readable text
1987fn format_api_check_text(report: &APICheckReport) -> String {
1988    let mut output = String::new();
1989
1990    output.push_str("=== API Check Report ===\n\n");
1991
1992    // Summary
1993    output.push_str(&format!(
1994        "Files scanned: {}\n",
1995        report.summary.files_scanned
1996    ));
1997    output.push_str(&format!("Rules applied: {}\n", report.rules_applied));
1998    output.push_str(&format!(
1999        "Total findings: {}\n\n",
2000        report.summary.total_findings
2001    ));
2002
2003    // By severity
2004    if !report.summary.by_severity.is_empty() {
2005        output.push_str("By Severity:\n");
2006        for (severity, count) in &report.summary.by_severity {
2007            output.push_str(&format!("  {}: {}\n", severity, count));
2008        }
2009        output.push('\n');
2010    }
2011
2012    // By category
2013    if !report.summary.by_category.is_empty() {
2014        output.push_str("By Category:\n");
2015        for (category, count) in &report.summary.by_category {
2016            output.push_str(&format!("  {}: {}\n", category, count));
2017        }
2018        output.push('\n');
2019    }
2020
2021    // Findings
2022    if !report.findings.is_empty() {
2023        output.push_str("Findings:\n");
2024        output.push_str(&"-".repeat(60));
2025        output.push('\n');
2026
2027        for finding in &report.findings {
2028            output.push_str(&format!(
2029                "[{:?}] {} ({})\n",
2030                finding.rule.severity, finding.rule.name, finding.rule.id
2031            ));
2032            output.push_str(&format!(
2033                "  Location: {}:{}:{}\n",
2034                finding.file, finding.line, finding.column
2035            ));
2036            output.push_str(&format!("  API: {}\n", finding.api_call));
2037            output.push_str(&format!("  Message: {}\n", finding.message));
2038            output.push_str(&format!("  Fix: {}\n", finding.fix_suggestion));
2039            if !finding.code_context.is_empty() {
2040                output.push_str(&format!("  Context: {}\n", finding.code_context.trim()));
2041            }
2042            output.push('\n');
2043        }
2044    } else {
2045        output.push_str("No API misuse patterns detected.\n");
2046    }
2047
2048    output
2049}
2050
2051// =============================================================================
2052// Tests
2053// =============================================================================
2054
2055#[cfg(test)]
2056mod tests {
2057    use super::*;
2058    use tempfile::TempDir;
2059
2060    #[test]
2061    fn test_python_rules_defined() {
2062        let rules = python_rules();
2063        assert!(!rules.is_empty());
2064        assert!(rules.iter().any(|r| r.id == "PY001")); // missing-timeout
2065        assert!(rules.iter().any(|r| r.id == "PY002")); // bare-except
2066        assert!(rules.iter().any(|r| r.id == "PY003")); // weak-hash-md5
2067        assert!(rules.iter().any(|r| r.id == "PY005")); // unclosed-file
2068    }
2069
2070    #[test]
2071    fn test_rust_rules_defined() {
2072        let rules = rust_rules();
2073        assert!(!rules.is_empty());
2074        assert!(rules.iter().any(|r| r.id == "RS001"));
2075        assert!(rules.iter().any(|r| r.id == "RS002"));
2076        assert!(rules.iter().any(|r| r.id == "RS003"));
2077        assert!(rules.iter().any(|r| r.id == "RS004"));
2078        assert!(rules.iter().any(|r| r.id == "RS005"));
2079        assert!(rules.iter().any(|r| r.id == "RS006"));
2080    }
2081
2082    #[test]
2083    fn test_all_supported_languages_have_rules() {
2084        for language in all_api_languages() {
2085            let rules = rules_for_language(*language);
2086            assert!(
2087                !rules.is_empty(),
2088                "expected at least one api-check rule for {:?}",
2089                language
2090            );
2091        }
2092    }
2093
2094    #[test]
2095    fn test_detect_language_extended_extensions() {
2096        let cases = [
2097            ("main.go", ApiLanguage::Go),
2098            ("Main.java", ApiLanguage::Java),
2099            ("app.js", ApiLanguage::JavaScript),
2100            ("component.tsx", ApiLanguage::TypeScript),
2101            ("main.c", ApiLanguage::C),
2102            ("main.cpp", ApiLanguage::Cpp),
2103            ("app.rb", ApiLanguage::Ruby),
2104            ("index.php", ApiLanguage::Php),
2105            ("Main.kt", ApiLanguage::Kotlin),
2106            ("main.swift", ApiLanguage::Swift),
2107            ("Program.cs", ApiLanguage::CSharp),
2108            ("Main.scala", ApiLanguage::Scala),
2109            ("app.ex", ApiLanguage::Elixir),
2110            ("main.lua", ApiLanguage::Lua),
2111            ("game.luau", ApiLanguage::Luau),
2112            ("main.ml", ApiLanguage::Ocaml),
2113        ];
2114
2115        for (path, expected) in cases {
2116            assert_eq!(detect_language(Path::new(path)), Some(expected), "{path}");
2117        }
2118    }
2119
2120    #[test]
2121    fn test_check_missing_timeout() {
2122        let rule = &python_rules()[0]; // PY001
2123
2124        // Should detect
2125        let finding = check_missing_timeout(rule, "test.py", 1, "response = requests.get(url)");
2126        assert!(finding.is_some());
2127
2128        // Should not detect (has timeout)
2129        let finding = check_missing_timeout(
2130            rule,
2131            "test.py",
2132            1,
2133            "response = requests.get(url, timeout=30)",
2134        );
2135        assert!(finding.is_none());
2136    }
2137
2138    #[test]
2139    fn test_check_bare_except() {
2140        let rule = &python_rules()[1]; // PY002
2141
2142        // Should detect
2143        let finding = check_bare_except(rule, "test.py", 1, "except:");
2144        assert!(finding.is_some());
2145
2146        // Should not detect (has exception type)
2147        let finding = check_bare_except(rule, "test.py", 1, "except Exception:");
2148        assert!(finding.is_none());
2149    }
2150
2151    #[test]
2152    fn test_check_md5_usage() {
2153        let rule = &python_rules()[2]; // PY003
2154
2155        // Should detect
2156        let finding = check_md5_usage(rule, "test.py", 1, "hash = hashlib.md5(data)");
2157        assert!(finding.is_some());
2158
2159        // Should not detect
2160        let finding = check_md5_usage(rule, "test.py", 1, "hash = hashlib.sha256(data)");
2161        assert!(finding.is_none());
2162    }
2163
2164    #[test]
2165    fn test_check_unclosed_file() {
2166        let rule = &python_rules()[4]; // PY005
2167
2168        // Should detect
2169        let finding = check_unclosed_file(rule, "test.py", 1, "f = open('data.txt')");
2170        assert!(finding.is_some());
2171
2172        // Should not detect (using context manager)
2173        let finding = check_unclosed_file(rule, "test.py", 1, "with open('data.txt') as f:");
2174        assert!(finding.is_none());
2175    }
2176
2177    #[test]
2178    fn test_filter_by_category() {
2179        let findings = vec![
2180            MisuseFinding {
2181                file: "test.py".to_string(),
2182                line: 1,
2183                column: 0,
2184                rule: APIRule {
2185                    id: "PY001".to_string(),
2186                    name: "test".to_string(),
2187                    category: MisuseCategory::Parameters,
2188                    severity: MisuseSeverity::High,
2189                    description: "test".to_string(),
2190                    correct_usage: "test".to_string(),
2191                },
2192                api_call: "test".to_string(),
2193                message: "test".to_string(),
2194                fix_suggestion: "test".to_string(),
2195                code_context: "test".to_string(),
2196            },
2197            MisuseFinding {
2198                file: "test.py".to_string(),
2199                line: 2,
2200                column: 0,
2201                rule: APIRule {
2202                    id: "PY003".to_string(),
2203                    name: "test".to_string(),
2204                    category: MisuseCategory::Crypto,
2205                    severity: MisuseSeverity::High,
2206                    description: "test".to_string(),
2207                    correct_usage: "test".to_string(),
2208                },
2209                api_call: "test".to_string(),
2210                message: "test".to_string(),
2211                fix_suggestion: "test".to_string(),
2212                code_context: "test".to_string(),
2213            },
2214        ];
2215
2216        let filtered = filter_findings(findings, Some(&[MisuseCategory::Crypto]), None);
2217        assert_eq!(filtered.len(), 1);
2218        assert_eq!(filtered[0].rule.category, MisuseCategory::Crypto);
2219    }
2220
2221    #[test]
2222    fn test_build_summary() {
2223        let findings = vec![MisuseFinding {
2224            file: "test.py".to_string(),
2225            line: 1,
2226            column: 0,
2227            rule: APIRule {
2228                id: "PY001".to_string(),
2229                name: "test".to_string(),
2230                category: MisuseCategory::Parameters,
2231                severity: MisuseSeverity::High,
2232                description: "test".to_string(),
2233                correct_usage: "test".to_string(),
2234            },
2235            api_call: "requests.get".to_string(),
2236            message: "test".to_string(),
2237            fix_suggestion: "test".to_string(),
2238            code_context: "test".to_string(),
2239        }];
2240
2241        let summary = build_summary(&findings, 5);
2242        assert_eq!(summary.total_findings, 1);
2243        assert_eq!(summary.files_scanned, 5);
2244        assert!(summary.apis_checked.contains(&"requests.get".to_string()));
2245    }
2246
2247    #[test]
2248    fn test_collect_files_includes_rust() {
2249        let temp = TempDir::new().unwrap();
2250        let py = temp.path().join("a.py");
2251        let rs = temp.path().join("b.rs");
2252        let go = temp.path().join("c.go");
2253        let txt = temp.path().join("c.txt");
2254        fs::write(&py, "print('ok')").unwrap();
2255        fs::write(&rs, "fn main() {}").unwrap();
2256        fs::write(&go, "package main").unwrap();
2257        fs::write(&txt, "ignore").unwrap();
2258
2259        let files = collect_files(temp.path()).unwrap();
2260        assert!(files.iter().any(|f| f.ends_with("a.py")));
2261        assert!(files.iter().any(|f| f.ends_with("b.rs")));
2262        assert!(files.iter().any(|f| f.ends_with("c.go")));
2263        assert!(!files.iter().any(|f| f.ends_with("c.txt")));
2264    }
2265
2266    #[test]
2267    fn test_check_mutex_lock_unwrap() {
2268        let rule = &rust_rules()[0];
2269        let finding =
2270            check_mutex_lock_unwrap(rule, "lib.rs", 10, "let guard = shared.lock().unwrap();");
2271        assert!(finding.is_some());
2272    }
2273
2274    #[test]
2275    fn test_check_file_open_without_context() {
2276        let rule = &rust_rules()[1];
2277        let finding = check_file_open_without_context(rule, "lib.rs", 8, "let f = File::open(p)?;");
2278        assert!(finding.is_some());
2279
2280        let contextual = check_file_open_without_context(
2281            rule,
2282            "lib.rs",
2283            9,
2284            "let f = File::open(p).with_context(|| \"open\".to_string())?;",
2285        );
2286        assert!(contextual.is_none());
2287    }
2288
2289    #[test]
2290    fn test_check_unbounded_with_capacity() {
2291        let rule = &rust_rules()[2];
2292        let finding =
2293            check_unbounded_with_capacity(rule, "lib.rs", 12, "let v = Vec::with_capacity(len);");
2294        assert!(finding.is_some());
2295
2296        let bounded = check_unbounded_with_capacity(
2297            rule,
2298            "lib.rs",
2299            13,
2300            "let v = Vec::with_capacity(256);",
2301        );
2302        assert!(bounded.is_none());
2303    }
2304
2305    #[test]
2306    fn test_check_tokio_spawn_detached() {
2307        let rule = &rust_rules()[3];
2308        let detached = check_detached_tokio_spawn(
2309            rule,
2310            "lib.rs",
2311            3,
2312            "tokio::spawn(async move { work().await; });",
2313        );
2314        let tracked = check_detached_tokio_spawn(
2315            rule,
2316            "lib.rs",
2317            4,
2318            "let handle = tokio::spawn(async move { work().await; });",
2319        );
2320        assert!(detached.is_some());
2321        assert!(tracked.is_none());
2322    }
2323
2324    #[test]
2325    fn test_check_hashmap_order_dependence() {
2326        let rule = &rust_rules()[4];
2327        let ctx = RustLineContext {
2328            file_has_hashmap: true,
2329            previous_line: "for (k, v) in map",
2330            previous_is_loop: true,
2331        };
2332        let finding = check_hashmap_order_dependence(rule, "lib.rs", 12, "    .iter()", &ctx);
2333        assert!(finding.is_some());
2334    }
2335
2336    #[test]
2337    fn test_check_clone_in_hot_loop() {
2338        let rule = &rust_rules()[5];
2339        let ctx = RustLineContext {
2340            file_has_hashmap: false,
2341            previous_line: "for item in items {",
2342            previous_is_loop: true,
2343        };
2344        let finding = check_clone_in_hot_loop(rule, "lib.rs", 20, "value.clone()", &ctx);
2345        assert!(finding.is_some());
2346    }
2347
2348    fn assert_language_findings(
2349        filename: &str,
2350        language: ApiLanguage,
2351        source: &str,
2352        expected_rule_id: &str,
2353    ) {
2354        let temp = TempDir::new().unwrap();
2355        let path = temp.path().join(filename);
2356        fs::write(&path, source).unwrap();
2357        let rules = rules_for_language(language);
2358        let findings = analyze_file(&path, &rules, language).unwrap();
2359        assert!(
2360            findings.iter().any(|finding| finding.rule.id == expected_rule_id),
2361            "expected {expected_rule_id} for {filename}, got {:?}",
2362            findings.iter().map(|f| f.rule.id.clone()).collect::<Vec<_>>()
2363        );
2364    }
2365
2366    #[test]
2367    fn test_extended_language_rule_detection() {
2368        let cases = [
2369            ("main.go", ApiLanguage::Go, "data, _ := ioutil.ReadFile(path)", "GO001"),
2370            (
2371                "Main.java",
2372                ApiLanguage::Java,
2373                "if (name == otherName) { }",
2374                "JV001",
2375            ),
2376            ("app.js", ApiLanguage::JavaScript, "if (a == b) {}", "JS001"),
2377            ("app.ts", ApiLanguage::TypeScript, "if (a == b) {}", "TS001"),
2378            ("main.c", ApiLanguage::C, "gets(buffer);", "C001"),
2379            ("main.cpp", ApiLanguage::Cpp, "std::auto_ptr<Foo> p;", "CPP003"),
2380            ("app.rb", ApiLanguage::Ruby, "eval(params[:code])", "RB001"),
2381            ("index.php", ApiLanguage::Php, "unserialize($payload);", "PH005"),
2382            ("Main.kt", ApiLanguage::Kotlin, "val name = user!!", "KT001"),
2383            ("main.swift", ApiLanguage::Swift, "let name = value!", "SW003"),
2384            ("Program.cs", ApiLanguage::CSharp, "var x = task.Result;", "CS003"),
2385            ("Main.scala", ApiLanguage::Scala, "val casted = value.asInstanceOf[String]", "SC002"),
2386            ("app.ex", ApiLanguage::Elixir, "String.to_atom(param)", "EX001"),
2387            ("main.lua", ApiLanguage::Lua, "value = 1", "LU001"),
2388            ("game.luau", ApiLanguage::Luau, "os.execute(cmd)", "LU003"),
2389            ("main.ml", ApiLanguage::Ocaml, "Obj.magic value", "OC004"),
2390        ];
2391
2392        for (filename, language, source, expected_rule_id) in cases {
2393            assert_language_findings(filename, language, source, expected_rule_id);
2394        }
2395    }
2396}
tldr_cli/commands/remaining/api_check.rs

tldr_cli/commands/remaining/
api_check.rs