tldr_cli/commands/remaining/secure.rs
1//! Secure Command - Security Analysis Dashboard
2//!
3//! Aggregates security sub-analyses (taint, resources, bounds, contracts,
4//! behavioral, mutability) into a severity-sorted security report.
5//!
6//! # Sub-analyses
7//!
8//! - `taint`: Detect data flow from untrusted sources to sensitive sinks
9//! - `resources`: Detect resource leaks (files, connections)
10//! - `bounds`: Detect potential buffer overflows and bounds issues
11//! - `contracts`: Analyze pre/postconditions (full mode only)
12//! - `behavioral`: Analyze exception handling and state transitions (full mode only)
13//! - `mutability`: Detect mutable parameter issues (full mode only)
14//!
15//! # Quick Mode
16//!
17//! Quick mode (`--quick`) runs only the fast analyses:
18//! - taint, resources, bounds
19//!
20//! Full mode adds:
21//! - contracts, behavioral, mutability
22//!
23//! # Example
24//!
25//! ```bash
26//! # Analyze a file
27//! tldr secure src/app.py
28//!
29//! # Quick mode (faster)
30//! tldr secure src/app.py --quick
31//!
32//! # Show detail for sub-analysis
33//! tldr secure src/app.py --detail taint
34//!
35//! # Text output
36//! tldr secure src/app.py -f text
37//! ```
38
39use std::collections::HashMap;
40use std::fs;
41use std::path::{Path, PathBuf};
42use std::time::Instant;
43
44use clap::Args;
45use colored::Colorize;
46use serde_json::Value;
47use tldr_core::fs::{read_to_string_tolerant, ReadOutcome};
48use tldr_core::walker::ProjectWalker;
49use tldr_core::Language;
50use tree_sitter::Node;
51
52use crate::output::OutputFormat;
53
54use super::ast_cache::AstCache;
55use super::error::{RemainingError, RemainingResult};
56use super::types::{SecureFinding, SecureReport, SecureSummary};
57
58// =============================================================================
59// Security Analysis Types
60// =============================================================================
61
/// The individual security checks that the `secure` command can aggregate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecurityAnalysis {
    Taint,
    Resources,
    Bounds,
    Contracts,
    Behavioral,
    Mutability,
}

impl SecurityAnalysis {
    /// Lowercase identifier of this analysis.
    ///
    /// This is the string that `--detail` is compared against and the key
    /// under which the raw sub-result is stored in the report.
    pub fn name(&self) -> &'static str {
        match *self {
            SecurityAnalysis::Taint => "taint",
            SecurityAnalysis::Resources => "resources",
            SecurityAnalysis::Bounds => "bounds",
            SecurityAnalysis::Contracts => "contracts",
            SecurityAnalysis::Behavioral => "behavioral",
            SecurityAnalysis::Mutability => "mutability",
        }
    }
}
86
/// Quick mode analyses (fast)
///
/// Selected by `run` when `--quick` is set. The analyses execute in the
/// order listed here.
pub const QUICK_ANALYSES: &[SecurityAnalysis] = &[
    SecurityAnalysis::Taint,
    SecurityAnalysis::Resources,
    SecurityAnalysis::Bounds,
];
93
/// Full mode analyses (all)
///
/// Selected by `run` when `--quick` is NOT set: the three quick analyses
/// followed by contracts, behavioral and mutability. The analyses execute
/// in the order listed here.
pub const FULL_ANALYSES: &[SecurityAnalysis] = &[
    SecurityAnalysis::Taint,
    SecurityAnalysis::Resources,
    SecurityAnalysis::Bounds,
    SecurityAnalysis::Contracts,
    SecurityAnalysis::Behavioral,
    SecurityAnalysis::Mutability,
];
103
104// =============================================================================
105// CLI Arguments
106// =============================================================================
107
// NOTE(review): the `///` comments on this struct and its fields are
// presumably consumed by clap's derive as the command/flag help text, so
// they are kept verbatim; implementation notes use `//` instead.
/// Security analysis dashboard aggregating multiple security checks
#[derive(Debug, Args, Clone)]
pub struct SecureArgs {
    /// File path or directory to analyze
    // `run` rejects a non-existent path with `RemainingError::file_not_found`.
    pub path: PathBuf,

    /// Programming language to filter by (auto-detected if omitted)
    // When `None`, `run` autodetects via `Language::from_directory` (for a
    // directory) or `Language::from_path` (for a file).
    #[arg(long, short = 'l')]
    pub lang: Option<Language>,

    /// Show details for specific sub-analysis
    // Compared verbatim against `SecurityAnalysis::name()` in `run`; only a
    // matching analysis has its raw result stored in `sub_results`.
    #[arg(long)]
    pub detail: Option<String>,

    /// Run quick mode (taint, resources, bounds only)
    // Switches `run` from `FULL_ANALYSES` to `QUICK_ANALYSES`.
    #[arg(long)]
    pub quick: bool,

    /// Write output to file instead of stdout
    // Written with `fs::write`; when absent, `run` prints to stdout.
    #[arg(long, short = 'o')]
    pub output: Option<PathBuf>,

    /// Walk vendored/build dirs (node_modules, target, dist, etc.) that would normally be skipped.
    // Forwarded by `collect_files` to `ProjectWalker::no_default_ignore()`.
    #[arg(long)]
    pub no_default_ignore: bool,

    /// Include findings on test files. Mirrors `tldr vuln --include-tests`
    /// (M-X3 `js-test-file-suppression-v1`). Default: `false` — findings
    /// emitted from JS/TS test files (paths under `test/`, `tests/`,
    /// `__tests__/`, or filenames ending in `.test.{js,ts,jsx,tsx}`,
    /// `.spec.{js,ts,jsx,tsx}`, or `.e2e.{js,ts}`) and Rust test files
    /// (paths under `/tests/` or filenames ending in `_test.rs` /
    /// `tests.rs`) are suppressed because they exercise sink behavior on
    /// synthetic inputs and pollute production-codebase scans. Pass
    /// `--include-tests` to restore them. Mirrors the `--include-smells`
    /// precedent (opt-in for noisy categories).
    // When `false`, `run` filters the aggregated findings through
    // `apply_test_file_suppression` before computing the summary.
    #[arg(long)]
    pub include_tests: bool,
}
147
148impl SecureArgs {
149 /// Run the secure command with CLI-provided format
150 pub fn run(&self, format: OutputFormat) -> anyhow::Result<()> {
151 run(self.clone(), format)
152 }
153}
154
155// =============================================================================
156// Implementation
157// =============================================================================
158
159/// Run the secure analysis
160pub fn run(args: SecureArgs, format: OutputFormat) -> anyhow::Result<()> {
161 let start = Instant::now();
162
163 // Validate path exists
164 if !args.path.exists() {
165 return Err(RemainingError::file_not_found(&args.path).into());
166 }
167
168 // Create report
169 let mut report = SecureReport::new(args.path.display().to_string());
170
171 // Initialize AST cache for shared parsing
172 let mut cache = AstCache::default();
173
174 // Determine which analyses to run
175 let analyses = if args.quick {
176 QUICK_ANALYSES
177 } else {
178 FULL_ANALYSES
179 };
180
181 // VULN-SECURE-AUTODETECT-PARITY-V1 (M-AA5): mirror `tldr vuln`'s
182 // language-resolution path so secure agrees with vuln on autodetect.
183 //
184 // Pre-fix: `tldr secure /tmp/repos/express` (no `--lang`) reported
185 // `summary.taint_count: 0` while `tldr vuln /tmp/repos/express`
186 // reported `findings: 1`. The discrepancy traced to secure's
187 // `collect_files` lacking the autodetect step: with `lang = None`,
188 // `is_supported_secure_file` matches only `py | rs`, so a JS-only
189 // tree (express) silently produced an empty file set.
190 //
191 // M-Z10 (`secure-test-file-suppression-v1`) made vuln+secure agree
192 // when `--lang` is EXPLICIT by mirroring the test-file suppression
193 // mask. M-AA5 closes the symmetric gap on the autodetect path:
194 //
195 // 1. If `--lang L` provided, honor it as-is.
196 // 2. Else, autodetect via `Language::from_directory` (M-AA1
197 // `autodetect-dominant-language-v1` made this strict
198 // extension-majority + manifest-priority).
199 // 3. If the detected language lies outside the natively-analyzed
200 // set, error with `AutodetectUnsupported` (exit 2) — same
201 // contract as vuln. This points the user at an explicit
202 // `--lang` flag.
203 //
204 // The natively-analyzed set is canonical-pipeline-driven and lives
205 // in `vuln::is_natively_analyzed` (Python, Rust, TypeScript,
206 // JavaScript per M-Y3). Reusing it here keeps secure↔vuln gate
207 // semantics in lock-step: if vuln autodetect-rejects a tree, secure
208 // does too, with the same message.
209 let effective_lang: Option<Language> = match args.lang {
210 Some(l) => Some(l),
211 None => {
212 let detected = if args.path.is_dir() {
213 Language::from_directory(&args.path)
214 } else {
215 Language::from_path(&args.path)
216 };
217 if let Some(l) = detected {
218 if !super::vuln::is_natively_analyzed(l) {
219 return Err(RemainingError::autodetect_unsupported(format!(
220 "secure: taint analysis for {lang} is not yet supported by autodetect; \
221 pass --lang {lang} explicitly to scan this file (the canonical taint \
222 pipeline supports it). Autodetect-by-extension currently routes only \
223 --lang python, --lang rust, --lang typescript, and --lang javascript; \
224 other languages require an explicit --lang flag.",
225 lang = l.as_str()
226 ))
227 .into());
228 }
229 }
230 detected
231 }
232 };
233
234 // Collect files to analyze (autodetected language drives the
235 // extension filter when --lang is omitted).
236 let candidate_files = collect_files(&args.path, effective_lang, args.no_default_ignore)?;
237
238 // SECURE-UTF8-TOLERANCE-V1: pre-filter for UTF-8 validity ONCE up front.
239 // The 6 sub-analyses (taint, resources, bounds, contracts, behavioral,
240 // mutability) each re-iterate the same files, so doing the read here
241 // (a) dedupes warnings (1 message per bad file, not 6) and
242 // (b) avoids each analysis having to know about the tolerance policy.
243 // The Luau parser-test corpus (`tests/conformance/literals.luau`,
244 // `pm.luau`, `sort.luau`) intentionally embeds raw 0xFF/0xFE bytes —
245 // pre-fix `tldr secure --lang luau /tmp/repos/luau-luau` aborted with
246 // `Error: stream did not contain valid UTF-8` on the first such file.
247 let (files, warnings, files_skipped) = partition_utf8_clean(&candidate_files);
248
249 // Run sub-analyses and collect findings
250 let mut all_findings = Vec::new();
251 let mut sub_results: HashMap<String, Value> = HashMap::new();
252
253 for analysis in analyses {
254 let (findings, raw_result) = run_security_analysis(*analysis, &files, &mut cache)?;
255
256 // Collect findings
257 all_findings.extend(findings);
258
259 // Store raw result if requested
260 if args.detail.as_deref() == Some(analysis.name()) {
261 sub_results.insert(analysis.name().to_string(), raw_result);
262 }
263 }
264
265 // SECURE-TEST-FILE-SUPPRESSION-V1 (M-Z10): mirror the test-file
266 // suppression policy from `tldr vuln` (M-X3
267 // `js-test-file-suppression-v1`). See `apply_test_file_suppression`.
268 if !args.include_tests {
269 apply_test_file_suppression(&mut all_findings);
270 }
271
272 // Sort findings by severity (critical first)
273 all_findings.sort_by(|a, b| severity_order(&a.severity).cmp(&severity_order(&b.severity)));
274
275 // WRAPPER-CROSS-CONSISTENCY-V1 (BUG-15, BUG-16): compute the summary
276 // counters from the FINAL `findings` array via category group-by,
277 // post-aggregation and post-sort. The previous implementation set
278 // `taint_count = findings.len()` inside the per-analysis update where
279 // `analyze_taint` on Rust files returns `category="unsafe_block"`
280 // findings — so `taint_count` ghosted to N while the findings array
281 // had zero `category=="taint"` entries (BUG-16). Group-by on the
282 // canonical findings array makes the summary match the array by
283 // construction.
284 report.summary = compute_summary_from_findings(&all_findings);
285
286 report.findings = all_findings;
287 report.sub_results = sub_results;
288 report.total_elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
289 // SECURE-UTF8-TOLERANCE-V1: surface skipped files in the report.
290 report.files_skipped = files_skipped;
291 report.warnings = warnings;
292
293 // Output
294 let output_str = match format {
295 OutputFormat::Json => serde_json::to_string_pretty(&report)?,
296 OutputFormat::Compact => serde_json::to_string(&report)?,
297 OutputFormat::Text => format_text_report(&report),
298 OutputFormat::Sarif | OutputFormat::Dot => {
299 // SARIF/DOT not fully supported for secure, fall back to JSON
300 serde_json::to_string_pretty(&report)?
301 }
302 };
303
304 // Write output
305 if let Some(output_path) = &args.output {
306 fs::write(output_path, &output_str)?;
307 } else {
308 println!("{}", output_str);
309 }
310
311 Ok(())
312}
313
314/// Collect supported files to analyze.
315fn collect_files(
316 path: &Path,
317 lang: Option<Language>,
318 no_default_ignore: bool,
319) -> RemainingResult<Vec<PathBuf>> {
320 let mut files = Vec::new();
321
322 if path.is_file() {
323 if is_supported_secure_file(path, lang) {
324 files.push(path.to_path_buf());
325 }
326 } else if path.is_dir() {
327 // Walk directory and collect supported source files.
328 let mut walker = ProjectWalker::new(path).max_depth(10);
329 if no_default_ignore {
330 walker = walker.no_default_ignore();
331 }
332 for entry in walker.iter() {
333 let p = entry.path();
334 if p.is_file() && is_supported_secure_file(p, lang) {
335 files.push(p.to_path_buf());
336 }
337 }
338 }
339
340 // Return empty vec if no files found (like vuln.rs does)
341 // The report will show 0 files scanned with no findings
342
343 Ok(files)
344}
345
346/// Check whether `path` is a source file the secure analyzer should scan.
347///
348/// With `lang = Some(L)`, only matches that language's extensions. With
349/// `lang = None`, preserves the historical behavior of `py | rs` (the
350/// languages the sub-analyzers natively support).
351fn is_supported_secure_file(path: &std::path::Path, lang: Option<Language>) -> bool {
352 let ext = match path.extension().and_then(|e| e.to_str()) {
353 Some(e) => e,
354 None => return false,
355 };
356 match lang {
357 Some(Language::TypeScript) => matches!(ext, "ts" | "tsx"),
358 Some(Language::JavaScript) => matches!(ext, "js" | "mjs" | "cjs" | "jsx"),
359 Some(Language::Python) => ext == "py",
360 Some(Language::Rust) => ext == "rs",
361 Some(Language::Go) => ext == "go",
362 Some(Language::Java) => ext == "java",
363 Some(Language::C) => matches!(ext, "c" | "h"),
364 Some(Language::Cpp) => matches!(ext, "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx"),
365 Some(Language::CSharp) => ext == "cs",
366 Some(Language::Ruby) => ext == "rb",
367 Some(Language::Php) => ext == "php",
368 Some(Language::Kotlin) => matches!(ext, "kt" | "kts"),
369 Some(Language::Swift) => ext == "swift",
370 Some(Language::Scala) => ext == "scala",
371 Some(Language::Elixir) => matches!(ext, "ex" | "exs"),
372 Some(Language::Lua) => ext == "lua",
373 Some(Language::Luau) => ext == "luau",
374 Some(Language::Ocaml) => matches!(ext, "ml" | "mli"),
375 None => matches!(ext, "py" | "rs"),
376 }
377}
378
/// Returns `true` when `path` has exactly the extension `rs` (case-sensitive).
fn is_rust_file(path: &std::path::Path) -> bool {
    path.extension().and_then(|ext| ext.to_str()) == Some("rs")
}
382
383// `is_rust_test_file` was originally defined locally here; M-Z10
384// (`secure-test-file-suppression-v1`) consolidated it with vuln.rs by
385// promoting `vuln::is_rust_test_file` to `pub(super)` and reusing it
386// here. See `super::vuln::is_rust_test_file`. The behavior is identical
387// to the previous local impl (path component `/tests/` or filename
388// suffix `_test.rs` / `tests.rs`).
389
390/// Partition the candidate file set into clean (kept) and skipped files.
391///
392/// Two-stage filter:
393///
394/// 1. **Oversize / auto-gen pre-filter** (SECURE-FASTPATH-V1, M-Z8):
395/// defer to `tldr_core::fs::oversize::check_size` before reading the
396/// file. The 6 sub-analyses each iterate this file set and read the
397/// full content into memory; without a cap, a 2.3 MB
398/// `dom.generated.d.ts` (TypeScript DOM-gen baselines) dominates
399/// the wall clock — pre-fix `tldr secure --lang typescript
400/// /tmp/repos/ts-dom-gen` ran 154 s, dwarfing the rest of the
401/// repo's ~20 ms. Mirrors the policy applied in
402/// `vuln.rs::analyze_file` (covered by M-Y3
403/// `typescript-large-file-perf-v1`) and `api_check.rs::analyze_file`
404/// (covered by M-Z4 `fastpath-extend-non-vuln-v1`); central policy
405/// in `tldr_core::fs::oversize` enforces the 10 MB source-file cap
406/// and the 512 KB cap for `.d.ts` / `.min.js` / `.bundle.*`
407/// auto-generated artefacts.
408///
409/// 2. **UTF-8 tolerance** (SECURE-UTF8-TOLERANCE-V1, M-X5): pre-fix,
410/// `run_security_analysis` called `fs::read_to_string(file)?` which
411/// propagates the `Err(io::Error("stream did not contain valid
412/// UTF-8"))` returned by `String::from_utf8` for files like
413/// `tests/conformance/literals.luau` in the upstream luau-luau
414/// repo. That `?` aborted the entire scan on the first such file,
415/// so `tldr secure --lang luau /tmp/repos/luau-luau` failed with
416/// `Error: IO error: stream did not contain valid UTF-8` and
417/// exited 1, even though 111/114 files were perfectly scannable.
418/// Mirrors the policy already in
419/// `crates/tldr-core/src/surface/luau.rs`: skip with a structured
420/// warning, continue.
421///
422/// Both oversize and non-UTF-8 skips are counted under the returned
423/// `files_skipped` counter and surfaced via a structured warning.
424/// Genuine I/O errors (file vanished mid-scan) drop the file with a
425/// warning but are NOT counted as a skip — the `secure` walk is
426/// best-effort and one transient failure should not lose the rest.
427fn partition_utf8_clean(candidates: &[PathBuf]) -> (Vec<PathBuf>, Vec<String>, u32) {
428 use tldr_core::fs::oversize::{check_size, format_oversize_warning, SizeCheck};
429
430 let mut clean: Vec<PathBuf> = Vec::with_capacity(candidates.len());
431 let mut warnings: Vec<String> = Vec::new();
432 let mut skipped: u32 = 0;
433 for file in candidates {
434 // SECURE-FASTPATH-V1 (M-Z8): apply oversize cap BEFORE the read.
435 // `read_to_string_tolerant` reads the full file into memory, so
436 // a 2.3 MB `dom.generated.d.ts` would otherwise be loaded six
437 // times (once per sub-analysis read) and parsed once into a
438 // tree-sitter AST per analysis. The check_size stat call is
439 // O(1) and returns SizeCheck::Unknown for missing files
440 // (which then falls through to the existing read path and is
441 // handled there).
442 match check_size(file) {
443 SizeCheck::Oversize {
444 size_bytes,
445 max_bytes,
446 is_autogen,
447 } => {
448 skipped += 1;
449 warnings.push(format_oversize_warning(
450 file,
451 size_bytes,
452 max_bytes,
453 is_autogen,
454 ));
455 continue;
456 }
457 // WithinLimit | Unknown: proceed to the UTF-8 read below.
458 _ => {}
459 }
460
461 match read_to_string_tolerant(file) {
462 Ok(ReadOutcome::Ok(_)) => clean.push(file.clone()),
463 Ok(ReadOutcome::NonUtf8 { byte_offset }) => {
464 skipped += 1;
465 warnings.push(format!(
466 "Skipped {}: invalid UTF-8 at byte {}",
467 file.display(),
468 byte_offset
469 ));
470 }
471 Err(e) => {
472 // Genuine I/O failure (permissions, vanished, etc.).
473 // Drop the file with a warning rather than aborting the
474 // whole scan. This is NOT counted under `files_skipped`,
475 // which is reserved for the UTF-8-tolerance policy and
476 // the oversize policy.
477 warnings.push(format!(
478 "Skipped {}: I/O error: {}",
479 file.display(),
480 e
481 ));
482 }
483 }
484 }
485 (clean, warnings, skipped)
486}
487
488/// Run a specific security analysis on files
489fn run_security_analysis(
490 analysis: SecurityAnalysis,
491 files: &[PathBuf],
492 cache: &mut AstCache,
493) -> RemainingResult<(Vec<SecureFinding>, Value)> {
494 let mut findings = Vec::new();
495
496 for file in files {
497 // SECURE-UTF8-TOLERANCE-V1 (defense-in-depth): the file set was
498 // pre-filtered by `partition_utf8_clean` in `run`, so a clean
499 // read is the expected path. We still use the tolerant reader
500 // here so that a TOCTOU race (file replaced with non-UTF-8
501 // content between the partition pass and the analysis pass)
502 // skips the file instead of aborting the scan. No warning is
503 // emitted here — the partition pass owns warning emission to
504 // avoid duplicate messages across the 6 sub-analyses.
505 let source = match read_to_string_tolerant(file)? {
506 ReadOutcome::Ok(s) => s,
507 ReadOutcome::NonUtf8 { .. } => continue,
508 };
509
510 // Get or parse the AST
511 let tree = cache.get_or_parse(file, &source)?;
512
513 // Run analysis
514 let file_findings = match analysis {
515 SecurityAnalysis::Taint => analyze_taint(tree.root_node(), &source, file),
516 SecurityAnalysis::Resources => analyze_resources(tree.root_node(), &source, file),
517 SecurityAnalysis::Bounds => analyze_bounds(tree.root_node(), &source, file),
518 SecurityAnalysis::Contracts => analyze_contracts(tree.root_node(), &source, file),
519 SecurityAnalysis::Behavioral => analyze_behavioral(tree.root_node(), &source, file),
520 SecurityAnalysis::Mutability => analyze_mutability(tree.root_node(), &source, file),
521 };
522
523 findings.extend(file_findings);
524 }
525
526 // Create raw result
527 let raw_result = serde_json::to_value(&findings).unwrap_or(Value::Array(vec![]));
528
529 Ok((findings, raw_result))
530}
531
532/// SECURE-TEST-FILE-SUPPRESSION-V1 (M-Z10): in-place suppression of
533/// findings emitted from test files. Mirrors the post-analysis filter
534/// applied in `vuln.rs::VulnArgs::run` for `--include-tests`, restoring
535/// vuln↔secure parity (`tldr secure`'s taint findings count must match
536/// `tldr vuln`'s finding count on the same path).
537///
538/// Pre-fix on `/tmp/repos/express`:
539/// * `tldr vuln --lang javascript .` → 1 finding (index.js:21; the
540/// `test/app.engine.js:9` finding masked by M-X3 `is_js_test_file`).
541/// * `tldr secure --lang javascript . | jq '[.findings[]|select(.category=="taint")]'`
542/// → 2 findings (index.js + test/app.engine.js — secure ran the
543/// canonical taint pipeline but never applied the M-X3 mask, so the
544/// `test/app.engine.js` finding leaked through).
545///
546/// Reuses `super::vuln::is_js_test_file` (M-X3 helper: JS/TS path
547/// components + test-style filename suffixes, with a `/fixtures/`
548/// exemption that keeps `vuln_migration_v1` GREEN) and
549/// `super::vuln::is_rust_test_file` (Rust `/tests/` + `_test.rs` /
550/// `tests.rs` suffix). The Rust mask was already applied INSIDE
551/// `analyze_rust_bounds` for unwrap-style smell findings; this filter
552/// adds the symmetric mask for taint-class findings.
553///
554/// Runs BEFORE `compute_summary_from_findings` so the summary reflects
555/// the suppressed view (matches the WRAPPER-CROSS-CONSISTENCY-V1
556/// invariant: summary derives from the final findings array).
557fn apply_test_file_suppression(findings: &mut Vec<SecureFinding>) {
558 findings.retain(|f| {
559 let p = std::path::Path::new(&f.file);
560 // Fixture exemption: paths under a `fixtures/` directory must
561 // NOT be suppressed even when their ancestors include `tests/`
562 // (e.g. `crates/tldr-cli/tests/fixtures/vuln_migration_v1/...`).
563 // `is_js_test_file` already bakes this exemption in; we apply
564 // the same gate to the Rust predicate (which doesn't, since on
565 // the vuln side Rust file collection happens before the
566 // post-analysis filter and the fixture suite is JS/TS-only).
567 // Without this gate, finding-level Rust suppression would drop
568 // legitimate fixture findings on hypothetical Rust fixtures.
569 let in_fixtures =
570 f.file.contains("/fixtures/") || f.file.contains("\\fixtures\\");
571 if in_fixtures {
572 return true;
573 }
574 !super::vuln::is_js_test_file(p) && !super::vuln::is_rust_test_file(p)
575 });
576}
577
578/// Compute the summary by category group-by over the FINAL findings array.
579///
580/// WRAPPER-CROSS-CONSISTENCY-V1 (BUG-15, BUG-16): every `*_count` field
581/// derives from `findings[].category`, so the schema invariant
582/// `taint_count + leak_count + bounds_warnings + behavioral_count +
583/// unsafe_blocks + raw_pointer_ops + unwrap_calls + todo_markers +
584/// missing_contracts + mutable_params == findings.len()`
585/// holds by construction. `taint_critical` is a severity refinement of
586/// `taint_count` (subset, not its own category) and is excluded from the
587/// invariant.
588///
589/// Categories emitted by sub-analyzers (must remain in sync with the
590/// `analyze_*` functions below):
591/// - taint analysis: `taint` (Python/JS/etc.) | `unsafe_block` (Rust)
592/// - resource analysis: `resource_leak` (Python) | `raw_pointer` (Rust)
593/// - bounds analysis: `bounds` (Python) | `unwrap`, `todo_marker` (Rust)
594/// - behavioral analysis: `behavioral`
595/// - contracts analysis: `missing_contract` (placeholder, currently unused)
596/// - mutability analysis: `mutable_param` (placeholder, currently unused)
597fn compute_summary_from_findings(findings: &[SecureFinding]) -> SecureSummary {
598 let count_cat = |cat: &str| findings.iter().filter(|f| f.category == cat).count() as u32;
599
600 SecureSummary {
601 taint_count: count_cat("taint"),
602 taint_critical: findings
603 .iter()
604 .filter(|f| f.category == "taint" && f.severity == "critical")
605 .count() as u32,
606 leak_count: count_cat("resource_leak"),
607 bounds_warnings: count_cat("bounds"),
608 behavioral_count: count_cat("behavioral"),
609 missing_contracts: count_cat("missing_contract"),
610 mutable_params: count_cat("mutable_param"),
611 unsafe_blocks: count_cat("unsafe_block"),
612 raw_pointer_ops: count_cat("raw_pointer"),
613 unwrap_calls: count_cat("unwrap"),
614 todo_markers: count_cat("todo_marker"),
615 }
616}
617
/// Rank a severity label for sorting: lower means more severe.
///
/// `critical` = 0, `high` = 1, `medium` = 2, `low` = 3, `info` = 4; any
/// other label (matching is case-sensitive) sorts last with rank 5.
fn severity_order(severity: &str) -> u8 {
    // Known labels, most severe first; the position is the rank.
    const RANKED: [&str; 5] = ["critical", "high", "medium", "low", "info"];

    RANKED
        .iter()
        .zip(0u8..)
        .find(|(label, _)| **label == severity)
        .map_or(5, |(_, rank)| rank)
}
629
630// =============================================================================
631// Taint Analysis
632// =============================================================================
633
634/// Analyze taint flows in a file.
635///
636/// SECURE-TAINT-AGGREGATOR-V1: For non-Rust files this routes through the
637/// canonical `tldr_core::security::vuln::scan_vulnerabilities` pipeline —
638/// the same pipeline `tldr vuln` uses — so `secure.summary.taint_count`
639/// agrees with `tldr vuln`'s finding count.
640///
641/// RUST-SECURE-TAINT-AGGREGATOR-V2: For Rust files this now mirrors
642/// `tldr vuln`'s dual dispatch from `rust-vuln-taint-pipeline-v1`:
643/// canonical pipeline + line scanner with overlap dedup. The
644/// canonical findings AND the line-scanner SqlInjection /
645/// CommandInjection findings (the only line-scanner emissions that are
646/// taint-class — UnsafeCode/MemorySafety/Panic are smell-class and not
647/// counted under `summary.taint_count`) are emitted with
648/// `category = "taint"`. Unsafe-block findings retain
649/// `category = "unsafe_block"` (counted separately by
650/// `summary.unsafe_blocks`). Pre-V2, secure dropped ALL canonical Rust
651/// taint findings — `tldr vuln --lang rust file.rs` reported N>0
652/// findings while `tldr secure --lang rust file.rs` reported 0
653/// (BUG-17, surfaced by the 17-lang sweep).
654///
655/// The legacy substring-based `TAINT_SINKS` matcher (which produced 0
656/// findings on real flows because it could not see source-to-sink
657/// relationships) remains retired.
658fn analyze_taint(_root: Node, source: &str, file: &Path) -> Vec<SecureFinding> {
659 let (mut findings, canonical_lines) = canonical_taint_findings_with_index(file);
660 if is_rust_file(file) {
661 findings.extend(rust_line_scanner_taint_findings(
662 file,
663 source,
664 &canonical_lines,
665 ));
666 findings.extend(analyze_rust_unsafe_blocks(source, file));
667 }
668 findings
669}
670
671/// Run the Rust line scanner from `vuln.rs` and project ONLY its
672/// taint-class findings (SqlInjection, CommandInjection) onto
673/// `SecureFinding`s with `category = "taint"`. Non-taint smell-class
674/// emissions (UnsafeCode, MemorySafety, Panic) are dropped here — they
675/// are surfaced by the dedicated `analyze_rust_unsafe_blocks` /
676/// `analyze_rust_raw_pointers` / `analyze_rust_bounds` paths under
677/// their own categories.
678///
679/// `canonical_index` carries the `(line, core_VulnType)` tuples the
680/// canonical pipeline already produced for this file. SqlInjection /
681/// CommandInjection line-scanner findings whose `(line, vuln_type)` is
682/// already in the canonical index are dropped — same dedup predicate as
683/// `vuln.rs::dedupe_overlap`. This keeps secure↔vuln per-file counts
684/// equal: vuln applies the same dedup, so secure must too, otherwise
685/// secure would over-count when both layers report the same finding.
686///
687/// RUST-SECURE-TAINT-AGGREGATOR-V2: closes the
688/// `sql_injection_format_keyword_positive.rs` parity gap — the
689/// canonical Rust pipeline does not produce a SqlInjection finding for
690/// `format!("SELECT … {}", x)` (no real source-to-sink), but the line
691/// scanner does (per `rust-format-sql-fp-narrowing-v1`). For
692/// secure↔vuln directory-level parity, secure must include this.
693fn rust_line_scanner_taint_findings(
694 file: &Path,
695 source: &str,
696 canonical_index: &[(u32, tldr_core::security::vuln::VulnType)],
697) -> Vec<SecureFinding> {
698 use crate::commands::remaining::types::VulnType;
699
700 super::vuln::analyze_rust_file(file, source)
701 .into_iter()
702 .filter(|f| {
703 matches!(
704 f.vuln_type,
705 VulnType::SqlInjection | VulnType::CommandInjection
706 )
707 })
708 .filter(|f| {
709 // Mirrors `vuln.rs::dedupe_overlap`: drop line-scanner finding
710 // if canonical already covers `(line, vuln_type)`.
711 let core_ty = match f.vuln_type {
712 VulnType::SqlInjection => tldr_core::security::vuln::VulnType::SqlInjection,
713 VulnType::CommandInjection => {
714 tldr_core::security::vuln::VulnType::CommandInjection
715 }
716 _ => return true,
717 };
718 !canonical_index
719 .iter()
720 .any(|(line, ty)| *line == f.line && *ty == core_ty)
721 })
722 .map(|f| {
723 let severity = match f.severity {
724 crate::commands::remaining::types::Severity::Critical => "critical",
725 crate::commands::remaining::types::Severity::High => "high",
726 crate::commands::remaining::types::Severity::Medium => "medium",
727 crate::commands::remaining::types::Severity::Low => "low",
728 _ => "medium",
729 };
730 let description = format!("{:?}: {}", f.vuln_type, f.description);
731 SecureFinding::new("taint", severity, description).with_location(f.file, f.line)
732 })
733 .collect()
734}
735
736/// Run the canonical `scan_vulnerabilities` pipeline on a single file and
737/// project the resulting `VulnFinding`s onto `SecureFinding`s with
738/// `category = "taint"`. Returns both the projected findings AND the
739/// set of `(line, core_VulnType)` tuples covered by canonical — used by
740/// the Rust line-scanner path to dedupe overlap (SqlInjection,
741/// CommandInjection on the same line). Mirrors
742/// `vuln.rs::dedupe_overlap`.
743///
744/// Runs for ALL extensions including `.rs`
745/// (RUST-SECURE-TAINT-AGGREGATOR-V2 — mirrors `tldr vuln`'s
746/// canonical-for-all-languages dispatch from
747/// `rust-vuln-taint-pipeline-v1`).
748fn canonical_taint_findings_with_index(
749 file: &Path,
750) -> (
751 Vec<SecureFinding>,
752 Vec<(u32, tldr_core::security::vuln::VulnType)>,
753) {
754 let report = match tldr_core::security::vuln::scan_vulnerabilities(file, None, None) {
755 Ok(r) => r,
756 Err(_) => return (Vec::new(), Vec::new()),
757 };
758
759 let index: Vec<(u32, tldr_core::security::vuln::VulnType)> = report
760 .findings
761 .iter()
762 .map(|f| (f.sink.line, f.vuln_type))
763 .collect();
764
765 let findings = report
766 .findings
767 .into_iter()
768 .map(|f| {
769 let severity = match f.severity.to_uppercase().as_str() {
770 "CRITICAL" => "critical",
771 "HIGH" => "high",
772 "MEDIUM" => "medium",
773 "LOW" => "low",
774 _ => "medium",
775 };
776 let description = format!(
777 "{:?}: {} with unsanitized input from {}",
778 f.vuln_type, f.sink.sink_type, f.source.source_type
779 );
780 SecureFinding::new("taint", severity, description)
781 .with_location(f.file.display().to_string(), f.sink.line)
782 })
783 .collect();
784
785 (findings, index)
786}
787
788// =============================================================================
789// Resource Analysis
790// =============================================================================
791
/// Known resource creators
///
/// `find_leaked_resources` compares the last `.`-separated segment of a
/// called function's text against this list, so both `open(...)` and
/// `module.open(...)` match the entry `open`.
const RESOURCE_CREATORS: &[&str] = &["open", "socket", "connect", "cursor", "urlopen"];
794
795/// Analyze resource leaks in a file
796fn analyze_resources(root: Node, source: &str, file: &Path) -> Vec<SecureFinding> {
797 if is_rust_file(file) {
798 return analyze_rust_raw_pointers(source, file);
799 }
800
801 let mut findings = Vec::new();
802 let source_bytes = source.as_bytes();
803
804 // Find resource assignments outside of `with` statements
805 find_leaked_resources(root, source_bytes, file, &mut findings);
806
807 findings
808}
809
810fn find_leaked_resources(
811 node: Node,
812 source: &[u8],
813 file: &Path,
814 findings: &mut Vec<SecureFinding>,
815) {
816 // Check if this is an assignment with a resource creator
817 if node.kind() == "assignment" {
818 if let Some(right) = node.child_by_field_name("right") {
819 if right.kind() == "call" {
820 if let Some(func) = right.child_by_field_name("function") {
821 let func_text = node_text(func, source);
822 let func_name = func_text.split('.').next_back().unwrap_or(func_text);
823
824 if RESOURCE_CREATORS.contains(&func_name) {
825 // Check if this is inside a with statement
826 if !is_inside_with(node) {
827 findings.push(
828 SecureFinding::new(
829 "resource_leak",
830 "high",
831 format!(
832 "Resource '{}' opened without context manager - may leak",
833 func_name
834 ),
835 )
836 .with_location(
837 file.display().to_string(),
838 node.start_position().row as u32 + 1,
839 ),
840 );
841 }
842 }
843 }
844 }
845 }
846 }
847
848 // Recurse
849 for i in 0..node.child_count() {
850 if let Some(child) = node.child(i) {
851 find_leaked_resources(child, source, file, findings);
852 }
853 }
854}
855
856fn is_inside_with(node: Node) -> bool {
857 let mut current = node.parent();
858 while let Some(parent) = current {
859 if parent.kind() == "with_statement" {
860 return true;
861 }
862 current = parent.parent();
863 }
864 false
865}
866
867// =============================================================================
868// Bounds Analysis
869// =============================================================================
870
871/// Analyze bounds/overflow issues in a file
872fn analyze_bounds(_root: Node, source: &str, file: &Path) -> Vec<SecureFinding> {
873 if is_rust_file(file) {
874 return analyze_rust_bounds(source, file);
875 }
876
877 // Placeholder for Python bounds analysis.
878 Vec::new()
879}
880
881// =============================================================================
882// Contracts Analysis
883// =============================================================================
884
885/// Analyze missing contracts in a file
886fn analyze_contracts(_root: Node, _source: &str, _file: &Path) -> Vec<SecureFinding> {
887 // Placeholder - would check for functions without type hints, docstrings, or assertions
888 Vec::new()
889}
890
891// =============================================================================
892// Behavioral Analysis
893// =============================================================================
894
895/// Analyze behavioral issues (exception handling, state) in a file
896fn analyze_behavioral(root: Node, source: &str, file: &Path) -> Vec<SecureFinding> {
897 let mut findings = Vec::new();
898 let source_bytes = source.as_bytes();
899
900 // Find bare except clauses
901 find_bare_except(root, source_bytes, file, &mut findings);
902
903 findings
904}
905
906fn find_bare_except(node: Node, source: &[u8], file: &Path, findings: &mut Vec<SecureFinding>) {
907 // Check for except clauses without exception type
908 if node.kind() == "except_clause" {
909 let has_type = node.children(&mut node.walk()).any(|c| {
910 c.kind() == "as_pattern"
911 || (c.kind() == "identifier" && node_text(c, source) != "Exception")
912 });
913
914 if !has_type {
915 let text = node_text(node, source);
916 if text.starts_with("except:") || text.starts_with("except :") {
917 findings.push(
918 SecureFinding::new(
919 "behavioral",
920 "medium",
921 "Bare except clause catches all exceptions including KeyboardInterrupt",
922 )
923 .with_location(
924 file.display().to_string(),
925 node.start_position().row as u32 + 1,
926 ),
927 );
928 }
929 }
930 }
931
932 // Recurse
933 for i in 0..node.child_count() {
934 if let Some(child) = node.child(i) {
935 find_bare_except(child, source, file, findings);
936 }
937 }
938}
939
940// =============================================================================
941// Mutability Analysis
942// =============================================================================
943
944/// Analyze mutability issues in a file
945fn analyze_mutability(_root: Node, _source: &str, _file: &Path) -> Vec<SecureFinding> {
946 // Placeholder - would check for mutable default arguments, etc.
947 Vec::new()
948}
949
950// =============================================================================
951// Utilities
952// =============================================================================
953
954fn node_text<'a>(node: Node, source: &'a [u8]) -> &'a str {
955 std::str::from_utf8(&source[node.start_byte()..node.end_byte()]).unwrap_or("")
956}
957
958fn analyze_rust_unsafe_blocks(source: &str, file: &Path) -> Vec<SecureFinding> {
959 let mut findings = Vec::new();
960 for (idx, line) in source.lines().enumerate() {
961 let trimmed = line.trim();
962 if trimmed.starts_with("//") {
963 continue;
964 }
965 if trimmed.contains("unsafe {") || trimmed.starts_with("unsafe{") {
966 findings.push(
967 SecureFinding::new(
968 "unsafe_block",
969 "high",
970 "unsafe block detected; verify invariants and safety rationale",
971 )
972 .with_location(file.display().to_string(), (idx + 1) as u32),
973 );
974 }
975 }
976 findings
977}
978
979fn analyze_rust_raw_pointers(source: &str, file: &Path) -> Vec<SecureFinding> {
980 let mut findings = Vec::new();
981 for (idx, line) in source.lines().enumerate() {
982 let trimmed = line.trim();
983 if trimmed.starts_with("//") {
984 continue;
985 }
986 if trimmed.contains("std::ptr::")
987 || trimmed.contains("core::ptr::")
988 || trimmed.contains("ptr::read(")
989 || trimmed.contains("ptr::write(")
990 {
991 findings.push(
992 SecureFinding::new(
993 "raw_pointer",
994 "high",
995 "raw pointer operation detected; audit aliasing, lifetime, and bounds assumptions",
996 )
997 .with_location(file.display().to_string(), (idx + 1) as u32),
998 );
999 }
1000 }
1001 findings
1002}
1003
1004fn analyze_rust_bounds(source: &str, file: &Path) -> Vec<SecureFinding> {
1005 let mut findings = Vec::new();
1006 let skip_test_only = super::vuln::is_rust_test_file(file);
1007
1008 for (idx, line) in source.lines().enumerate() {
1009 let trimmed = line.trim();
1010 if trimmed.starts_with("//") {
1011 continue;
1012 }
1013
1014 if !skip_test_only && trimmed.contains(".unwrap()") {
1015 findings.push(
1016 SecureFinding::new(
1017 "unwrap",
1018 "medium",
1019 "unwrap() call in non-test code may panic at runtime",
1020 )
1021 .with_location(file.display().to_string(), (idx + 1) as u32),
1022 );
1023 }
1024
1025 if !skip_test_only && (trimmed.contains("todo!(") || trimmed.contains("unimplemented!(")) {
1026 findings.push(
1027 SecureFinding::new(
1028 "todo_marker",
1029 "low",
1030 "todo!/unimplemented! marker found in non-test Rust code",
1031 )
1032 .with_location(file.display().to_string(), (idx + 1) as u32),
1033 );
1034 }
1035 }
1036
1037 findings
1038}
1039
1040// =============================================================================
1041// Text Output
1042// =============================================================================
1043
1044fn format_text_report(report: &SecureReport) -> String {
1045 let mut output = String::new();
1046
1047 output.push_str(&"=".repeat(60));
1048 output.push('\n');
1049 output.push_str(&format!(
1050 "{}\n",
1051 "SECURE - Security Analysis Dashboard".bold()
1052 ));
1053 output.push_str(&"=".repeat(60));
1054 output.push_str("\n\n");
1055 output.push_str(&format!("Path: {}\n\n", report.path));
1056
1057 if report.findings.is_empty() {
1058 output.push_str(&format!("{}\n", "No security issues found.".green()));
1059 } else {
1060 output.push_str(&format!(
1061 "{}\n",
1062 "Severity | Category | Description".bold()
1063 ));
1064 output.push_str(&format!("{}\n", "-".repeat(60)));
1065
1066 for finding in &report.findings {
1067 let severity_colored = match finding.severity.as_str() {
1068 "critical" => finding.severity.red().bold().to_string(),
1069 "high" => finding.severity.red().to_string(),
1070 "medium" => finding.severity.yellow().to_string(),
1071 "low" => finding.severity.blue().to_string(),
1072 _ => finding.severity.clone(),
1073 };
1074 output.push_str(&format!(
1075 "{:>8} | {:<14} | {}\n",
1076 severity_colored, finding.category, finding.description
1077 ));
1078 if !finding.file.is_empty() {
1079 output.push_str(&format!(
1080 " | | {}:{}\n",
1081 finding.file, finding.line
1082 ));
1083 }
1084 }
1085 }
1086
1087 output.push('\n');
1088 output.push_str(&format!("{}\n", "Summary:".bold()));
1089 output.push_str(&format!(
1090 " Taint issues: {} ({} critical)\n",
1091 report.summary.taint_count, report.summary.taint_critical
1092 ));
1093 output.push_str(&format!(
1094 " Resource leaks: {}\n",
1095 report.summary.leak_count
1096 ));
1097 output.push_str(&format!(
1098 " Bounds warnings: {}\n",
1099 report.summary.bounds_warnings
1100 ));
1101 output.push_str(&format!(
1102 " Behavioral: {}\n",
1103 report.summary.behavioral_count
1104 ));
1105 output.push_str(&format!(
1106 " Missing contracts: {}\n",
1107 report.summary.missing_contracts
1108 ));
1109 output.push_str(&format!(
1110 " Mutable params: {}\n",
1111 report.summary.mutable_params
1112 ));
1113 output.push_str(&format!(
1114 " Unsafe blocks: {}\n",
1115 report.summary.unsafe_blocks
1116 ));
1117 output.push_str(&format!(
1118 " Raw pointer ops: {}\n",
1119 report.summary.raw_pointer_ops
1120 ));
1121 output.push_str(&format!(
1122 " Unwrap calls: {}\n",
1123 report.summary.unwrap_calls
1124 ));
1125 output.push_str(&format!(
1126 " Todo markers: {}\n",
1127 report.summary.todo_markers
1128 ));
1129 output.push('\n');
1130 output.push_str(&format!("Elapsed: {:.2}ms\n", report.total_elapsed_ms));
1131
1132 output
1133}
1134
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use tempfile::TempDir;
    use tree_sitter::{Parser, Tree};

    // ---------------------------------------------------------------------
    // Shared helpers
    // ---------------------------------------------------------------------

    /// Writes `content` to `name` inside `dir` and returns the full path.
    fn create_test_file(dir: &TempDir, name: &str, content: &str) -> PathBuf {
        let path = dir.path().join(name);
        fs::write(&path, content).unwrap();
        path
    }

    /// Parses `source` with the given tree-sitter grammar.
    fn parse_with(grammar: &tree_sitter::Language, source: &str) -> Tree {
        let mut parser = Parser::new();
        parser.set_language(grammar).unwrap();
        parser.parse(source, None).unwrap()
    }

    fn parse_python(source: &str) -> Tree {
        parse_with(&tree_sitter_python::LANGUAGE.into(), source)
    }

    fn parse_rust(source: &str) -> Tree {
        parse_with(&tree_sitter_rust::LANGUAGE.into(), source)
    }

    /// Number of findings the canonical pipeline (the call `tldr vuln`
    /// uses) reports for `path`.
    fn canonical_finding_count(path: &Path) -> usize {
        tldr_core::security::vuln::scan_vulnerabilities(path, None, None)
            .unwrap()
            .findings
            .len()
    }

    /// Source-file fixture: reflected-XSS flow (req.query -> res.send).
    /// Same shape as the `vuln_migration_v1/javascript/xss_positive.js`
    /// fixture, which the canonical taint engine is known to report on.
    const SOURCE_JS: &str = r#"export function handler(req, res, db) {
    const name = req.query.name;
    res.send("<h1>" + name + "</h1>");
}
"#;

    /// Test-file fixture with the SAME flow shape as `SOURCE_JS`.
    const TEST_JS: &str = r#"export function handler(req, res, db) {
    const input = req.query.q;
    res.send("<p>" + input + "</p>");
}
"#;

    /// Lays out the JS suppression fixture under `root`:
    ///
    /// ```text
    /// <root>/src/index.js      (source)
    /// <root>/test/app.test.js  (test, under `test/`)
    /// ```
    fn seed_js_project(root: &Path) {
        let src_dir = root.join("src");
        let test_dir = root.join("test");
        fs::create_dir_all(&src_dir).unwrap();
        fs::create_dir_all(&test_dir).unwrap();
        fs::write(src_dir.join("index.js"), SOURCE_JS).unwrap();
        fs::write(test_dir.join("app.test.js"), TEST_JS).unwrap();
    }

    /// Runs `secure::run` against `path` in quick mode with JSON written to
    /// a temporary report file, and returns the parsed JSON.
    fn run_secure_to_json(
        path: &Path,
        lang: Language,
        include_tests: bool,
    ) -> serde_json::Value {
        let temp_out = TempDir::new().unwrap();
        let out_path = temp_out.path().join("report.json");
        let args = SecureArgs {
            path: path.to_path_buf(),
            lang: Some(lang),
            detail: None,
            // Quick mode runs only taint/resources/bounds, which is enough
            // for the suppression assertions and faster.
            quick: true,
            output: Some(out_path.clone()),
            no_default_ignore: false,
            include_tests,
        };
        run(args, OutputFormat::Json).expect("secure::run should succeed");
        let raw = fs::read_to_string(&out_path).expect("report file must exist");
        serde_json::from_str(&raw).expect("report must be valid JSON")
    }

    /// The `category == "taint"` entries of a report's `findings` array.
    fn taint_findings(report: &serde_json::Value) -> Vec<&serde_json::Value> {
        report["findings"]
            .as_array()
            .expect("findings must be an array")
            .iter()
            .filter(|f| f["category"].as_str() == Some("taint"))
            .collect()
    }

    /// Distinct `file` values across `findings`. Set semantics keep the
    /// assertions tolerant of the engine emitting several findings per flow.
    fn finding_files<'a>(findings: &[&'a serde_json::Value]) -> HashSet<&'a str> {
        findings
            .iter()
            .filter_map(|f| f["file"].as_str())
            .collect()
    }

    // ---------------------------------------------------------------------
    // Basic unit tests
    // ---------------------------------------------------------------------

    #[test]
    fn test_secure_args_default() {
        // Flags left off must read back as off.
        let args = SecureArgs {
            path: PathBuf::from("test.py"),
            lang: None,
            detail: None,
            quick: false,
            output: None,
            no_default_ignore: false,
            include_tests: false,
        };
        assert!(!args.quick);
        assert!(!args.include_tests);
    }

    #[test]
    fn test_severity_order() {
        assert!(severity_order("critical") < severity_order("high"));
        assert!(severity_order("high") < severity_order("medium"));
        assert!(severity_order("medium") < severity_order("low"));
        assert!(severity_order("low") < severity_order("info"));
    }

    // ---------------------------------------------------------------------
    // Taint: secure <-> vuln parity
    // ---------------------------------------------------------------------

    /// SECURE-TAINT-AGGREGATOR-V1: taint routes through the canonical
    /// `scan_vulnerabilities`, which needs a real source-to-sink flow (not
    /// just a literal f-string in a sink). The fixture models a Flask
    /// request -> cursor.execute flow that the canonical engine reports.
    #[test]
    fn test_taint_analysis_finds_sql_injection() {
        let temp = TempDir::new().unwrap();
        let source = r#"
from flask import request
import sqlite3

def query():
    user_input = request.args.get("name")
    conn = sqlite3.connect("db")
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM users WHERE name = '" + user_input + "'")
"#;
        let path = create_test_file(&temp, "vuln.py", source);
        let tree = parse_python(source);

        let findings = analyze_taint(tree.root_node(), source, &path);
        assert!(
            !findings.is_empty(),
            "Should detect SQL injection from request.args -> cursor.execute"
        );
        assert!(findings.iter().all(|f| f.category == "taint"));
    }

    /// SECURE-TAINT-AGGREGATOR-V1: secure↔vuln aggregation parity guard.
    ///
    /// The canonical `scan_vulnerabilities` pipeline is the single source
    /// of truth for taint findings, so `tldr secure` MUST surface the same
    /// finding count as `tldr vuln` on the same path. Previously secure ran
    /// a substring-only matcher that missed every real source-to-sink flow
    /// and reported `taint_count: 0` while `vuln` reported N>0.
    #[test]
    fn test_secure_taint_count_matches_vuln_findings() {
        let temp = TempDir::new().unwrap();
        // Two real Flask-style flows: HTTP param -> subprocess.call
        // (CommandInjection) and HTTP param -> cursor.execute
        // (SqlInjection via string concatenation).
        let source = r#"
from flask import request
import subprocess
import sqlite3

def cmd():
    user = request.args.get("user")
    subprocess.call("echo " + user, shell=True)

def sql():
    name = request.args.get("name")
    conn = sqlite3.connect("db")
    cur = conn.cursor()
    cur.execute("SELECT * FROM users WHERE name='" + name + "'")
"#;
        let path = create_test_file(&temp, "flow.py", source);

        let vuln_count = canonical_finding_count(&path);
        assert!(
            vuln_count > 0,
            "Fixture must produce >=1 canonical finding (got 0 - fixture is wrong)"
        );

        // secure's taint analysis on the same file.
        let tree = parse_python(source);
        let secure_findings = analyze_taint(tree.root_node(), source, &path);

        assert_eq!(
            secure_findings.len(),
            vuln_count,
            "secure taint findings must match vuln finding count exactly \
             (secure={}, vuln={}). secure uses canonical scan_vulnerabilities \
             pipeline.",
            secure_findings.len(),
            vuln_count
        );
        assert!(secure_findings.iter().all(|f| f.category == "taint"));
    }

    /// RUST-SECURE-TAINT-AGGREGATOR-V2: Rust-specific parity guard.
    ///
    /// Pre-V2, `analyze_taint` short-circuited on `.rs` files to ONLY the
    /// unsafe-block line scanner, dropping every canonical taint finding:
    /// `tldr vuln --lang rust file.rs` reported N>0 findings while
    /// `tldr secure --lang rust file.rs` reported `taint_count: 0`. The
    /// v0.2.x 17-language sweep surfaced it; Rust was the only language
    /// failing `secure.taint_count == vuln.findings.length` (16/17 passed).
    ///
    /// Post-V2: the `category == "taint"` count MUST equal the vuln finding
    /// count on Rust. Unsafe-block findings are still emitted but feed
    /// `summary.unsafe_blocks`, not `taint_count`.
    #[test]
    fn test_secure_taint_count_matches_vuln_rust() {
        let temp = TempDir::new().unwrap();
        // Command-injection flow: env input (untrusted source) into
        // Command::new(...).arg(...).output() (sink). Mirrors
        // `command_injection_positive.rs` from the `vuln_migration_v1`
        // fixtures.
        let source = r#"
use std::env;
use std::process::Command;

fn run() {
    let user = env::var("USER_INPUT").unwrap();
    let output = Command::new("sh").arg("-c").arg(&user).output();
    let _ = output;
}
"#;
        let path = create_test_file(&temp, "cmd_inj.rs", source);

        let vuln_count = canonical_finding_count(&path);
        assert!(
            vuln_count > 0,
            "Fixture must produce >=1 canonical Rust finding (got 0 - fixture is wrong)"
        );

        // secure's taint analysis on the same Rust file.
        let tree = parse_rust(source);
        let secure_findings = analyze_taint(tree.root_node(), source, &path);

        // Only category="taint" feeds summary.taint_count; Rust results
        // may also carry category="unsafe_block" entries.
        let taint_total = secure_findings
            .iter()
            .filter(|f| f.category == "taint")
            .count();

        assert_eq!(
            taint_total,
            vuln_count,
            "secure taint findings (category=\"taint\") must match vuln \
             finding count exactly on Rust (secure_taint={}, vuln={}). \
             RUST-SECURE-TAINT-AGGREGATOR-V2 routes Rust through the \
             canonical scan_vulnerabilities pipeline, same as tldr vuln.",
            taint_total,
            vuln_count
        );
    }

    // ---------------------------------------------------------------------
    // Resources
    // ---------------------------------------------------------------------

    #[test]
    fn test_resource_analysis_finds_leak() {
        let source = r#"
def read_file():
    f = open("test.txt")
    data = f.read()
    return data
"#;
        let tree = parse_python(source);

        let findings = analyze_resources(tree.root_node(), source, &PathBuf::from("test.py"));
        assert!(!findings.is_empty(), "Should detect resource leak");
    }

    #[test]
    fn test_resource_analysis_no_leak_with_context() {
        let source = r#"
def read_file():
    with open("test.txt") as f:
        data = f.read()
    return data
"#;
        let tree = parse_python(source);

        let findings = analyze_resources(tree.root_node(), source, &PathBuf::from("test.py"));
        assert!(
            findings.is_empty(),
            "Should not detect leak with context manager"
        );
    }

    // ---------------------------------------------------------------------
    // File collection and Rust line scanners
    // ---------------------------------------------------------------------

    #[test]
    fn test_collect_files_includes_rust() {
        let temp = TempDir::new().unwrap();
        create_test_file(&temp, "sample.py", "print('ok')");
        create_test_file(&temp, "lib.rs", "fn main() {}");
        create_test_file(&temp, "notes.txt", "ignore");

        let files = collect_files(temp.path(), None, false).unwrap();
        let collected = |name: &str| files.iter().any(|f| f.ends_with(name));
        assert!(collected("sample.py"));
        assert!(collected("lib.rs"));
        assert!(!collected("notes.txt"));
    }

    #[test]
    fn test_rust_secure_metrics_detected() {
        let source = r#"
use std::ptr;

fn risky(user: &str) {
    unsafe { ptr::write(user.as_ptr() as *mut u8, b'x'); }
    let _v = Some(user).unwrap();
    todo!("finish hardening");
}
"#;
        let tree = parse_rust(source);
        let file = PathBuf::from("src/lib.rs");

        let taint_findings = analyze_taint(tree.root_node(), source, &file);
        let resource_findings = analyze_resources(tree.root_node(), source, &file);
        let bounds_findings = analyze_bounds(tree.root_node(), source, &file);

        assert!(!taint_findings.is_empty(), "Should count unsafe blocks");
        assert!(
            !resource_findings.is_empty(),
            "Should count raw pointer ops"
        );
        assert!(
            bounds_findings.iter().any(|f| f.category == "unwrap"),
            "Should count unwrap calls"
        );
        assert!(
            bounds_findings.iter().any(|f| f.category == "todo_marker"),
            "Should count todo markers"
        );
    }

    // ---------------------------------------------------------------------
    // SECURE-FASTPATH-V1 (M-Z8)
    // ---------------------------------------------------------------------

    /// The file partition step must drop oversize / auto-generated files
    /// BEFORE the per-analysis `read_to_string_tolerant` loop, mirroring
    /// the policy applied by `vuln.rs::analyze_file` (M-Y3) and
    /// `api_check.rs::analyze_file` (M-Z4). Pre-fix,
    /// `tldr secure --lang typescript /tmp/repos/ts-dom-gen` ran 154 s
    /// because the 2.3 MB `dom.generated.d.ts` was read and parsed once per
    /// sub-analysis (6 times). The fastpath skips it on the FIRST stat call.
    ///
    /// Fixture: a synthetic `.d.ts` padded over the 512 KB auto-gen cap
    /// (`MAX_AUTOGEN_FILE_SIZE_BYTES`). Asserts that:
    ///   1. the file is dropped from the kept set;
    ///   2. `files_skipped` is incremented;
    ///   3. the warning has the documented oversize shape, so consumers
    ///      can tell oversize skips from UTF-8 skips.
    #[test]
    fn test_secure_skips_oversize_files() {
        use tldr_core::fs::oversize::MAX_AUTOGEN_FILE_SIZE_BYTES;

        let temp = TempDir::new().unwrap();

        // The `.d.ts` suffix selects the 512 KB auto-gen cap rather than
        // the 10 MB source-file cap, which would need a many-MB fixture.
        let filler = " member_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx: string;\n";
        let mut padded = String::with_capacity(MAX_AUTOGEN_FILE_SIZE_BYTES as usize + 1024);
        padded.push_str("export type Generated = {\n");
        while (padded.len() as u64) < MAX_AUTOGEN_FILE_SIZE_BYTES + 1024 {
            padded.push_str(filler);
        }
        padded.push_str("};\n");
        let big = create_test_file(&temp, "dom.generated.d.ts", &padded);

        // Sanity check: without actually exceeding the cap the test would
        // pass vacuously.
        let size = std::fs::metadata(&big).unwrap().len();
        assert!(
            size > MAX_AUTOGEN_FILE_SIZE_BYTES,
            "fixture must exceed auto-gen cap (size={}, cap={})",
            size,
            MAX_AUTOGEN_FILE_SIZE_BYTES
        );

        // A small in-policy file proves the partition continues past the
        // oversize skip instead of short-circuiting.
        let small = create_test_file(
            &temp,
            "ok.ts",
            "export function f(x: string): string { return x; }\n",
        );

        let (kept, warnings, files_skipped) =
            partition_utf8_clean(&[big.clone(), small.clone()]);

        // 1. Oversize file dropped, small file preserved.
        assert!(
            !kept.iter().any(|p| p == &big),
            "oversize .d.ts must be dropped from kept set: kept={:?}",
            kept
        );
        assert!(
            kept.iter().any(|p| p == &small),
            "small in-policy .ts must be preserved: kept={:?}",
            kept
        );

        // 2. files_skipped reflects the oversize drop.
        assert_eq!(
            files_skipped, 1,
            "files_skipped must count the oversize drop (got {})",
            files_skipped
        );

        // 3. Warning uses the oversize shape, distinct from the UTF-8
        //    "invalid UTF-8 at byte" shape.
        let oversize_warning = warnings
            .iter()
            .find(|w| w.contains("dom.generated.d.ts"))
            .expect("must emit a warning for the oversize file");
        let has_oversize_shape = oversize_warning.contains("exceeds")
            && oversize_warning.contains("cap for")
            && oversize_warning.contains("auto-generated/minified files");
        assert!(
            has_oversize_shape,
            "oversize warning must use the format_oversize_warning shape \
             (got: {})",
            oversize_warning
        );
    }

    // =========================================================================
    // SECURE-TEST-FILE-SUPPRESSION-V1 (M-Z10) — vuln/secure parity tests
    // =========================================================================

    /// Default scan must suppress findings emitted from JS/TS test files,
    /// mirroring `tldr vuln`'s M-X3 mask. Without this, parity breaks:
    /// vuln=1 finding (source-only), secure.taint=2 (source + test).
    #[test]
    fn test_secure_default_suppresses_js_test_files() {
        let temp = TempDir::new().unwrap();
        // `test/app.test.js` matches `is_js_test_file`; the canonical
        // engine emits a finding there too, which would inflate
        // `secure.taint` past `vuln.findings` if it were not filtered.
        seed_js_project(temp.path());

        let report = run_secure_to_json(temp.path(), Language::JavaScript, false);
        let findings = taint_findings(&report);

        // Every surviving taint finding must come from the source file.
        assert!(
            !findings.is_empty(),
            "fixture must produce at least one taint finding (got 0 — fixture is wrong)"
        );
        let unique_files = finding_files(&findings);
        assert_eq!(
            unique_files.len(),
            1,
            "default scan must suppress test-file findings — expected exactly 1 \
             unique file (the source), got {:?}",
            unique_files
        );
        let kept_file = unique_files.iter().next().unwrap();
        assert!(
            kept_file.ends_with("index.js"),
            "kept finding must come from the source file, got {:?}",
            kept_file
        );
        assert!(
            !kept_file.contains("/test/"),
            "kept finding must not come from a test path, got {:?}",
            kept_file
        );
        assert!(
            !findings
                .iter()
                .any(|f| f["file"].as_str().unwrap_or("").contains("/test/")),
            "no finding may originate from a test/ path; got: {:?}",
            findings.iter().map(|f| f["file"].clone()).collect::<Vec<_>>()
        );
    }

    /// `--include-tests` must restore the legacy emission set, surfacing
    /// findings from BOTH source and test files. Mirrors
    /// `tldr vuln --include-tests` semantics.
    #[test]
    fn test_secure_include_tests_emits_test_findings() {
        let temp = TempDir::new().unwrap();
        seed_js_project(temp.path());

        let report = run_secure_to_json(temp.path(), Language::JavaScript, true);
        let findings = taint_findings(&report);

        let unique_files = finding_files(&findings);
        assert_eq!(
            unique_files.len(),
            2,
            "--include-tests must restore test-file emissions — expected 2 \
             unique files (source + test), got {:?}",
            unique_files
        );
        assert!(
            unique_files.iter().any(|f| f.ends_with("index.js")),
            "must include source-file finding: {:?}",
            unique_files
        );
        assert!(
            unique_files.iter().any(|f| f.contains("/test/") && f.ends_with(".test.js")),
            "must include test-file finding when --include-tests: {:?}",
            unique_files
        );
    }

    /// Direct unit test for the in-place helper, independent of the `run()`
    /// pipeline, so a regression in suppression semantics is caught at the
    /// predicate-application boundary.
    #[test]
    fn test_apply_test_file_suppression_filters_js_and_rust_test_paths() {
        let paths = [
            "/abs/src/index.js",               // keep
            "/abs/test/app.test.js",           // drop (js test path)
            "/abs/lib/foo.spec.ts",            // drop (js spec suffix)
            "/abs/__tests__/x.tsx",            // drop (js __tests__)
            "/abs/crates/foo/tests/it.rs",     // drop (rust /tests/)
            "/abs/crates/foo/src/lib.rs",      // keep
            "/abs/crates/foo/src/foo_test.rs", // drop (rust _test.rs)
            // Fixture exemption — must NOT be dropped (the
            // vuln_migration_v1 suite depends on it being preserved).
            "/abs/crates/tldr-cli/tests/fixtures/vuln_migration_v1/javascript/x.js",
        ];
        let mut findings: Vec<SecureFinding> = paths
            .iter()
            .map(|path| SecureFinding::new("taint", "high", "x").with_location(*path, 1))
            .collect();

        apply_test_file_suppression(&mut findings);

        let kept: Vec<_> = findings.iter().map(|f| f.file.clone()).collect();
        // Kept: index.js, lib.rs and the fixture. Dropped: app.test.js,
        // foo.spec.ts, x.tsx, it.rs, foo_test.rs.
        assert_eq!(
            kept.len(),
            3,
            "expected 3 kept (2 source + 1 fixture), got {:?}",
            kept
        );
        assert!(kept.iter().any(|f| f.ends_with("/src/index.js")));
        assert!(kept.iter().any(|f| f.ends_with("/src/lib.rs")));
        assert!(kept.iter().any(|f| f.contains("/fixtures/")));

        // None of the dropped paths may remain.
        let dropped_suffixes = [
            "/app.test.js",
            "/foo.spec.ts",
            "/__tests__/x.tsx",
            "/tests/it.rs",
            "/foo_test.rs",
        ];
        for suffix in dropped_suffixes.iter() {
            assert!(!kept.iter().any(|f| f.ends_with(suffix)));
        }
    }
}