Skip to main content

tldr_cli/commands/remaining/
secure.rs

1//! Secure Command - Security Analysis Dashboard
2//!
3//! Aggregates security sub-analyses (taint, resources, bounds, contracts,
4//! behavioral, mutability) into a severity-sorted security report.
5//!
6//! # Sub-analyses
7//!
8//! - `taint`: Detect data flow from untrusted sources to sensitive sinks
9//! - `resources`: Detect resource leaks (files, connections)
10//! - `bounds`: Detect potential buffer overflows and bounds issues
11//! - `contracts`: Analyze pre/postconditions (full mode only)
12//! - `behavioral`: Analyze exception handling and state transitions (full mode only)
13//! - `mutability`: Detect mutable parameter issues (full mode only)
14//!
15//! # Quick Mode
16//!
17//! Quick mode (`--quick`) runs only the fast analyses:
18//! - taint, resources, bounds
19//!
20//! Full mode adds:
21//! - contracts, behavioral, mutability
22//!
23//! # Example
24//!
25//! ```bash
26//! # Analyze a file
27//! tldr secure src/app.py
28//!
29//! # Quick mode (faster)
30//! tldr secure src/app.py --quick
31//!
32//! # Show detail for sub-analysis
33//! tldr secure src/app.py --detail taint
34//!
35//! # Text output
36//! tldr secure src/app.py -f text
37//! ```
38
39use std::collections::HashMap;
40use std::fs;
41use std::path::{Path, PathBuf};
42use std::time::Instant;
43
44use clap::Args;
45use colored::Colorize;
46use serde_json::Value;
47use tldr_core::fs::{read_to_string_tolerant, ReadOutcome};
48use tldr_core::walker::ProjectWalker;
49use tldr_core::Language;
50use tree_sitter::Node;
51
52use crate::output::OutputFormat;
53
54use super::ast_cache::AstCache;
55use super::error::{RemainingError, RemainingResult};
56use super::types::{SecureFinding, SecureReport, SecureSummary};
57
58// =============================================================================
59// Security Analysis Types
60// =============================================================================
61
/// The individual security sub-analyses that `tldr secure` can run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecurityAnalysis {
    /// Data flow from untrusted sources to sensitive sinks.
    Taint,
    /// Resource leaks (files, connections).
    Resources,
    /// Potential buffer overflows and bounds issues.
    Bounds,
    /// Pre/postcondition analysis (full mode only).
    Contracts,
    /// Exception handling and state transitions (full mode only).
    Behavioral,
    /// Mutable parameter issues (full mode only).
    Mutability,
}

impl SecurityAnalysis {
    /// Lowercase identifier of this analysis.
    ///
    /// This is the string `--detail` is compared against and the key under
    /// which a requested raw sub-result is stored.
    pub fn name(&self) -> &'static str {
        use SecurityAnalysis::{Behavioral, Bounds, Contracts, Mutability, Resources, Taint};

        // Exhaustive on purpose: adding a variant must force a name here.
        match *self {
            Taint => "taint",
            Resources => "resources",
            Bounds => "bounds",
            Contracts => "contracts",
            Behavioral => "behavioral",
            Mutability => "mutability",
        }
    }
}
86
/// Quick mode analyses (fast).
///
/// The subset `run` iterates when `--quick` is passed: taint, resources and
/// bounds, in that order.
pub const QUICK_ANALYSES: &[SecurityAnalysis] = &[
    SecurityAnalysis::Taint,
    SecurityAnalysis::Resources,
    SecurityAnalysis::Bounds,
];
93
/// Full mode analyses (all).
///
/// The list `run` iterates when `--quick` is NOT passed: the three quick
/// analyses followed by contracts, behavioral and mutability.
pub const FULL_ANALYSES: &[SecurityAnalysis] = &[
    SecurityAnalysis::Taint,
    SecurityAnalysis::Resources,
    SecurityAnalysis::Bounds,
    SecurityAnalysis::Contracts,
    SecurityAnalysis::Behavioral,
    SecurityAnalysis::Mutability,
];
103
104// =============================================================================
105// CLI Arguments
106// =============================================================================
107
// NOTE(review): the `///` comments on this struct and its fields are consumed
// by clap's `Args` derive — presumably rendered as `tldr secure --help` text —
// so treat them as user-facing strings when editing. Maintainer-only notes
// belong in plain `//` comments like this one.
/// Security analysis dashboard aggregating multiple security checks
#[derive(Debug, Args, Clone)]
pub struct SecureArgs {
    /// File path or directory to analyze
    pub path: PathBuf,

    // When omitted, `run` autodetects the language from the path and rejects
    // languages outside the natively-analyzed set.
    /// Programming language to filter by (auto-detected if omitted)
    #[arg(long, short = 'l')]
    pub lang: Option<Language>,

    // Compared verbatim against `SecurityAnalysis::name()` in `run`; a value
    // that matches no analysis that actually runs stores no sub-result.
    /// Show details for specific sub-analysis
    #[arg(long)]
    pub detail: Option<String>,

    /// Run quick mode (taint, resources, bounds only)
    #[arg(long)]
    pub quick: bool,

    /// Write output to file instead of stdout
    #[arg(long, short = 'o')]
    pub output: Option<PathBuf>,

    /// Walk vendored/build dirs (node_modules, target, dist, etc.) that would normally be skipped.
    #[arg(long)]
    pub no_default_ignore: bool,

    /// Include findings on test files. Mirrors `tldr vuln --include-tests`
    /// (M-X3 `js-test-file-suppression-v1`). Default: `false` — findings
    /// emitted from JS/TS test files (paths under `test/`, `tests/`,
    /// `__tests__/`, or filenames ending in `.test.{js,ts,jsx,tsx}`,
    /// `.spec.{js,ts,jsx,tsx}`, or `.e2e.{js,ts}`) and Rust test files
    /// (paths under `/tests/` or filenames ending in `_test.rs` /
    /// `tests.rs`) are suppressed because they exercise sink behavior on
    /// synthetic inputs and pollute production-codebase scans. Pass
    /// `--include-tests` to restore them. Mirrors the `--include-smells`
    /// precedent (opt-in for noisy categories).
    #[arg(long)]
    pub include_tests: bool,
}
147
148impl SecureArgs {
149    /// Run the secure command with CLI-provided format
150    pub fn run(&self, format: OutputFormat) -> anyhow::Result<()> {
151        run(self.clone(), format)
152    }
153}
154
155// =============================================================================
156// Implementation
157// =============================================================================
158
159/// Run the secure analysis
160pub fn run(args: SecureArgs, format: OutputFormat) -> anyhow::Result<()> {
161    let start = Instant::now();
162
163    // Validate path exists
164    if !args.path.exists() {
165        return Err(RemainingError::file_not_found(&args.path).into());
166    }
167
168    // Create report
169    let mut report = SecureReport::new(args.path.display().to_string());
170
171    // Initialize AST cache for shared parsing
172    let mut cache = AstCache::default();
173
174    // Determine which analyses to run
175    let analyses = if args.quick {
176        QUICK_ANALYSES
177    } else {
178        FULL_ANALYSES
179    };
180
181    // VULN-SECURE-AUTODETECT-PARITY-V1 (M-AA5): mirror `tldr vuln`'s
182    // language-resolution path so secure agrees with vuln on autodetect.
183    //
184    // Pre-fix: `tldr secure /tmp/repos/express` (no `--lang`) reported
185    // `summary.taint_count: 0` while `tldr vuln /tmp/repos/express`
186    // reported `findings: 1`. The discrepancy traced to secure's
187    // `collect_files` lacking the autodetect step: with `lang = None`,
188    // `is_supported_secure_file` matches only `py | rs`, so a JS-only
189    // tree (express) silently produced an empty file set.
190    //
191    // M-Z10 (`secure-test-file-suppression-v1`) made vuln+secure agree
192    // when `--lang` is EXPLICIT by mirroring the test-file suppression
193    // mask. M-AA5 closes the symmetric gap on the autodetect path:
194    //
195    //   1. If `--lang L` provided, honor it as-is.
196    //   2. Else, autodetect via `Language::from_directory` (M-AA1
197    //      `autodetect-dominant-language-v1` made this strict
198    //      extension-majority + manifest-priority).
199    //   3. If the detected language lies outside the natively-analyzed
200    //      set, error with `AutodetectUnsupported` (exit 2) — same
201    //      contract as vuln. This points the user at an explicit
202    //      `--lang` flag.
203    //
204    // The natively-analyzed set is canonical-pipeline-driven and lives
205    // in `vuln::is_natively_analyzed` (Python, Rust, TypeScript,
206    // JavaScript per M-Y3). Reusing it here keeps secure↔vuln gate
207    // semantics in lock-step: if vuln autodetect-rejects a tree, secure
208    // does too, with the same message.
209    let effective_lang: Option<Language> = match args.lang {
210        Some(l) => Some(l),
211        None => {
212            let detected = if args.path.is_dir() {
213                Language::from_directory(&args.path)
214            } else {
215                Language::from_path(&args.path)
216            };
217            if let Some(l) = detected {
218                if !super::vuln::is_natively_analyzed(l) {
219                    return Err(RemainingError::autodetect_unsupported(format!(
220                        "secure: taint analysis for {lang} is not yet supported by autodetect; \
221                         pass --lang {lang} explicitly to scan this file (the canonical taint \
222                         pipeline supports it). Autodetect-by-extension currently routes only \
223                         --lang python, --lang rust, --lang typescript, and --lang javascript; \
224                         other languages require an explicit --lang flag.",
225                        lang = l.as_str()
226                    ))
227                    .into());
228                }
229            }
230            detected
231        }
232    };
233
234    // Collect files to analyze (autodetected language drives the
235    // extension filter when --lang is omitted).
236    let candidate_files = collect_files(&args.path, effective_lang, args.no_default_ignore)?;
237
238    // SECURE-UTF8-TOLERANCE-V1: pre-filter for UTF-8 validity ONCE up front.
239    // The 6 sub-analyses (taint, resources, bounds, contracts, behavioral,
240    // mutability) each re-iterate the same files, so doing the read here
241    // (a) dedupes warnings (1 message per bad file, not 6) and
242    // (b) avoids each analysis having to know about the tolerance policy.
243    // The Luau parser-test corpus (`tests/conformance/literals.luau`,
244    // `pm.luau`, `sort.luau`) intentionally embeds raw 0xFF/0xFE bytes —
245    // pre-fix `tldr secure --lang luau /tmp/repos/luau-luau` aborted with
246    // `Error: stream did not contain valid UTF-8` on the first such file.
247    let (files, warnings, files_skipped) = partition_utf8_clean(&candidate_files);
248
249    // Run sub-analyses and collect findings
250    let mut all_findings = Vec::new();
251    let mut sub_results: HashMap<String, Value> = HashMap::new();
252
253    for analysis in analyses {
254        let (findings, raw_result) = run_security_analysis(*analysis, &files, &mut cache)?;
255
256        // Collect findings
257        all_findings.extend(findings);
258
259        // Store raw result if requested
260        if args.detail.as_deref() == Some(analysis.name()) {
261            sub_results.insert(analysis.name().to_string(), raw_result);
262        }
263    }
264
265    // SECURE-TEST-FILE-SUPPRESSION-V1 (M-Z10): mirror the test-file
266    // suppression policy from `tldr vuln` (M-X3
267    // `js-test-file-suppression-v1`). See `apply_test_file_suppression`.
268    if !args.include_tests {
269        apply_test_file_suppression(&mut all_findings);
270    }
271
272    // Sort findings by severity (critical first)
273    all_findings.sort_by(|a, b| severity_order(&a.severity).cmp(&severity_order(&b.severity)));
274
275    // WRAPPER-CROSS-CONSISTENCY-V1 (BUG-15, BUG-16): compute the summary
276    // counters from the FINAL `findings` array via category group-by,
277    // post-aggregation and post-sort. The previous implementation set
278    // `taint_count = findings.len()` inside the per-analysis update where
279    // `analyze_taint` on Rust files returns `category="unsafe_block"`
280    // findings — so `taint_count` ghosted to N while the findings array
281    // had zero `category=="taint"` entries (BUG-16). Group-by on the
282    // canonical findings array makes the summary match the array by
283    // construction.
284    report.summary = compute_summary_from_findings(&all_findings);
285
286    report.findings = all_findings;
287    report.sub_results = sub_results;
288    report.total_elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
289    // SECURE-UTF8-TOLERANCE-V1: surface skipped files in the report.
290    report.files_skipped = files_skipped;
291    report.warnings = warnings;
292
293    // Output
294    let output_str = match format {
295        OutputFormat::Json => serde_json::to_string_pretty(&report)?,
296        OutputFormat::Compact => serde_json::to_string(&report)?,
297        OutputFormat::Text => format_text_report(&report),
298        OutputFormat::Sarif | OutputFormat::Dot => {
299            // SARIF/DOT not fully supported for secure, fall back to JSON
300            serde_json::to_string_pretty(&report)?
301        }
302    };
303
304    // Write output
305    if let Some(output_path) = &args.output {
306        fs::write(output_path, &output_str)?;
307    } else {
308        println!("{}", output_str);
309    }
310
311    Ok(())
312}
313
314/// Collect supported files to analyze.
315fn collect_files(
316    path: &Path,
317    lang: Option<Language>,
318    no_default_ignore: bool,
319) -> RemainingResult<Vec<PathBuf>> {
320    let mut files = Vec::new();
321
322    if path.is_file() {
323        if is_supported_secure_file(path, lang) {
324            files.push(path.to_path_buf());
325        }
326    } else if path.is_dir() {
327        // Walk directory and collect supported source files.
328        let mut walker = ProjectWalker::new(path).max_depth(10);
329        if no_default_ignore {
330            walker = walker.no_default_ignore();
331        }
332        for entry in walker.iter() {
333            let p = entry.path();
334            if p.is_file() && is_supported_secure_file(p, lang) {
335                files.push(p.to_path_buf());
336            }
337        }
338    }
339
340    // Return empty vec if no files found (like vuln.rs does)
341    // The report will show 0 files scanned with no findings
342
343    Ok(files)
344}
345
346/// Check whether `path` is a source file the secure analyzer should scan.
347///
348/// With `lang = Some(L)`, only matches that language's extensions. With
349/// `lang = None`, preserves the historical behavior of `py | rs` (the
350/// languages the sub-analyzers natively support).
351fn is_supported_secure_file(path: &std::path::Path, lang: Option<Language>) -> bool {
352    let ext = match path.extension().and_then(|e| e.to_str()) {
353        Some(e) => e,
354        None => return false,
355    };
356    match lang {
357        Some(Language::TypeScript) => matches!(ext, "ts" | "tsx"),
358        Some(Language::JavaScript) => matches!(ext, "js" | "mjs" | "cjs" | "jsx"),
359        Some(Language::Python) => ext == "py",
360        Some(Language::Rust) => ext == "rs",
361        Some(Language::Go) => ext == "go",
362        Some(Language::Java) => ext == "java",
363        Some(Language::C) => matches!(ext, "c" | "h"),
364        Some(Language::Cpp) => matches!(ext, "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx"),
365        Some(Language::CSharp) => ext == "cs",
366        Some(Language::Ruby) => ext == "rb",
367        Some(Language::Php) => ext == "php",
368        Some(Language::Kotlin) => matches!(ext, "kt" | "kts"),
369        Some(Language::Swift) => ext == "swift",
370        Some(Language::Scala) => ext == "scala",
371        Some(Language::Elixir) => matches!(ext, "ex" | "exs"),
372        Some(Language::Lua) => ext == "lua",
373        Some(Language::Luau) => ext == "luau",
374        Some(Language::Ocaml) => matches!(ext, "ml" | "mli"),
375        None => matches!(ext, "py" | "rs"),
376    }
377}
378
/// Returns `true` when `path` has exactly the extension `rs` (case-sensitive).
fn is_rust_file(path: &std::path::Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    extension == Some("rs")
}
382
383// `is_rust_test_file` was originally defined locally here; M-Z10
384// (`secure-test-file-suppression-v1`) consolidated it with vuln.rs by
385// promoting `vuln::is_rust_test_file` to `pub(super)` and reusing it
386// here. See `super::vuln::is_rust_test_file`. The behavior is identical
387// to the previous local impl (path component `/tests/` or filename
388// suffix `_test.rs` / `tests.rs`).
389
390/// Partition the candidate file set into clean (kept) and skipped files.
391///
392/// Two-stage filter:
393///
394/// 1. **Oversize / auto-gen pre-filter** (SECURE-FASTPATH-V1, M-Z8):
395///    defer to `tldr_core::fs::oversize::check_size` before reading the
396///    file. The 6 sub-analyses each iterate this file set and read the
397///    full content into memory; without a cap, a 2.3 MB
398///    `dom.generated.d.ts` (TypeScript DOM-gen baselines) dominates
399///    the wall clock — pre-fix `tldr secure --lang typescript
400///    /tmp/repos/ts-dom-gen` ran 154 s, dwarfing the rest of the
401///    repo's ~20 ms. Mirrors the policy applied in
402///    `vuln.rs::analyze_file` (covered by M-Y3
403///    `typescript-large-file-perf-v1`) and `api_check.rs::analyze_file`
404///    (covered by M-Z4 `fastpath-extend-non-vuln-v1`); central policy
405///    in `tldr_core::fs::oversize` enforces the 10 MB source-file cap
406///    and the 512 KB cap for `.d.ts` / `.min.js` / `.bundle.*`
407///    auto-generated artefacts.
408///
409/// 2. **UTF-8 tolerance** (SECURE-UTF8-TOLERANCE-V1, M-X5): pre-fix,
410///    `run_security_analysis` called `fs::read_to_string(file)?` which
411///    propagates the `Err(io::Error("stream did not contain valid
412///    UTF-8"))` returned by `String::from_utf8` for files like
413///    `tests/conformance/literals.luau` in the upstream luau-luau
414///    repo. That `?` aborted the entire scan on the first such file,
415///    so `tldr secure --lang luau /tmp/repos/luau-luau` failed with
416///    `Error: IO error: stream did not contain valid UTF-8` and
417///    exited 1, even though 111/114 files were perfectly scannable.
418///    Mirrors the policy already in
419///    `crates/tldr-core/src/surface/luau.rs`: skip with a structured
420///    warning, continue.
421///
422/// Both oversize and non-UTF-8 skips are counted under the returned
423/// `files_skipped` counter and surfaced via a structured warning.
424/// Genuine I/O errors (file vanished mid-scan) drop the file with a
425/// warning but are NOT counted as a skip — the `secure` walk is
426/// best-effort and one transient failure should not lose the rest.
427fn partition_utf8_clean(candidates: &[PathBuf]) -> (Vec<PathBuf>, Vec<String>, u32) {
428    use tldr_core::fs::oversize::{check_size, format_oversize_warning, SizeCheck};
429
430    let mut clean: Vec<PathBuf> = Vec::with_capacity(candidates.len());
431    let mut warnings: Vec<String> = Vec::new();
432    let mut skipped: u32 = 0;
433    for file in candidates {
434        // SECURE-FASTPATH-V1 (M-Z8): apply oversize cap BEFORE the read.
435        // `read_to_string_tolerant` reads the full file into memory, so
436        // a 2.3 MB `dom.generated.d.ts` would otherwise be loaded six
437        // times (once per sub-analysis read) and parsed once into a
438        // tree-sitter AST per analysis. The check_size stat call is
439        // O(1) and returns SizeCheck::Unknown for missing files
440        // (which then falls through to the existing read path and is
441        // handled there).
442        match check_size(file) {
443            SizeCheck::Oversize {
444                size_bytes,
445                max_bytes,
446                is_autogen,
447            } => {
448                skipped += 1;
449                warnings.push(format_oversize_warning(
450                    file,
451                    size_bytes,
452                    max_bytes,
453                    is_autogen,
454                ));
455                continue;
456            }
457            // WithinLimit | Unknown: proceed to the UTF-8 read below.
458            _ => {}
459        }
460
461        match read_to_string_tolerant(file) {
462            Ok(ReadOutcome::Ok(_)) => clean.push(file.clone()),
463            Ok(ReadOutcome::NonUtf8 { byte_offset }) => {
464                skipped += 1;
465                warnings.push(format!(
466                    "Skipped {}: invalid UTF-8 at byte {}",
467                    file.display(),
468                    byte_offset
469                ));
470            }
471            Err(e) => {
472                // Genuine I/O failure (permissions, vanished, etc.).
473                // Drop the file with a warning rather than aborting the
474                // whole scan. This is NOT counted under `files_skipped`,
475                // which is reserved for the UTF-8-tolerance policy and
476                // the oversize policy.
477                warnings.push(format!(
478                    "Skipped {}: I/O error: {}",
479                    file.display(),
480                    e
481                ));
482            }
483        }
484    }
485    (clean, warnings, skipped)
486}
487
488/// Run a specific security analysis on files
489fn run_security_analysis(
490    analysis: SecurityAnalysis,
491    files: &[PathBuf],
492    cache: &mut AstCache,
493) -> RemainingResult<(Vec<SecureFinding>, Value)> {
494    let mut findings = Vec::new();
495
496    for file in files {
497        // SECURE-UTF8-TOLERANCE-V1 (defense-in-depth): the file set was
498        // pre-filtered by `partition_utf8_clean` in `run`, so a clean
499        // read is the expected path. We still use the tolerant reader
500        // here so that a TOCTOU race (file replaced with non-UTF-8
501        // content between the partition pass and the analysis pass)
502        // skips the file instead of aborting the scan. No warning is
503        // emitted here — the partition pass owns warning emission to
504        // avoid duplicate messages across the 6 sub-analyses.
505        let source = match read_to_string_tolerant(file)? {
506            ReadOutcome::Ok(s) => s,
507            ReadOutcome::NonUtf8 { .. } => continue,
508        };
509
510        // Get or parse the AST
511        let tree = cache.get_or_parse(file, &source)?;
512
513        // Run analysis
514        let file_findings = match analysis {
515            SecurityAnalysis::Taint => analyze_taint(tree.root_node(), &source, file),
516            SecurityAnalysis::Resources => analyze_resources(tree.root_node(), &source, file),
517            SecurityAnalysis::Bounds => analyze_bounds(tree.root_node(), &source, file),
518            SecurityAnalysis::Contracts => analyze_contracts(tree.root_node(), &source, file),
519            SecurityAnalysis::Behavioral => analyze_behavioral(tree.root_node(), &source, file),
520            SecurityAnalysis::Mutability => analyze_mutability(tree.root_node(), &source, file),
521        };
522
523        findings.extend(file_findings);
524    }
525
526    // Create raw result
527    let raw_result = serde_json::to_value(&findings).unwrap_or(Value::Array(vec![]));
528
529    Ok((findings, raw_result))
530}
531
532/// SECURE-TEST-FILE-SUPPRESSION-V1 (M-Z10): in-place suppression of
533/// findings emitted from test files. Mirrors the post-analysis filter
534/// applied in `vuln.rs::VulnArgs::run` for `--include-tests`, restoring
535/// vuln↔secure parity (`tldr secure`'s taint findings count must match
536/// `tldr vuln`'s finding count on the same path).
537///
538/// Pre-fix on `/tmp/repos/express`:
539/// * `tldr vuln --lang javascript .` → 1 finding (index.js:21; the
540///   `test/app.engine.js:9` finding masked by M-X3 `is_js_test_file`).
541/// * `tldr secure --lang javascript . | jq '[.findings[]|select(.category=="taint")]'`
542///   → 2 findings (index.js + test/app.engine.js — secure ran the
543///   canonical taint pipeline but never applied the M-X3 mask, so the
544///   `test/app.engine.js` finding leaked through).
545///
546/// Reuses `super::vuln::is_js_test_file` (M-X3 helper: JS/TS path
547/// components + test-style filename suffixes, with a `/fixtures/`
548/// exemption that keeps `vuln_migration_v1` GREEN) and
549/// `super::vuln::is_rust_test_file` (Rust `/tests/` + `_test.rs` /
550/// `tests.rs` suffix). The Rust mask was already applied INSIDE
551/// `analyze_rust_bounds` for unwrap-style smell findings; this filter
552/// adds the symmetric mask for taint-class findings.
553///
554/// Runs BEFORE `compute_summary_from_findings` so the summary reflects
555/// the suppressed view (matches the WRAPPER-CROSS-CONSISTENCY-V1
556/// invariant: summary derives from the final findings array).
557fn apply_test_file_suppression(findings: &mut Vec<SecureFinding>) {
558    findings.retain(|f| {
559        let p = std::path::Path::new(&f.file);
560        // Fixture exemption: paths under a `fixtures/` directory must
561        // NOT be suppressed even when their ancestors include `tests/`
562        // (e.g. `crates/tldr-cli/tests/fixtures/vuln_migration_v1/...`).
563        // `is_js_test_file` already bakes this exemption in; we apply
564        // the same gate to the Rust predicate (which doesn't, since on
565        // the vuln side Rust file collection happens before the
566        // post-analysis filter and the fixture suite is JS/TS-only).
567        // Without this gate, finding-level Rust suppression would drop
568        // legitimate fixture findings on hypothetical Rust fixtures.
569        let in_fixtures =
570            f.file.contains("/fixtures/") || f.file.contains("\\fixtures\\");
571        if in_fixtures {
572            return true;
573        }
574        !super::vuln::is_js_test_file(p) && !super::vuln::is_rust_test_file(p)
575    });
576}
577
578/// Compute the summary by category group-by over the FINAL findings array.
579///
580/// WRAPPER-CROSS-CONSISTENCY-V1 (BUG-15, BUG-16): every `*_count` field
581/// derives from `findings[].category`, so the schema invariant
582/// `taint_count + leak_count + bounds_warnings + behavioral_count +
583///  unsafe_blocks + raw_pointer_ops + unwrap_calls + todo_markers +
584///  missing_contracts + mutable_params == findings.len()`
585/// holds by construction. `taint_critical` is a severity refinement of
586/// `taint_count` (subset, not its own category) and is excluded from the
587/// invariant.
588///
589/// Categories emitted by sub-analyzers (must remain in sync with the
590/// `analyze_*` functions below):
591/// - taint analysis: `taint` (Python/JS/etc.) | `unsafe_block` (Rust)
592/// - resource analysis: `resource_leak` (Python) | `raw_pointer` (Rust)
593/// - bounds analysis: `bounds` (Python) | `unwrap`, `todo_marker` (Rust)
594/// - behavioral analysis: `behavioral`
595/// - contracts analysis: `missing_contract` (placeholder, currently unused)
596/// - mutability analysis: `mutable_param` (placeholder, currently unused)
597fn compute_summary_from_findings(findings: &[SecureFinding]) -> SecureSummary {
598    let count_cat = |cat: &str| findings.iter().filter(|f| f.category == cat).count() as u32;
599
600    SecureSummary {
601        taint_count: count_cat("taint"),
602        taint_critical: findings
603            .iter()
604            .filter(|f| f.category == "taint" && f.severity == "critical")
605            .count() as u32,
606        leak_count: count_cat("resource_leak"),
607        bounds_warnings: count_cat("bounds"),
608        behavioral_count: count_cat("behavioral"),
609        missing_contracts: count_cat("missing_contract"),
610        mutable_params: count_cat("mutable_param"),
611        unsafe_blocks: count_cat("unsafe_block"),
612        raw_pointer_ops: count_cat("raw_pointer"),
613        unwrap_calls: count_cat("unwrap"),
614        todo_markers: count_cat("todo_marker"),
615    }
616}
617
/// Rank a severity label for sorting (lower = more severe).
///
/// Known labels rank `critical` (0) through `info` (4); any other string,
/// including different casing, ranks last (5).
fn severity_order(severity: &str) -> u8 {
    // Ordered from most to least severe; the index is the rank.
    const RANKED: [&str; 5] = ["critical", "high", "medium", "low", "info"];

    RANKED
        .iter()
        .position(|known| *known == severity)
        .map_or(RANKED.len() as u8, |rank| rank as u8)
}
629
630// =============================================================================
631// Taint Analysis
632// =============================================================================
633
634/// Analyze taint flows in a file.
635///
636/// SECURE-TAINT-AGGREGATOR-V1: For non-Rust files this routes through the
637/// canonical `tldr_core::security::vuln::scan_vulnerabilities` pipeline —
638/// the same pipeline `tldr vuln` uses — so `secure.summary.taint_count`
639/// agrees with `tldr vuln`'s finding count.
640///
641/// RUST-SECURE-TAINT-AGGREGATOR-V2: For Rust files this now mirrors
642/// `tldr vuln`'s dual dispatch from `rust-vuln-taint-pipeline-v1`:
643/// canonical pipeline + line scanner with overlap dedup. The
644/// canonical findings AND the line-scanner SqlInjection /
645/// CommandInjection findings (the only line-scanner emissions that are
646/// taint-class — UnsafeCode/MemorySafety/Panic are smell-class and not
647/// counted under `summary.taint_count`) are emitted with
648/// `category = "taint"`. Unsafe-block findings retain
649/// `category = "unsafe_block"` (counted separately by
650/// `summary.unsafe_blocks`). Pre-V2, secure dropped ALL canonical Rust
651/// taint findings — `tldr vuln --lang rust file.rs` reported N>0
652/// findings while `tldr secure --lang rust file.rs` reported 0
653/// (BUG-17, surfaced by the 17-lang sweep).
654///
655/// The legacy substring-based `TAINT_SINKS` matcher (which produced 0
656/// findings on real flows because it could not see source-to-sink
657/// relationships) remains retired.
658fn analyze_taint(_root: Node, source: &str, file: &Path) -> Vec<SecureFinding> {
659    let (mut findings, canonical_lines) = canonical_taint_findings_with_index(file);
660    if is_rust_file(file) {
661        findings.extend(rust_line_scanner_taint_findings(
662            file,
663            source,
664            &canonical_lines,
665        ));
666        findings.extend(analyze_rust_unsafe_blocks(source, file));
667    }
668    findings
669}
670
671/// Run the Rust line scanner from `vuln.rs` and project ONLY its
672/// taint-class findings (SqlInjection, CommandInjection) onto
673/// `SecureFinding`s with `category = "taint"`. Non-taint smell-class
674/// emissions (UnsafeCode, MemorySafety, Panic) are dropped here — they
675/// are surfaced by the dedicated `analyze_rust_unsafe_blocks` /
676/// `analyze_rust_raw_pointers` / `analyze_rust_bounds` paths under
677/// their own categories.
678///
679/// `canonical_index` carries the `(line, core_VulnType)` tuples the
680/// canonical pipeline already produced for this file. SqlInjection /
681/// CommandInjection line-scanner findings whose `(line, vuln_type)` is
682/// already in the canonical index are dropped — same dedup predicate as
683/// `vuln.rs::dedupe_overlap`. This keeps secure↔vuln per-file counts
684/// equal: vuln applies the same dedup, so secure must too, otherwise
685/// secure would over-count when both layers report the same finding.
686///
687/// RUST-SECURE-TAINT-AGGREGATOR-V2: closes the
688/// `sql_injection_format_keyword_positive.rs` parity gap — the
689/// canonical Rust pipeline does not produce a SqlInjection finding for
690/// `format!("SELECT … {}", x)` (no real source-to-sink), but the line
691/// scanner does (per `rust-format-sql-fp-narrowing-v1`). For
692/// secure↔vuln directory-level parity, secure must include this.
693fn rust_line_scanner_taint_findings(
694    file: &Path,
695    source: &str,
696    canonical_index: &[(u32, tldr_core::security::vuln::VulnType)],
697) -> Vec<SecureFinding> {
698    use crate::commands::remaining::types::VulnType;
699
700    super::vuln::analyze_rust_file(file, source)
701        .into_iter()
702        .filter(|f| {
703            matches!(
704                f.vuln_type,
705                VulnType::SqlInjection | VulnType::CommandInjection
706            )
707        })
708        .filter(|f| {
709            // Mirrors `vuln.rs::dedupe_overlap`: drop line-scanner finding
710            // if canonical already covers `(line, vuln_type)`.
711            let core_ty = match f.vuln_type {
712                VulnType::SqlInjection => tldr_core::security::vuln::VulnType::SqlInjection,
713                VulnType::CommandInjection => {
714                    tldr_core::security::vuln::VulnType::CommandInjection
715                }
716                _ => return true,
717            };
718            !canonical_index
719                .iter()
720                .any(|(line, ty)| *line == f.line && *ty == core_ty)
721        })
722        .map(|f| {
723            let severity = match f.severity {
724                crate::commands::remaining::types::Severity::Critical => "critical",
725                crate::commands::remaining::types::Severity::High => "high",
726                crate::commands::remaining::types::Severity::Medium => "medium",
727                crate::commands::remaining::types::Severity::Low => "low",
728                _ => "medium",
729            };
730            let description = format!("{:?}: {}", f.vuln_type, f.description);
731            SecureFinding::new("taint", severity, description).with_location(f.file, f.line)
732        })
733        .collect()
734}
735
736/// Run the canonical `scan_vulnerabilities` pipeline on a single file and
737/// project the resulting `VulnFinding`s onto `SecureFinding`s with
738/// `category = "taint"`. Returns both the projected findings AND the
739/// set of `(line, core_VulnType)` tuples covered by canonical — used by
740/// the Rust line-scanner path to dedupe overlap (SqlInjection,
741/// CommandInjection on the same line). Mirrors
742/// `vuln.rs::dedupe_overlap`.
743///
744/// Runs for ALL extensions including `.rs`
745/// (RUST-SECURE-TAINT-AGGREGATOR-V2 — mirrors `tldr vuln`'s
746/// canonical-for-all-languages dispatch from
747/// `rust-vuln-taint-pipeline-v1`).
748fn canonical_taint_findings_with_index(
749    file: &Path,
750) -> (
751    Vec<SecureFinding>,
752    Vec<(u32, tldr_core::security::vuln::VulnType)>,
753) {
754    let report = match tldr_core::security::vuln::scan_vulnerabilities(file, None, None) {
755        Ok(r) => r,
756        Err(_) => return (Vec::new(), Vec::new()),
757    };
758
759    let index: Vec<(u32, tldr_core::security::vuln::VulnType)> = report
760        .findings
761        .iter()
762        .map(|f| (f.sink.line, f.vuln_type))
763        .collect();
764
765    let findings = report
766        .findings
767        .into_iter()
768        .map(|f| {
769            let severity = match f.severity.to_uppercase().as_str() {
770                "CRITICAL" => "critical",
771                "HIGH" => "high",
772                "MEDIUM" => "medium",
773                "LOW" => "low",
774                _ => "medium",
775            };
776            let description = format!(
777                "{:?}: {} with unsanitized input from {}",
778                f.vuln_type, f.sink.sink_type, f.source.source_type
779            );
780            SecureFinding::new("taint", severity, description)
781                .with_location(f.file.display().to_string(), f.sink.line)
782        })
783        .collect();
784
785    (findings, index)
786}
787
788// =============================================================================
789// Resource Analysis
790// =============================================================================
791
/// Known resource creators.
///
/// `find_leaked_resources` compares the last `.`-separated segment of an
/// assignment's callee against this list, so a qualified call such as
/// `sqlite3.connect(...)` matches on `connect`.
const RESOURCE_CREATORS: &[&str] = &["open", "socket", "connect", "cursor", "urlopen"];
794
795/// Analyze resource leaks in a file
796fn analyze_resources(root: Node, source: &str, file: &Path) -> Vec<SecureFinding> {
797    if is_rust_file(file) {
798        return analyze_rust_raw_pointers(source, file);
799    }
800
801    let mut findings = Vec::new();
802    let source_bytes = source.as_bytes();
803
804    // Find resource assignments outside of `with` statements
805    find_leaked_resources(root, source_bytes, file, &mut findings);
806
807    findings
808}
809
810fn find_leaked_resources(
811    node: Node,
812    source: &[u8],
813    file: &Path,
814    findings: &mut Vec<SecureFinding>,
815) {
816    // Check if this is an assignment with a resource creator
817    if node.kind() == "assignment" {
818        if let Some(right) = node.child_by_field_name("right") {
819            if right.kind() == "call" {
820                if let Some(func) = right.child_by_field_name("function") {
821                    let func_text = node_text(func, source);
822                    let func_name = func_text.split('.').next_back().unwrap_or(func_text);
823
824                    if RESOURCE_CREATORS.contains(&func_name) {
825                        // Check if this is inside a with statement
826                        if !is_inside_with(node) {
827                            findings.push(
828                                SecureFinding::new(
829                                    "resource_leak",
830                                    "high",
831                                    format!(
832                                        "Resource '{}' opened without context manager - may leak",
833                                        func_name
834                                    ),
835                                )
836                                .with_location(
837                                    file.display().to_string(),
838                                    node.start_position().row as u32 + 1,
839                                ),
840                            );
841                        }
842                    }
843                }
844            }
845        }
846    }
847
848    // Recurse
849    for i in 0..node.child_count() {
850        if let Some(child) = node.child(i) {
851            find_leaked_resources(child, source, file, findings);
852        }
853    }
854}
855
856fn is_inside_with(node: Node) -> bool {
857    let mut current = node.parent();
858    while let Some(parent) = current {
859        if parent.kind() == "with_statement" {
860            return true;
861        }
862        current = parent.parent();
863    }
864    false
865}
866
867// =============================================================================
868// Bounds Analysis
869// =============================================================================
870
871/// Analyze bounds/overflow issues in a file
872fn analyze_bounds(_root: Node, source: &str, file: &Path) -> Vec<SecureFinding> {
873    if is_rust_file(file) {
874        return analyze_rust_bounds(source, file);
875    }
876
877    // Placeholder for Python bounds analysis.
878    Vec::new()
879}
880
881// =============================================================================
882// Contracts Analysis
883// =============================================================================
884
/// Analyze missing contracts in a file.
///
/// Currently a stub: all three arguments are ignored and the returned
/// vector is always empty.
fn analyze_contracts(_root: Node, _source: &str, _file: &Path) -> Vec<SecureFinding> {
    // Placeholder - would check for functions without type hints, docstrings, or assertions
    Vec::new()
}
890
891// =============================================================================
892// Behavioral Analysis
893// =============================================================================
894
895/// Analyze behavioral issues (exception handling, state) in a file
896fn analyze_behavioral(root: Node, source: &str, file: &Path) -> Vec<SecureFinding> {
897    let mut findings = Vec::new();
898    let source_bytes = source.as_bytes();
899
900    // Find bare except clauses
901    find_bare_except(root, source_bytes, file, &mut findings);
902
903    findings
904}
905
906fn find_bare_except(node: Node, source: &[u8], file: &Path, findings: &mut Vec<SecureFinding>) {
907    // Check for except clauses without exception type
908    if node.kind() == "except_clause" {
909        let has_type = node.children(&mut node.walk()).any(|c| {
910            c.kind() == "as_pattern"
911                || (c.kind() == "identifier" && node_text(c, source) != "Exception")
912        });
913
914        if !has_type {
915            let text = node_text(node, source);
916            if text.starts_with("except:") || text.starts_with("except :") {
917                findings.push(
918                    SecureFinding::new(
919                        "behavioral",
920                        "medium",
921                        "Bare except clause catches all exceptions including KeyboardInterrupt",
922                    )
923                    .with_location(
924                        file.display().to_string(),
925                        node.start_position().row as u32 + 1,
926                    ),
927                );
928            }
929        }
930    }
931
932    // Recurse
933    for i in 0..node.child_count() {
934        if let Some(child) = node.child(i) {
935            find_bare_except(child, source, file, findings);
936        }
937    }
938}
939
940// =============================================================================
941// Mutability Analysis
942// =============================================================================
943
/// Analyze mutability issues in a file.
///
/// Currently a stub: all three arguments are ignored and the returned
/// vector is always empty.
fn analyze_mutability(_root: Node, _source: &str, _file: &Path) -> Vec<SecureFinding> {
    // Placeholder - would check for mutable default arguments, etc.
    Vec::new()
}
949
950// =============================================================================
951// Utilities
952// =============================================================================
953
954fn node_text<'a>(node: Node, source: &'a [u8]) -> &'a str {
955    std::str::from_utf8(&source[node.start_byte()..node.end_byte()]).unwrap_or("")
956}
957
958fn analyze_rust_unsafe_blocks(source: &str, file: &Path) -> Vec<SecureFinding> {
959    let mut findings = Vec::new();
960    for (idx, line) in source.lines().enumerate() {
961        let trimmed = line.trim();
962        if trimmed.starts_with("//") {
963            continue;
964        }
965        if trimmed.contains("unsafe {") || trimmed.starts_with("unsafe{") {
966            findings.push(
967                SecureFinding::new(
968                    "unsafe_block",
969                    "high",
970                    "unsafe block detected; verify invariants and safety rationale",
971                )
972                .with_location(file.display().to_string(), (idx + 1) as u32),
973            );
974        }
975    }
976    findings
977}
978
979fn analyze_rust_raw_pointers(source: &str, file: &Path) -> Vec<SecureFinding> {
980    let mut findings = Vec::new();
981    for (idx, line) in source.lines().enumerate() {
982        let trimmed = line.trim();
983        if trimmed.starts_with("//") {
984            continue;
985        }
986        if trimmed.contains("std::ptr::")
987            || trimmed.contains("core::ptr::")
988            || trimmed.contains("ptr::read(")
989            || trimmed.contains("ptr::write(")
990        {
991            findings.push(
992                SecureFinding::new(
993                    "raw_pointer",
994                    "high",
995                    "raw pointer operation detected; audit aliasing, lifetime, and bounds assumptions",
996                )
997                .with_location(file.display().to_string(), (idx + 1) as u32),
998            );
999        }
1000    }
1001    findings
1002}
1003
1004fn analyze_rust_bounds(source: &str, file: &Path) -> Vec<SecureFinding> {
1005    let mut findings = Vec::new();
1006    let skip_test_only = super::vuln::is_rust_test_file(file);
1007
1008    for (idx, line) in source.lines().enumerate() {
1009        let trimmed = line.trim();
1010        if trimmed.starts_with("//") {
1011            continue;
1012        }
1013
1014        if !skip_test_only && trimmed.contains(".unwrap()") {
1015            findings.push(
1016                SecureFinding::new(
1017                    "unwrap",
1018                    "medium",
1019                    "unwrap() call in non-test code may panic at runtime",
1020                )
1021                .with_location(file.display().to_string(), (idx + 1) as u32),
1022            );
1023        }
1024
1025        if !skip_test_only && (trimmed.contains("todo!(") || trimmed.contains("unimplemented!(")) {
1026            findings.push(
1027                SecureFinding::new(
1028                    "todo_marker",
1029                    "low",
1030                    "todo!/unimplemented! marker found in non-test Rust code",
1031                )
1032                .with_location(file.display().to_string(), (idx + 1) as u32),
1033            );
1034        }
1035    }
1036
1037    findings
1038}
1039
1040// =============================================================================
1041// Text Output
1042// =============================================================================
1043
1044fn format_text_report(report: &SecureReport) -> String {
1045    let mut output = String::new();
1046
1047    output.push_str(&"=".repeat(60));
1048    output.push('\n');
1049    output.push_str(&format!(
1050        "{}\n",
1051        "SECURE - Security Analysis Dashboard".bold()
1052    ));
1053    output.push_str(&"=".repeat(60));
1054    output.push_str("\n\n");
1055    output.push_str(&format!("Path: {}\n\n", report.path));
1056
1057    if report.findings.is_empty() {
1058        output.push_str(&format!("{}\n", "No security issues found.".green()));
1059    } else {
1060        output.push_str(&format!(
1061            "{}\n",
1062            "Severity | Category       | Description".bold()
1063        ));
1064        output.push_str(&format!("{}\n", "-".repeat(60)));
1065
1066        for finding in &report.findings {
1067            let severity_colored = match finding.severity.as_str() {
1068                "critical" => finding.severity.red().bold().to_string(),
1069                "high" => finding.severity.red().to_string(),
1070                "medium" => finding.severity.yellow().to_string(),
1071                "low" => finding.severity.blue().to_string(),
1072                _ => finding.severity.clone(),
1073            };
1074            output.push_str(&format!(
1075                "{:>8} | {:<14} | {}\n",
1076                severity_colored, finding.category, finding.description
1077            ));
1078            if !finding.file.is_empty() {
1079                output.push_str(&format!(
1080                    "         |                | {}:{}\n",
1081                    finding.file, finding.line
1082                ));
1083            }
1084        }
1085    }
1086
1087    output.push('\n');
1088    output.push_str(&format!("{}\n", "Summary:".bold()));
1089    output.push_str(&format!(
1090        "  Taint issues:      {} ({} critical)\n",
1091        report.summary.taint_count, report.summary.taint_critical
1092    ));
1093    output.push_str(&format!(
1094        "  Resource leaks:    {}\n",
1095        report.summary.leak_count
1096    ));
1097    output.push_str(&format!(
1098        "  Bounds warnings:   {}\n",
1099        report.summary.bounds_warnings
1100    ));
1101    output.push_str(&format!(
1102        "  Behavioral:        {}\n",
1103        report.summary.behavioral_count
1104    ));
1105    output.push_str(&format!(
1106        "  Missing contracts: {}\n",
1107        report.summary.missing_contracts
1108    ));
1109    output.push_str(&format!(
1110        "  Mutable params:    {}\n",
1111        report.summary.mutable_params
1112    ));
1113    output.push_str(&format!(
1114        "  Unsafe blocks:     {}\n",
1115        report.summary.unsafe_blocks
1116    ));
1117    output.push_str(&format!(
1118        "  Raw pointer ops:   {}\n",
1119        report.summary.raw_pointer_ops
1120    ));
1121    output.push_str(&format!(
1122        "  Unwrap calls:      {}\n",
1123        report.summary.unwrap_calls
1124    ));
1125    output.push_str(&format!(
1126        "  Todo markers:      {}\n",
1127        report.summary.todo_markers
1128    ));
1129    output.push('\n');
1130    output.push_str(&format!("Elapsed: {:.2}ms\n", report.total_elapsed_ms));
1131
1132    output
1133}
1134
1135#[cfg(test)]
1136mod tests {
1137    use super::*;
1138    use tempfile::TempDir;
1139    use tree_sitter::Parser;
1140
    /// Write `content` to a file called `name` inside `dir` and return the
    /// path of the new file. Panics if the write fails.
    fn create_test_file(dir: &TempDir, name: &str, content: &str) -> PathBuf {
        let path = dir.path().join(name);
        fs::write(&path, content).unwrap();
        path
    }
1146
    /// Builds a `SecureArgs` value by hand and checks that the `quick` and
    /// `include_tests` flags read back as `false`.
    ///
    /// NOTE(review): every field is spelled out in the literal below, so the
    /// assertions only read back what was just written — confirm whether the
    /// intent was to exercise the clap-derived defaults instead.
    #[test]
    fn test_secure_args_default() {
        // Test that default values are set correctly
        let args = SecureArgs {
            path: PathBuf::from("test.py"),
            lang: None,
            detail: None,
            quick: false,
            output: None,
            no_default_ignore: false,
            include_tests: false,
        };
        assert!(!args.quick);
        assert!(!args.include_tests);
    }
1162
1163    #[test]
1164    fn test_severity_order() {
1165        assert!(severity_order("critical") < severity_order("high"));
1166        assert!(severity_order("high") < severity_order("medium"));
1167        assert!(severity_order("medium") < severity_order("low"));
1168        assert!(severity_order("low") < severity_order("info"));
1169    }
1170
    /// A Flask `request.args` value concatenated into `cursor.execute` must
    /// produce at least one finding from `analyze_taint`, and every finding
    /// returned for this Python fixture must carry the `taint` category.
    #[test]
    fn test_taint_analysis_finds_sql_injection() {
        // SECURE-TAINT-AGGREGATOR-V1: routes through canonical
        // `scan_vulnerabilities` which requires a real source-to-sink
        // flow (not just a literal f-string in a sink). This fixture
        // models a Flask request → cursor.execute flow that the
        // canonical taint engine reports.
        let temp = TempDir::new().unwrap();
        let source = r#"
from flask import request
import sqlite3

def query():
    user_input = request.args.get("name")
    conn = sqlite3.connect("db")
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM users WHERE name = '" + user_input + "'")
"#;
        // The fixture is written to disk as well as parsed in memory:
        // `analyze_taint` receives both the tree and the file path.
        let path = create_test_file(&temp, "vuln.py", source);

        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_python::LANGUAGE.into())
            .unwrap();
        let tree = parser.parse(source, None).unwrap();

        let findings = analyze_taint(tree.root_node(), source, &path);
        assert!(
            !findings.is_empty(),
            "Should detect SQL injection from request.args -> cursor.execute"
        );
        assert!(findings.iter().all(|f| f.category == "taint"));
    }
1204
    /// SECURE-TAINT-AGGREGATOR-V1: secure↔vuln aggregation parity guard.
    ///
    /// The canonical `scan_vulnerabilities` pipeline is the single
    /// source of truth for taint findings. `tldr secure` MUST surface
    /// the same finding count as `tldr vuln` on the same path —
    /// previously secure ran a substring-only matcher that missed
    /// every real source-to-sink flow and reported `taint_count: 0`
    /// while `vuln` reported N>0 on the same file.
    ///
    /// The test first asserts that the fixture yields at least one
    /// canonical finding, so the equality check below cannot pass
    /// vacuously with both counts at zero.
    #[test]
    fn test_secure_taint_count_matches_vuln_findings() {
        let temp = TempDir::new().unwrap();
        // Fixture with a real Flask-style taint flow: HTTP param ->
        // subprocess.call (CommandInjection) and HTTP param ->
        // cursor.execute (SqlInjection-via-string-concat).
        let source = r#"
from flask import request
import subprocess
import sqlite3

def cmd():
    user = request.args.get("user")
    subprocess.call("echo " + user, shell=True)

def sql():
    name = request.args.get("name")
    conn = sqlite3.connect("db")
    cur = conn.cursor()
    cur.execute("SELECT * FROM users WHERE name='" + name + "'")
"#;
        let path = create_test_file(&temp, "flow.py", source);

        // Canonical pipeline (same call path tldr vuln uses).
        let vuln_report =
            tldr_core::security::vuln::scan_vulnerabilities(&path, None, None).unwrap();
        let vuln_count = vuln_report.findings.len();
        assert!(
            vuln_count > 0,
            "Fixture must produce >=1 canonical finding (got 0 - fixture is wrong)"
        );

        // secure's taint analysis on the same file.
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_python::LANGUAGE.into())
            .unwrap();
        let tree = parser.parse(source, None).unwrap();
        let secure_findings = analyze_taint(tree.root_node(), source, &path);

        // For Python the comparison is against ALL findings returned by
        // `analyze_taint`; the assertion after it checks they are all "taint".
        assert_eq!(
            secure_findings.len(),
            vuln_count,
            "secure taint findings must match vuln finding count exactly \
             (secure={}, vuln={}). secure uses canonical scan_vulnerabilities \
             pipeline.",
            secure_findings.len(),
            vuln_count
        );
        assert!(secure_findings.iter().all(|f| f.category == "taint"));
    }
1264
    /// RUST-SECURE-TAINT-AGGREGATOR-V2: Rust-specific secure↔vuln aggregation
    /// parity guard. Pre-V2, `analyze_taint` short-circuited on `.rs` files
    /// to ONLY the unsafe-block line scanner, dropping every canonical
    /// taint finding. `tldr vuln --lang rust file.rs` reported N>0
    /// CommandInjection/SqlInjection findings while `tldr secure --lang rust
    /// file.rs` reported `taint_count: 0`. Surfaced by the v0.2.x 17-language
    /// sweep — Rust was the only language failing
    /// `secure.taint_count == vuln.findings.length` parity (16/17 passed).
    ///
    /// Post-V2: secure.taint_count (category="taint") MUST equal
    /// vuln.findings.length on Rust. Unsafe-block findings are still
    /// emitted but counted under summary.unsafe_blocks, not taint_count.
    ///
    /// As in the Python parity test, the fixture is first required to yield
    /// at least one canonical finding so the equality cannot hold vacuously.
    #[test]
    fn test_secure_taint_count_matches_vuln_rust() {
        let temp = TempDir::new().unwrap();
        // Real source-to-sink command-injection flow in Rust: env input
        // (untrusted source) flowing into Command::new(...).arg(...).output()
        // (sink). Mirrors `command_injection_positive.rs` from
        // `vuln_migration_v1` fixtures.
        let source = r#"
use std::env;
use std::process::Command;

fn run() {
    let user = env::var("USER_INPUT").unwrap();
    let output = Command::new("sh").arg("-c").arg(&user).output();
    let _ = output;
}
"#;
        let path = create_test_file(&temp, "cmd_inj.rs", source);

        // Canonical pipeline (same call path tldr vuln uses).
        let vuln_report =
            tldr_core::security::vuln::scan_vulnerabilities(&path, None, None).unwrap();
        let vuln_count = vuln_report.findings.len();
        assert!(
            vuln_count > 0,
            "Fixture must produce >=1 canonical Rust finding (got 0 - fixture is wrong)"
        );

        // secure's taint analysis on the same Rust file.
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_rust::LANGUAGE.into())
            .unwrap();
        let tree = parser.parse(source, None).unwrap();
        let secure_findings = analyze_taint(tree.root_node(), source, &path);

        // Filter to category="taint" — that's what summary.taint_count counts.
        // (analyze_taint may also include category="unsafe_block" findings
        // for Rust, which feed summary.unsafe_blocks, not taint_count.)
        let taint_findings: Vec<_> = secure_findings
            .iter()
            .filter(|f| f.category == "taint")
            .collect();

        assert_eq!(
            taint_findings.len(),
            vuln_count,
            "secure taint findings (category=\"taint\") must match vuln \
             finding count exactly on Rust (secure_taint={}, vuln={}). \
             RUST-SECURE-TAINT-AGGREGATOR-V2 routes Rust through the \
             canonical scan_vulnerabilities pipeline, same as tldr vuln.",
            taint_findings.len(),
            vuln_count
        );
    }
1332
1333    #[test]
1334    fn test_resource_analysis_finds_leak() {
1335        let source = r#"
1336def read_file():
1337    f = open("test.txt")
1338    data = f.read()
1339    return data
1340"#;
1341
1342        let mut parser = Parser::new();
1343        parser
1344            .set_language(&tree_sitter_python::LANGUAGE.into())
1345            .unwrap();
1346        let tree = parser.parse(source, None).unwrap();
1347
1348        let findings = analyze_resources(tree.root_node(), source, &PathBuf::from("test.py"));
1349        assert!(!findings.is_empty(), "Should detect resource leak");
1350    }
1351
1352    #[test]
1353    fn test_resource_analysis_no_leak_with_context() {
1354        let source = r#"
1355def read_file():
1356    with open("test.txt") as f:
1357        data = f.read()
1358    return data
1359"#;
1360
1361        let mut parser = Parser::new();
1362        parser
1363            .set_language(&tree_sitter_python::LANGUAGE.into())
1364            .unwrap();
1365        let tree = parser.parse(source, None).unwrap();
1366
1367        let findings = analyze_resources(tree.root_node(), source, &PathBuf::from("test.py"));
1368        assert!(
1369            findings.is_empty(),
1370            "Should not detect leak with context manager"
1371        );
1372    }
1373
1374    #[test]
1375    fn test_collect_files_includes_rust() {
1376        let temp = TempDir::new().unwrap();
1377        create_test_file(&temp, "sample.py", "print('ok')");
1378        create_test_file(&temp, "lib.rs", "fn main() {}");
1379        create_test_file(&temp, "notes.txt", "ignore");
1380
1381        let files = collect_files(temp.path(), None, false).unwrap();
1382        assert!(files.iter().any(|f| f.ends_with("sample.py")));
1383        assert!(files.iter().any(|f| f.ends_with("lib.rs")));
1384        assert!(!files.iter().any(|f| f.ends_with("notes.txt")));
1385    }
1386
    /// One Rust snippet containing an `unsafe` block, a `ptr::write` call, an
    /// `.unwrap()` and a `todo!` must produce findings from the taint,
    /// resources and bounds analyses respectively.
    #[test]
    fn test_rust_secure_metrics_detected() {
        let source = r#"
use std::ptr;

fn risky(user: &str) {
    unsafe { ptr::write(user.as_ptr() as *mut u8, b'x'); }
    let _v = Some(user).unwrap();
    todo!("finish hardening");
}
"#;
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_rust::LANGUAGE.into())
            .unwrap();
        let tree = parser.parse(source, None).unwrap();
        // NOTE(review): this relative path is never written by the test, so
        // any analysis that reads the file from disk presumably sees
        // nothing here — confirm that only the in-memory scanners matter.
        let file = PathBuf::from("src/lib.rs");

        let taint_findings = analyze_taint(tree.root_node(), source, &file);
        let resource_findings = analyze_resources(tree.root_node(), source, &file);
        let bounds_findings = analyze_bounds(tree.root_node(), source, &file);

        // Only non-emptiness is checked for the first two analyses; the
        // bounds findings are additionally checked by category.
        assert!(!taint_findings.is_empty(), "Should count unsafe blocks");
        assert!(
            !resource_findings.is_empty(),
            "Should count raw pointer ops"
        );
        assert!(
            bounds_findings.iter().any(|f| f.category == "unwrap"),
            "Should count unwrap calls"
        );
        assert!(
            bounds_findings.iter().any(|f| f.category == "todo_marker"),
            "Should count todo markers"
        );
    }
1423
    /// SECURE-FASTPATH-V1 (M-Z8): the file partition step must drop
    /// oversize / auto-generated files BEFORE the per-analysis
    /// `read_to_string_tolerant` loop, mirroring the policy applied
    /// by `vuln.rs::analyze_file` (M-Y3) and `api_check.rs::analyze_file`
    /// (M-Z4). Pre-fix, `tldr secure --lang typescript /tmp/repos/ts-dom-gen`
    /// ran 154 s because the 2.3 MB `dom.generated.d.ts` was read 6
    /// times (once per sub-analysis) and parsed 6 times into a
    /// tree-sitter AST. The fastpath skips it on the FIRST stat call.
    ///
    /// Test fixture: a synthetic `.d.ts` file padded over the 512 KB
    /// auto-gen cap (`MAX_AUTOGEN_FILE_SIZE_BYTES`). Asserts:
    /// 1. The file is dropped from the kept set.
    /// 2. `files_skipped` is incremented.
    /// 3. The warning carries the documented oversize shape so
    ///    consumers can distinguish oversize from UTF-8 skips.
    ///
    /// A small `.ts` file is partitioned alongside it and must be kept.
    #[test]
    fn test_secure_skips_oversize_files() {
        use tldr_core::fs::oversize::MAX_AUTOGEN_FILE_SIZE_BYTES;

        let temp = TempDir::new().unwrap();

        // Padded content that exceeds the auto-gen cap. Use a `.d.ts`
        // suffix so the auto-gen 512 KB cap applies (rather than the
        // 10 MB source-file cap, which would force a many-MB fixture).
        let mut padded = String::with_capacity(MAX_AUTOGEN_FILE_SIZE_BYTES as usize + 1024);
        padded.push_str("export type Generated = {\n");
        // A line that is harmless but heavy enough to cross the cap.
        let line = "  member_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx: string;\n";
        // Keep appending until the buffer is at least 1 KiB past the cap.
        while (padded.len() as u64) < MAX_AUTOGEN_FILE_SIZE_BYTES + 1024 {
            padded.push_str(line);
        }
        padded.push_str("};\n");
        let big = create_test_file(&temp, "dom.generated.d.ts", &padded);

        // Sanity: confirm we actually exceeded the cap (otherwise the
        // test would be a no-op false-positive).
        let size = std::fs::metadata(&big).unwrap().len();
        assert!(
            size > MAX_AUTOGEN_FILE_SIZE_BYTES,
            "fixture must exceed auto-gen cap (size={}, cap={})",
            size,
            MAX_AUTOGEN_FILE_SIZE_BYTES
        );

        // Also include a small, in-policy `.ts` file so we can verify
        // the partition continues past the oversize skip rather than
        // short-circuiting.
        let small = create_test_file(
            &temp,
            "ok.ts",
            "export function f(x: string): string { return x; }\n",
        );

        let (kept, warnings, files_skipped) =
            partition_utf8_clean(&[big.clone(), small.clone()]);

        // 1. Oversize file is dropped from the kept set.
        assert!(
            !kept.iter().any(|p| p == &big),
            "oversize .d.ts must be dropped from kept set: kept={:?}",
            kept
        );
        // The small in-policy file is preserved.
        assert!(
            kept.iter().any(|p| p == &small),
            "small in-policy .ts must be preserved: kept={:?}",
            kept
        );

        // 2. files_skipped reflects the oversize drop.
        assert_eq!(
            files_skipped, 1,
            "files_skipped must count the oversize drop (got {})",
            files_skipped
        );

        // 3. Warning carries the documented oversize shape, distinct
        //    from the UTF-8 "invalid UTF-8 at byte" shape.
        let oversize_warning = warnings
            .iter()
            .find(|w| w.contains("dom.generated.d.ts"))
            .expect("must emit a warning for the oversize file");
        assert!(
            oversize_warning.contains("exceeds")
                && oversize_warning.contains("cap for")
                && oversize_warning.contains("auto-generated/minified files"),
            "oversize warning must use the format_oversize_warning shape \
             (got: {})",
            oversize_warning
        );
    }
1515
1516    // =========================================================================
1517    // SECURE-TEST-FILE-SUPPRESSION-V1 (M-Z10) — vuln/secure parity tests
1518    // =========================================================================
1519
    /// Run `secure::run` against `path` in quick mode with JSON output and
    /// return the report parsed as a `serde_json::Value`.
    ///
    /// The report is written to `report.json` inside a fresh temporary
    /// directory and read back from there. `lang` and `include_tests` are
    /// forwarded to `SecureArgs` unchanged. Panics if the run fails, the
    /// report file cannot be read, or its contents are not valid JSON.
    fn run_secure_to_json(
        path: &Path,
        lang: Language,
        include_tests: bool,
    ) -> serde_json::Value {
        let temp_out = TempDir::new().unwrap();
        let out_path = temp_out.path().join("report.json");
        let args = SecureArgs {
            path: path.to_path_buf(),
            lang: Some(lang),
            detail: None,
            // Quick mode: only run taint/resources/bounds, sufficient for
            // the suppression assertion and faster.
            quick: true,
            output: Some(out_path.clone()),
            no_default_ignore: false,
            include_tests,
        };
        run(args, OutputFormat::Json).expect("secure::run should succeed");
        // `temp_out` is still alive here, so the report file still exists.
        let raw = fs::read_to_string(&out_path).expect("report file must exist");
        serde_json::from_str(&raw).expect("report must be valid JSON")
    }
1545
1546    /// SECURE-TEST-FILE-SUPPRESSION-V1: default scan must suppress
1547    /// findings emitted from JS/TS test files, mirroring `tldr vuln`'s
1548    /// M-X3 mask. Without this, vuln/secure parity breaks: vuln=1
1549    /// finding (source-only), secure.taint=2 findings (source + test).
1550    #[test]
1551    fn test_secure_default_suppresses_js_test_files() {
1552        let temp = TempDir::new().unwrap();
1553
1554        // Source file with a real source-to-sink reflected-XSS flow
1555        // (req.query -> res.send). Same shape as the
1556        // `vuln_migration_v1/javascript/xss_positive.js` fixture, which
1557        // the canonical taint engine is known to report on.
1558        let source_js = r#"export function handler(req, res, db) {
1559    const name = req.query.name;
1560    res.send("<h1>" + name + "</h1>");
1561}
1562"#;
1563        // Test file with the SAME flow shape, placed under `test/` so
1564        // it matches `is_js_test_file`. The canonical taint engine
1565        // emits a finding here too; without the suppression filter
1566        // this would inflate `secure.taint` past `vuln.findings`.
1567        let test_js = r#"export function handler(req, res, db) {
1568    const input = req.query.q;
1569    res.send("<p>" + input + "</p>");
1570}
1571"#;
1572
1573        // Layout:
1574        //   <temp>/src/index.js   (source — must produce a finding)
1575        //   <temp>/test/app.test.js  (test — must be suppressed)
1576        let src_dir = temp.path().join("src");
1577        let test_dir = temp.path().join("test");
1578        fs::create_dir_all(&src_dir).unwrap();
1579        fs::create_dir_all(&test_dir).unwrap();
1580        let src_path = src_dir.join("index.js");
1581        let test_path = test_dir.join("app.test.js");
1582        fs::write(&src_path, source_js).unwrap();
1583        fs::write(&test_path, test_js).unwrap();
1584
1585        let report = run_secure_to_json(temp.path(), Language::JavaScript, false);
1586
1587        // Pull out taint findings — the suppression target.
1588        let findings = report["findings"]
1589            .as_array()
1590            .expect("findings must be an array")
1591            .iter()
1592            .filter(|f| f["category"].as_str() == Some("taint"))
1593            .collect::<Vec<_>>();
1594
1595        // Pre-fix: at least one finding from the test file leaked
1596        // through. Post-fix: every taint finding's `file` MUST be the
1597        // source file (test/app.test.js suppressed entirely). Assert
1598        // by looking at unique file paths to be tolerant of canonical
1599        // engine emitting multiple findings per flow.
1600        assert!(
1601            !findings.is_empty(),
1602            "fixture must produce at least one taint finding (got 0 — fixture is wrong)"
1603        );
1604        let unique_files: std::collections::HashSet<&str> = findings
1605            .iter()
1606            .filter_map(|f| f["file"].as_str())
1607            .collect();
1608        assert_eq!(
1609            unique_files.len(),
1610            1,
1611            "default scan must suppress test-file findings — expected exactly 1 \
1612             unique file (the source), got {:?}",
1613            unique_files
1614        );
1615        let kept_file = unique_files.iter().next().unwrap();
1616        assert!(
1617            kept_file.ends_with("index.js"),
1618            "kept finding must come from the source file, got {:?}",
1619            kept_file
1620        );
1621        assert!(
1622            !kept_file.contains("/test/"),
1623            "kept finding must not come from a test path, got {:?}",
1624            kept_file
1625        );
1626        assert!(
1627            !findings
1628                .iter()
1629                .any(|f| f["file"].as_str().unwrap_or("").contains("/test/")),
1630            "no finding may originate from a test/ path; got: {:?}",
1631            findings.iter().map(|f| f["file"].clone()).collect::<Vec<_>>()
1632        );
1633    }
1634
1635    /// SECURE-TEST-FILE-SUPPRESSION-V1: `--include-tests` must restore
1636    /// the legacy emission set, surfacing findings from BOTH source and
1637    /// test files. Mirrors `tldr vuln --include-tests` semantics.
1638    #[test]
1639    fn test_secure_include_tests_emits_test_findings() {
1640        let temp = TempDir::new().unwrap();
1641
1642        let source_js = r#"export function handler(req, res, db) {
1643    const name = req.query.name;
1644    res.send("<h1>" + name + "</h1>");
1645}
1646"#;
1647        let test_js = r#"export function handler(req, res, db) {
1648    const input = req.query.q;
1649    res.send("<p>" + input + "</p>");
1650}
1651"#;
1652
1653        let src_dir = temp.path().join("src");
1654        let test_dir = temp.path().join("test");
1655        fs::create_dir_all(&src_dir).unwrap();
1656        fs::create_dir_all(&test_dir).unwrap();
1657        fs::write(src_dir.join("index.js"), source_js).unwrap();
1658        fs::write(test_dir.join("app.test.js"), test_js).unwrap();
1659
1660        let report = run_secure_to_json(temp.path(), Language::JavaScript, true);
1661
1662        let findings = report["findings"]
1663            .as_array()
1664            .expect("findings must be an array")
1665            .iter()
1666            .filter(|f| f["category"].as_str() == Some("taint"))
1667            .collect::<Vec<_>>();
1668
1669        // With --include-tests BOTH source and test findings surface.
1670        // Use unique-file-set semantics for tolerance to canonical-
1671        // engine multi-emission per flow.
1672        let unique_files: std::collections::HashSet<&str> = findings
1673            .iter()
1674            .filter_map(|f| f["file"].as_str())
1675            .collect();
1676        assert_eq!(
1677            unique_files.len(),
1678            2,
1679            "--include-tests must restore test-file emissions — expected 2 \
1680             unique files (source + test), got {:?}",
1681            unique_files
1682        );
1683        assert!(
1684            unique_files.iter().any(|f| f.ends_with("index.js")),
1685            "must include source-file finding: {:?}",
1686            unique_files
1687        );
1688        assert!(
1689            unique_files.iter().any(|f| f.contains("/test/") && f.ends_with(".test.js")),
1690            "must include test-file finding when --include-tests: {:?}",
1691            unique_files
1692        );
1693    }
1694
1695    /// Direct unit test for the in-place helper. Independent of the
1696    /// `run()` pipeline so a regression in suppression semantics is
1697    /// caught at the predicate-application boundary.
1698    #[test]
1699    fn test_apply_test_file_suppression_filters_js_and_rust_test_paths() {
1700        let mk = |file: &str| SecureFinding::new("taint", "high", "x").with_location(file, 1);
1701
1702        let mut findings = vec![
1703            mk("/abs/src/index.js"),                   // keep
1704            mk("/abs/test/app.test.js"),               // drop (js test path)
1705            mk("/abs/lib/foo.spec.ts"),                // drop (js spec suffix)
1706            mk("/abs/__tests__/x.tsx"),                // drop (js __tests__)
1707            mk("/abs/crates/foo/tests/it.rs"),         // drop (rust /tests/)
1708            mk("/abs/crates/foo/src/lib.rs"),          // keep
1709            mk("/abs/crates/foo/src/foo_test.rs"),     // drop (rust _test.rs)
1710            // Fixture exemption — must NOT be dropped (vuln_migration_v1
1711            // suite depends on this exemption being preserved).
1712            mk("/abs/crates/tldr-cli/tests/fixtures/vuln_migration_v1/javascript/x.js"),
1713        ];
1714
1715        apply_test_file_suppression(&mut findings);
1716
1717        let kept: Vec<_> = findings.iter().map(|f| f.file.clone()).collect();
1718        // Expected kept: 2 source files (index.js, lib.rs) + 1 fixture =
1719        // 3. The 5 dropped: app.test.js, foo.spec.ts, x.tsx, it.rs,
1720        // foo_test.rs.
1721        assert_eq!(
1722            kept.len(),
1723            3,
1724            "expected 3 kept (2 source + 1 fixture), got {:?}",
1725            kept
1726        );
1727        assert!(kept.iter().any(|f| f.ends_with("/src/index.js")));
1728        assert!(kept.iter().any(|f| f.ends_with("/src/lib.rs")));
1729        assert!(kept.iter().any(|f| f.contains("/fixtures/")));
1730        // Negative assertions — none of the dropped paths should remain.
1731        assert!(!kept.iter().any(|f| f.ends_with("/app.test.js")));
1732        assert!(!kept.iter().any(|f| f.ends_with("/foo.spec.ts")));
1733        assert!(!kept.iter().any(|f| f.ends_with("/__tests__/x.tsx")));
1734        assert!(!kept.iter().any(|f| f.ends_with("/tests/it.rs")));
1735        assert!(!kept.iter().any(|f| f.ends_with("/foo_test.rs")));
1736    }
1737}