use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::time::Instant;
use anyhow::Result;
use clap::Args;
use serde_json::{json, Value};
use tldr_core::walker::ProjectWalker;
use tldr_core::Language;
use super::error::RemainingError;
use super::types::{Severity, TaintFlow, VulnFinding, VulnReport, VulnSummary, VulnType};
use crate::output::OutputFormat;
// Largest source file, in bytes (10 MiB), that the scanner will accept.
// `collect_files` rejects a single-file target above this size with an error
// and silently leaves out larger files found while walking a directory.
const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024;
// Command-line arguments for the vulnerability scan.
//
// NOTE: plain `//` comments are used on purpose. With clap's derive API,
// `///` doc comments become user-visible help text, which would change the
// CLI output.
#[derive(Debug, Args)]
pub struct VulnArgs {
// File or directory to scan.
pub path: PathBuf,
// Explicit language; when absent, `run` autodetects it from `path`.
#[arg(long, short = 'l')]
pub lang: Option<Language>,
// Severity threshold: findings are kept when their `order()` is less than
// or equal to this value's `order()`.
#[arg(long)]
pub severity: Option<Severity>,
// When set, only findings whose type is in this list are kept.
#[arg(long, value_name = "TYPE")]
pub vuln_type: Option<Vec<VulnType>>,
// Keep findings with `Severity::Info` (dropped by default).
#[arg(long)]
pub include_informational: bool,
// Keep "smell" findings as classified by `is_smell_finding` (dropped by default).
#[arg(long)]
pub include_smells: bool,
// Keep findings located in JS/TS test files (dropped by default).
#[arg(long)]
pub include_tests: bool,
// Write the rendered report to this file instead of stdout.
#[arg(long, short = 'O')]
pub output: Option<PathBuf>,
// Forwarded to `ProjectWalker::no_default_ignore` when walking a directory.
#[arg(long)]
pub no_default_ignore: bool,
}
impl VulnArgs {
    /// Runs the vulnerability scan and emits the report.
    ///
    /// Steps: resolve the language (explicit `--lang` or autodetection),
    /// collect candidate files, analyze each one, apply the CLI filters,
    /// sort the findings, attach enclosing function names, and render the
    /// report in `format` to `--output` or stdout.
    ///
    /// # Errors
    ///
    /// Returns an error when `path` does not exist, when the autodetected
    /// language is rejected by `is_natively_analyzed`, when file collection
    /// fails, or when serialising or writing the report fails.
    pub fn run(&self, format: OutputFormat) -> Result<()> {
        let start = Instant::now();
        if !self.path.exists() {
            return Err(RemainingError::file_not_found(&self.path).into());
        }

        let effective_lang: Option<Language> = match self.lang {
            Some(l) => Some(l),
            None => {
                let detected = if self.path.is_dir() {
                    Language::from_directory(&self.path)
                } else {
                    Language::from_path(&self.path)
                };
                if let Some(l) = detected {
                    if !is_natively_analyzed(l) {
                        return Err(RemainingError::autodetect_unsupported(format!(
                            "vuln: taint analysis for {lang} is not yet supported by autodetect; \
                             pass --lang {lang} explicitly to scan this file (the canonical taint \
                             pipeline supports it). Autodetect-by-extension currently routes only \
                             --lang python, --lang rust, --lang typescript, and --lang javascript; \
                             other languages require an explicit --lang flag.",
                            lang = l.as_str()
                        ))
                        .into());
                    }
                }
                detected
            }
        };

        let files = collect_files(&self.path, effective_lang, self.no_default_ignore)?;
        let mut all_findings: Vec<VulnFinding> = Vec::new();
        let mut files_scanned: u32 = 0;
        let mut files_skipped: u32 = 0;
        let mut warnings: Vec<String> = Vec::new();

        for file_path in &files {
            // Files that are not valid UTF-8 are reported and skipped. They
            // count towards `files_skipped` only, never `files_scanned`.
            if let Ok(tldr_core::fs::ReadOutcome::NonUtf8 { byte_offset }) =
                tldr_core::fs::read_to_string_tolerant(file_path)
            {
                files_skipped += 1;
                warnings.push(format!(
                    "Skipped {}: invalid UTF-8 at byte {}",
                    file_path.display(),
                    byte_offset
                ));
                continue;
            }
            // Best effort: a file whose analysis fails contributes no
            // findings but is still counted as scanned.
            if let Ok(findings) = analyze_file(file_path) {
                all_findings.extend(findings);
            }
            files_scanned += 1;
        }

        let mut filtered_findings = all_findings;
        if let Some(min_severity) = &self.severity {
            filtered_findings.retain(|f| f.severity.order() <= min_severity.order());
        }
        if let Some(types) = &self.vuln_type {
            filtered_findings.retain(|f| types.contains(&f.vuln_type));
        }
        if !self.include_informational {
            filtered_findings.retain(|f| f.severity != Severity::Info);
        }
        if !self.include_smells {
            filtered_findings.retain(|f| !is_smell_finding(f));
        }
        if !self.include_tests {
            filtered_findings.retain(|f| !is_js_test_file(Path::new(&f.file)));
        }

        // Stable, deterministic report order: file, then line, then type.
        filtered_findings
            .sort_by(|a, b| (&a.file, a.line, a.vuln_type).cmp(&(&b.file, b.line, b.vuln_type)));
        enrich_with_enclosing_function(&mut filtered_findings);

        // Count distinct files; the borrow ends with this statement so the
        // findings can be moved into the report without cloning.
        let files_with_vulns = filtered_findings
            .iter()
            .map(|f| f.file.as_str())
            .collect::<HashSet<&str>>()
            .len() as u32;
        let summary = build_summary(&filtered_findings, files_with_vulns);

        let report = VulnReport {
            findings: filtered_findings,
            summary: Some(summary),
            scan_duration_ms: start.elapsed().as_millis() as u64,
            files_scanned,
            files_skipped,
            warnings,
        };

        let output_str = match format {
            OutputFormat::Sarif => {
                let sarif = generate_sarif(&report);
                serde_json::to_string_pretty(&sarif)?
            }
            OutputFormat::Text => format_vuln_text(&report),
            _ => serde_json::to_string_pretty(&report)?,
        };

        if let Some(ref output_path) = self.output {
            fs::write(output_path, &output_str)?;
        } else {
            println!("{}", output_str);
        }
        Ok(())
    }
}
/// Fills in `function` on each finding with the name of the function or
/// method enclosing the finding's line.
///
/// Findings are grouped by file so each file is extracted once. Findings in
/// files that fail to extract are left untouched.
fn enrich_with_enclosing_function(findings: &mut [VulnFinding]) {
    use std::collections::HashMap;
    use tldr_core::ast::extract::extract_file;
    use tldr_core::types::ModuleInfo;

    // file name -> positions of the findings located in that file
    let mut positions_by_file: HashMap<String, Vec<usize>> = HashMap::new();
    for (position, finding) in findings.iter().enumerate() {
        positions_by_file
            .entry(finding.file.clone())
            .or_default()
            .push(position);
    }

    for (file_name, positions) in positions_by_file {
        let parsed: Result<ModuleInfo, _> = extract_file(Path::new(&file_name), None);
        let Ok(module) = parsed else {
            continue;
        };
        for position in positions {
            let finding = &mut findings[position];
            finding.function = lookup_enclosing_function(&module, finding.line);
        }
    }
}
/// Returns the name of the tightest function or method whose line span
/// contains `line`, or `None` when nothing encloses it.
///
/// Top-level functions are considered before class methods. Among spans of
/// equal size the first one encountered wins. Entries whose start or end
/// line is `0` are ignored.
fn lookup_enclosing_function(
    module: &tldr_core::types::ModuleInfo,
    line: u32,
) -> Option<String> {
    /// Yields `(span size, name)` when `[start, end]` is a known span containing `line`.
    fn span_if_enclosing(line: u32, start: u32, end: u32, name: &str) -> Option<(u32, &str)> {
        let known = start != 0 && end != 0;
        let inside = start <= line && line <= end;
        (known && inside).then(|| (end.saturating_sub(start), name))
    }

    let free_functions = module
        .functions
        .iter()
        .filter_map(|f| span_if_enclosing(line, f.line_number, f.line_end, &f.name));
    let methods = module
        .classes
        .iter()
        .flat_map(|class| class.methods.iter())
        .filter_map(|m| span_if_enclosing(line, m.line_number, m.line_end, &m.name));

    // `min_by_key` keeps the first of several equally small spans.
    free_functions
        .chain(methods)
        .min_by_key(|&(span, _)| span)
        .map(|(_, name)| name.to_string())
}
/// Gathers the files to scan under `path`.
///
/// * A single file is returned as-is, unless it exceeds `MAX_FILE_SIZE`,
///   which is an error.
/// * A directory is walked (depth limit 10). Regular files accepted by
///   `is_supported_source_file` are kept; files over the size limit or whose
///   metadata cannot be read are silently left out.
/// * Anything else yields an empty list.
fn collect_files(
    path: &Path,
    lang: Option<Language>,
    no_default_ignore: bool,
) -> Result<Vec<PathBuf>, RemainingError> {
    if path.is_file() {
        let size = fs::metadata(path)
            .map_err(|_| RemainingError::file_not_found(path))?
            .len();
        if size > MAX_FILE_SIZE {
            return Err(RemainingError::file_too_large(path, size));
        }
        return Ok(vec![path.to_path_buf()]);
    }

    let mut collected = Vec::new();
    if !path.is_dir() {
        return Ok(collected);
    }

    let mut walker = ProjectWalker::new(path).max_depth(10);
    if no_default_ignore {
        walker = walker.no_default_ignore();
    }
    for entry in walker.iter() {
        let candidate = entry.path();
        if !(candidate.is_file() && is_supported_source_file(candidate, lang)) {
            continue;
        }
        let within_limit = fs::metadata(candidate)
            .map(|meta| meta.len() <= MAX_FILE_SIZE)
            .unwrap_or(false);
        if within_limit {
            collected.push(candidate.to_path_buf());
        }
    }
    Ok(collected)
}
/// Returns `true` when `lang` is one of the languages listed below.
///
/// `VulnArgs::run` uses this to reject an autodetected language that is not
/// in the list.
// NOTE(review): the list appears to name every `Language` variant that
// `is_supported_source_file` matches on, so this may currently always
// return `true` — confirm against the `Language` definition.
pub(super) fn is_natively_analyzed(lang: Language) -> bool {
matches!(
lang,
Language::Python
| Language::Rust
| Language::TypeScript
| Language::JavaScript
| Language::Go
| Language::Java
| Language::C
| Language::Cpp
| Language::Ruby
| Language::Kotlin
| Language::Swift
| Language::CSharp
| Language::Scala
| Language::Php
| Language::Lua
| Language::Luau
| Language::Elixir
| Language::Ocaml
)
}
/// Decides whether `path` should be scanned, based on its file extension.
///
/// With a language, only that language's extensions are accepted. Without
/// one, only `py` and `rs` files are accepted. Paths without a UTF-8
/// extension are never accepted.
fn is_supported_source_file(path: &Path, lang: Option<Language>) -> bool {
    let Some(ext) = path.extension().and_then(|e| e.to_str()) else {
        return false;
    };

    // Extension table: the accepted extensions for the requested language.
    let accepted: &[&str] = match lang {
        Some(Language::TypeScript) => &["ts", "tsx"],
        Some(Language::JavaScript) => &["js", "mjs", "cjs", "jsx"],
        Some(Language::Python) => &["py"],
        Some(Language::Rust) => &["rs"],
        Some(Language::Go) => &["go"],
        Some(Language::Java) => &["java"],
        Some(Language::C) => &["c", "h"],
        Some(Language::Cpp) => &["cpp", "cc", "cxx", "hpp", "hh", "hxx"],
        Some(Language::CSharp) => &["cs"],
        Some(Language::Ruby) => &["rb"],
        Some(Language::Php) => &["php"],
        Some(Language::Kotlin) => &["kt", "kts"],
        Some(Language::Swift) => &["swift"],
        Some(Language::Scala) => &["scala"],
        Some(Language::Elixir) => &["ex", "exs"],
        Some(Language::Lua) => &["lua"],
        Some(Language::Luau) => &["luau"],
        Some(Language::Ocaml) => &["ml", "mli"],
        None => &["py", "rs"],
    };
    accepted.contains(&ext)
}
/// Analyzes one file and returns its findings.
///
/// Findings from `tldr_core::security::vuln::scan_vulnerabilities` are
/// converted to this module's `VulnFinding`. For `.rs` files the line-based
/// `analyze_rust_file` results are appended, after `dedupe_overlap` removes
/// those already covered by the core scan.
///
/// # Errors
///
/// Returns `RemainingError::file_not_found` when the file cannot be read as
/// a string. A failure of the core scan is not an error: it yields no core
/// findings.
fn analyze_file(path: &Path) -> Result<Vec<VulnFinding>, RemainingError> {
let source = fs::read_to_string(path).map_err(|_| RemainingError::file_not_found(path))?;
let is_rust = matches!(path.extension().and_then(|e| e.to_str()), Some("rs"));
let mut findings: Vec<VulnFinding> =
match tldr_core::security::vuln::scan_vulnerabilities(path, None, None) {
Ok(report) => report
.findings
.into_iter()
.map(|f| {
let vuln_type = map_core_vuln_type(f.vuln_type);
// Unrecognised severity strings fall back to Medium.
let severity = match f.severity.to_uppercase().as_str() {
"CRITICAL" => Severity::Critical,
"HIGH" => Severity::High,
"MEDIUM" => Severity::Medium,
"LOW" => Severity::Low,
_ => Severity::Medium,
};
let file_str = f.file.display().to_string();
// "Degenerate" flow: source and sink are the same non-empty
// expression on the same line, so a single step is reported.
let is_degenerate = f.source.line == f.sink.line
&& f.source.expression == f.sink.expression
&& !f.source.expression.is_empty();
let taint_flow: Vec<TaintFlow> = if is_degenerate {
vec![TaintFlow {
file: file_str.clone(),
line: f.sink.line,
column: 0,
code_snippet: f.sink.expression.clone(),
description: format!(
"Direct sink: {} (source: {})",
f.sink.sink_type, f.source.source_type
),
}]
} else {
// Regular flow: one step for the source, one for the sink.
vec![
TaintFlow {
file: file_str.clone(),
line: f.source.line,
column: 0,
code_snippet: f.source.expression.clone(),
description: format!("Source: {}", f.source.source_type),
},
TaintFlow {
file: file_str.clone(),
line: f.sink.line,
column: 0,
code_snippet: f.sink.expression.clone(),
description: format!("Sink: {}", f.sink.sink_type),
},
]
};
// The finding is located at the sink line; columns are always 0
// here and `function` is filled in later by the caller.
VulnFinding {
vuln_type,
severity,
cwe_id: f.cwe_id.unwrap_or_default(),
title: format!("{:?}", f.vuln_type),
description: format!("{} with unsanitized input", f.sink.sink_type),
file: file_str,
line: f.sink.line,
column: 0,
function: None,
taint_flow,
remediation: f.remediation.clone(),
confidence: 0.85,
direct_sink: is_degenerate,
}
})
.collect(),
Err(_) => Vec::new(),
};
if is_rust {
// Add the Rust line-pattern findings, minus those duplicating a core finding.
let mut line_findings = analyze_rust_file(path, &source);
dedupe_overlap(&mut line_findings, &findings);
findings.extend(line_findings);
}
Ok(findings)
}
/// Removes line-scanner findings that duplicate a canonical finding.
///
/// Only SQL-injection and command-injection findings are candidates for
/// removal. One is dropped when `canonical` already holds a finding of the
/// same type on the same line. All other types are always kept.
fn dedupe_overlap(line_findings: &mut Vec<VulnFinding>, canonical: &[VulnFinding]) {
    line_findings.retain(|candidate| {
        let may_overlap = matches!(
            candidate.vuln_type,
            VulnType::SqlInjection | VulnType::CommandInjection
        );
        if !may_overlap {
            return true;
        }
        let already_reported = canonical
            .iter()
            .any(|known| known.vuln_type == candidate.vuln_type && known.line == candidate.line);
        !already_reported
    });
}
/// Translates a vulnerability type from `tldr_core` into this module's
/// `VulnType`, variant for variant.
fn map_core_vuln_type(core_ty: tldr_core::security::vuln::VulnType) -> VulnType {
    use tldr_core::security::vuln::VulnType as Core;

    match core_ty {
        // Injection-style sinks.
        Core::SqlInjection => VulnType::SqlInjection,
        Core::CommandInjection => VulnType::CommandInjection,
        Core::Xss => VulnType::Xss,
        // Resource and request targeting.
        Core::PathTraversal => VulnType::PathTraversal,
        Core::Ssrf => VulnType::Ssrf,
        Core::OpenRedirect => VulnType::OpenRedirect,
        // Unsafe data decoding.
        Core::Deserialization => VulnType::Deserialization,
    }
}
/// Line-by-line pattern scan of Rust source text.
///
/// Each non-empty, non-`//` line is checked against a fixed set of textual
/// patterns (unsafe blocks, transmute, raw pointer APIs, `unwrap()`, SQL
/// built with `format!`, unchecked byte/string conversions, and non-literal
/// `Command` arguments). A line can produce several findings. `path` is used
/// for the reported file name and to decide whether the file is a test file.
///
/// Reported columns are offsets into the whitespace-trimmed line.
pub(super) fn analyze_rust_file(path: &Path, source: &str) -> Vec<VulnFinding> {
let file_path = path.display().to_string();
let is_test_file = is_rust_test_file(path);
let mut findings = Vec::new();
let lines: Vec<&str> = source.lines().collect();
// State for tracking a `Command::new(...)` builder chain that may span lines.
let mut in_command_block = false;
let mut command_block_start_line: u32 = 0;
for (idx, line) in lines.iter().enumerate() {
let line_number = (idx + 1) as u32;
let trimmed = line.trim();
// Blank lines and line comments are never inspected.
if trimmed.is_empty() || trimmed.starts_with("//") {
continue;
}
// unsafe block with no `SAFETY:` comment on the preceding lines.
if (trimmed.contains("unsafe {") || trimmed.starts_with("unsafe{"))
&& !has_nearby_safety_comment(&lines, idx)
{
findings.push(rust_finding(
VulnType::UnsafeCode,
Severity::High,
RustFindingMeta {
cwe_id: "CWE-242",
title: "Unsafe Block Without Safety Rationale",
description: "unsafe block found without nearby SAFETY: justification comment",
},
RustFindingLocation {
file: &file_path,
line: line_number,
column: trimmed.find("unsafe").unwrap_or(0) as u32,
},
"Document invariants with // SAFETY: ... or avoid unsafe when possible",
0.80,
));
}
// transmute calls.
if trimmed.contains("std::mem::transmute(") || trimmed.contains("mem::transmute(") {
findings.push(rust_finding(
VulnType::MemorySafety,
Severity::Critical,
RustFindingMeta {
cwe_id: "CWE-119",
title: "Risky transmute Usage",
description:
"std::mem::transmute can violate type and memory safety guarantees",
},
RustFindingLocation {
file: &file_path,
line: line_number,
column: trimmed.find("transmute").unwrap_or(0) as u32,
},
"Prefer safe conversions (From/TryFrom, bytemuck) and explicit layout checks",
0.90,
));
}
// Raw pointer API usage.
if trimmed.contains("std::ptr::")
|| trimmed.contains("core::ptr::")
|| trimmed.contains("ptr::read(")
|| trimmed.contains("ptr::write(")
{
findings.push(rust_finding(
VulnType::MemorySafety,
Severity::High,
RustFindingMeta {
cwe_id: "CWE-119",
title: "Raw Pointer Operation",
description:
"raw pointer operation detected; verify lifetimes, aliasing, and bounds",
},
RustFindingLocation {
file: &file_path,
line: line_number,
column: trimmed.find("ptr::").unwrap_or(0) as u32,
},
"Use safe abstractions/slices where possible and document pointer invariants",
0.85,
));
}
// `.unwrap()` outside of test files.
if !is_test_file && trimmed.contains(".unwrap()") {
findings.push(rust_finding(
VulnType::Panic,
Severity::Medium,
RustFindingMeta {
cwe_id: "CWE-703",
title: "Potential Panic From unwrap()",
description: "unwrap() in non-test Rust code can panic in production paths",
},
RustFindingLocation {
file: &file_path,
line: line_number,
column: trimmed.find(".unwrap()").unwrap_or(0) as u32,
},
"Handle Result/Option explicitly or use expect() with actionable context",
0.70,
));
}
// `format!` whose string literal contains an SQL keyword and the line has
// a placeholder brace or a `+`.
if trimmed.contains("format!(")
&& format_string_contains_sql_keyword(trimmed)
&& (trimmed.contains("{}") || trimmed.contains("{") || trimmed.contains("+"))
{
findings.push(rust_finding(
VulnType::SqlInjection,
Severity::Critical,
RustFindingMeta {
cwe_id: "CWE-89",
title: "SQL String Interpolation",
description:
"SQL query appears to be built via string formatting/interpolation",
},
RustFindingLocation {
file: &file_path,
line: line_number,
column: trimmed.find("format!(").unwrap_or(0) as u32,
},
"Use parameterized queries via your DB client instead of format!/concatenation",
0.88,
));
}
// Unchecked UTF-8 conversion or direct indexing into `as_bytes()`.
if trimmed.contains("from_utf8_unchecked(")
|| trimmed.contains(".as_bytes()[")
|| trimmed.contains(".as_bytes().get_unchecked(")
{
findings.push(rust_finding(
VulnType::MemorySafety,
Severity::High,
RustFindingMeta {
cwe_id: "CWE-20",
title: "Unchecked Byte/String Conversion",
description:
"unchecked UTF-8 or byte indexing detected without visible validation",
},
RustFindingLocation {
file: &file_path,
line: line_number,
column: trimmed
.find("as_bytes")
.or_else(|| trimmed.find("from_utf8_unchecked"))
.unwrap_or(0) as u32,
},
"Validate lengths/UTF-8 before conversion or use checked APIs",
0.82,
));
}
// A `Command::new(` starts (or restarts) a tracked builder chain.
if trimmed.contains("Command::new(") || trimmed.contains("std::process::Command::new(") {
in_command_block = true;
command_block_start_line = line_number;
}
// Inside a chain, an `.arg(` whose argument does not start with a quote
// is treated as variable-driven.
if in_command_block
&& trimmed.contains(".arg(")
&& !trimmed.contains(".arg(\"")
&& !trimmed.contains(".arg('")
{
findings.push(rust_finding(
VulnType::CommandInjection,
Severity::Critical,
RustFindingMeta {
cwe_id: "CWE-78",
title: "Unsanitized Process Argument",
description: "Command argument appears to be variable-driven without visible sanitization",
},
RustFindingLocation {
file: &file_path,
line: command_block_start_line.max(line_number),
column: trimmed.find(".arg(").unwrap_or(0) as u32,
},
"Validate/allowlist user-controlled arguments before passing to Command",
0.80,
));
}
// The chain ends on the line that terminates the statement.
if in_command_block && (trimmed.ends_with(';') || trimmed.contains(");")) {
in_command_block = false;
command_block_start_line = 0;
}
}
findings
}
/// Descriptive text for a finding built by `rust_finding`; each field is
/// copied into the `VulnFinding` field of the same name.
struct RustFindingMeta<'a> {
cwe_id: &'a str,
title: &'a str,
description: &'a str,
}
/// Where a finding built by `rust_finding` is located; each field is copied
/// into the `VulnFinding` field of the same name.
struct RustFindingLocation<'a> {
file: &'a str,
line: u32,
column: u32,
}
fn rust_finding(
vuln_type: VulnType,
severity: Severity,
meta: RustFindingMeta<'_>,
location: RustFindingLocation<'_>,
remediation: &str,
confidence: f64,
) -> VulnFinding {
VulnFinding {
vuln_type,
severity,
cwe_id: meta.cwe_id.to_string(),
title: meta.title.to_string(),
description: meta.description.to_string(),
file: location.file.to_string(),
line: location.line,
column: location.column,
function: None,
taint_flow: Vec::new(),
remediation: remediation.to_string(),
confidence,
direct_sink: false,
}
}
/// Reports whether either of the two lines directly above `lines[index]`
/// contains the text `SAFETY:`. The line at `index` itself is not examined.
fn has_nearby_safety_comment(lines: &[&str], index: usize) -> bool {
    let window_start = index.saturating_sub(2);
    (window_start..index)
        .map(|row| lines[row])
        .any(|text| text.contains("SAFETY:"))
}
/// Reports whether the first `format!` string literal on `line` contains an
/// SQL keyword as a whole word.
///
/// The literal is upper-cased before matching. A keyword counts only when
/// the bytes directly before and after it are not ASCII alphanumerics or
/// `_`. Returns `false` when no literal can be extracted.
fn format_string_contains_sql_keyword(line: &str) -> bool {
    const KEYWORDS: &[&str] = &["SELECT", "INSERT", "UPDATE", "DELETE", "FROM", "WHERE"];

    let upper = match extract_first_format_string_literal(line) {
        Some(literal) => literal.to_uppercase(),
        None => return false,
    };
    let bytes = upper.as_bytes();

    // A position is a word boundary when it lies outside the text or holds
    // a non-word byte.
    let is_boundary = |position: Option<usize>| {
        position
            .and_then(|i| bytes.get(i))
            .map_or(true, |&b| !is_word_byte(b))
    };

    KEYWORDS.iter().any(|keyword| {
        upper.match_indices(*keyword).any(|(at, _)| {
            is_boundary(at.checked_sub(1)) && is_boundary(Some(at + keyword.len()))
        })
    })
}
/// Returns `true` for bytes that can be part of an identifier-like word:
/// ASCII letters, ASCII digits, and `_`.
fn is_word_byte(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
/// Extracts the body of the string literal passed as the first argument of
/// the first `format!(` on `line`.
///
/// Returns `None` when there is no `format!(`, when the first argument does
/// not start with `"` (for example a raw string), or when the literal is not
/// closed on this line. Escape sequences are kept as written: the backslash
/// and the character after it are both copied.
fn extract_first_format_string_literal(line: &str) -> Option<String> {
    const MACRO_OPEN: &str = "format!(";

    let args = &line[line.find(MACRO_OPEN)? + MACRO_OPEN.len()..];
    let body = args.trim_start().strip_prefix('"')?;

    let mut literal = String::new();
    let mut rest = body.chars();
    loop {
        // Running out of characters means the literal never closed.
        match rest.next()? {
            '"' => return Some(literal),
            '\\' => {
                let escaped = rest.next()?;
                literal.push('\\');
                literal.push(escaped);
            }
            plain => literal.push(plain),
        }
    }
}
/// Heuristically decides whether `path` names a Rust test file.
///
/// A path qualifies when it lies under a `tests` directory or when it ends
/// with `_test.rs` or `tests.rs`. The directory check also covers relative
/// paths that start with `tests/` (or `tests\`), matching what
/// `is_js_test_file` does, so scanning `tests/foo.rs` from the project root
/// is treated the same as scanning `/abs/project/tests/foo.rs`.
pub(super) fn is_rust_test_file(path: &Path) -> bool {
    let path_str = path.to_string_lossy();

    let under_tests_dir = path_str.contains("/tests/")
        || path_str.contains("\\tests\\")
        || path_str.starts_with("tests/")
        || path_str.starts_with("tests\\");
    let named_like_test = path_str.ends_with("_test.rs") || path_str.ends_with("tests.rs");

    under_tests_dir || named_like_test
}
/// Heuristically decides whether `path` names a JavaScript/TypeScript test file.
///
/// Only `.js`, `.jsx`, `.ts`, `.tsx`, `.cjs` and `.mjs` files are considered.
/// Anything under a `fixtures` directory is never a test file. Otherwise a
/// file qualifies when it sits under a `test`, `tests` or `__tests__`
/// directory, or when its name ends in `.test.<ext>`, `.spec.<ext>` or
/// `.e2e.<ext>`.
pub(super) fn is_js_test_file(path: &Path) -> bool {
    const SCRIPT_EXTENSIONS: [&str; 6] = [".js", ".jsx", ".ts", ".tsx", ".cjs", ".mjs"];
    const TEST_DIRS: [&str; 3] = ["test", "tests", "__tests__"];
    const TEST_MARKERS: [&str; 3] = [".test", ".spec", ".e2e"];
    const SEPARATORS: [&str; 2] = ["/", "\\"];

    let lossy = path.to_string_lossy();
    let full: &str = &lossy;

    // `stem` is the path with its script extension removed.
    let Some(stem) = SCRIPT_EXTENSIONS
        .iter()
        .find_map(|ext| full.strip_suffix(*ext))
    else {
        return false;
    };

    if SEPARATORS
        .iter()
        .any(|sep| full.contains(&format!("{sep}fixtures{sep}")))
    {
        return false;
    }

    let under_test_dir = TEST_DIRS.iter().any(|dir| {
        SEPARATORS.iter().any(|sep| {
            full.contains(&format!("{sep}{dir}{sep}")) || full.starts_with(&format!("{dir}{sep}"))
        })
    });
    let named_like_test = TEST_MARKERS.iter().any(|marker| stem.ends_with(*marker));

    under_test_dir || named_like_test
}
/// Classifies a finding as a code "smell" rather than a vulnerability.
///
/// Only `Panic` findings whose title starts with `Potential Panic` qualify.
fn is_smell_finding(f: &VulnFinding) -> bool {
    let is_panic = f.vuln_type == VulnType::Panic;
    let titled_as_potential = f.title.starts_with("Potential Panic");
    is_panic && titled_as_potential
}
/// Human-readable display name for a vulnerability type.
fn vuln_type_name(vt: VulnType) -> &'static str {
    match vt {
        // Injection family.
        VulnType::SqlInjection => "SQL Injection",
        VulnType::CommandInjection => "Command Injection",
        VulnType::LdapInjection => "LDAP Injection",
        VulnType::XpathInjection => "XPath Injection",
        VulnType::Xss => "Cross-Site Scripting (XSS)",
        VulnType::Xxe => "XML External Entity (XXE)",
        // Request and resource targeting.
        VulnType::Ssrf => "Server-Side Request Forgery (SSRF)",
        VulnType::PathTraversal => "Path Traversal",
        VulnType::OpenRedirect => "Open Redirect",
        VulnType::Deserialization => "Insecure Deserialization",
        // Rust line-scanner categories.
        VulnType::UnsafeCode => "Unsafe Code Risk",
        VulnType::MemorySafety => "Memory Safety Violation",
        VulnType::Panic => "Unchecked Panic Path",
    }
}
/// Aggregates findings into a `VulnSummary`.
///
/// Counts findings per severity (keyed by the severity's `to_string()`) and
/// per type. The type key is the type's serialised JSON string, falling back
/// to its lower-cased `Debug` form when serialisation does not yield a
/// string. `files_with_vulns` is passed through unchanged.
fn build_summary(findings: &[VulnFinding], files_with_vulns: u32) -> VulnSummary {
    let mut severity_counts: HashMap<String, u32> = HashMap::new();
    let mut type_counts: HashMap<String, u32> = HashMap::new();

    for item in findings {
        let type_key = match serde_json::to_value(item.vuln_type) {
            Ok(serde_json::Value::String(name)) => name,
            _ => format!("{:?}", item.vuln_type).to_lowercase(),
        };
        *severity_counts.entry(item.severity.to_string()).or_default() += 1;
        *type_counts.entry(type_key).or_default() += 1;
    }

    VulnSummary {
        total_findings: findings.len() as u32,
        by_severity: severity_counts,
        by_type: type_counts,
        files_with_vulns,
    }
}
/// Renders a report as plain text.
///
/// The output lists every finding (severity, title, CWE, location,
/// description, taint flow steps, remediation), then the summary when one is
/// present, then the scan duration and the number of files scanned.
///
/// The "By Severity" rows are printed in a fixed order so repeated runs give
/// identical text: most severe first for the recognised names, then any other
/// keys alphabetically.
fn format_vuln_text(report: &VulnReport) -> String {
    /// Sort rank of a severity name; unrecognised names sort last.
    // NOTE(review): assumes `Severity`'s `to_string()` yields these names
    // (case-insensitively) — other spellings simply fall back to
    // alphabetical order.
    fn severity_rank(name: &str) -> usize {
        const ORDER: [&str; 5] = ["critical", "high", "medium", "low", "info"];
        ORDER
            .iter()
            .position(|known| known.eq_ignore_ascii_case(name))
            .unwrap_or(usize::MAX)
    }

    let mut out = String::new();
    out.push_str("=== Vulnerability Scan Results ===\n\n");

    if report.findings.is_empty() {
        out.push_str("No vulnerabilities found.\n");
    } else {
        out.push_str(&format!(
            "Found {} vulnerabilities:\n\n",
            report.findings.len()
        ));
        for (i, finding) in report.findings.iter().enumerate() {
            out.push_str(&format!(
                "{}. [{}] {} ({})\n",
                i + 1,
                finding.severity.to_string().to_uppercase(),
                finding.title,
                finding.cwe_id
            ));
            out.push_str(&format!("   File: {}:{}\n", finding.file, finding.line));
            out.push_str(&format!("   {}\n", finding.description));
            if !finding.taint_flow.is_empty() {
                out.push_str("   Taint Flow:\n");
                for (j, flow) in finding.taint_flow.iter().enumerate() {
                    out.push_str(&format!(
                        "     {}. {}:{} - {}\n",
                        j + 1,
                        flow.file,
                        flow.line,
                        flow.description
                    ));
                    if !flow.code_snippet.is_empty() {
                        out.push_str(&format!("        {}\n", flow.code_snippet.trim()));
                    }
                }
            }
            out.push_str(&format!("   Remediation: {}\n\n", finding.remediation));
        }
    }

    if let Some(summary) = &report.summary {
        out.push_str("=== Summary ===\n");
        out.push_str(&format!(
            "Total: {} vulnerabilities\n",
            summary.total_findings
        ));
        out.push_str(&format!(
            "Files with vulnerabilities: {}\n",
            summary.files_with_vulns
        ));
        if !summary.by_severity.is_empty() {
            out.push_str("By Severity:\n");
            // Hash map iteration order is arbitrary; sort for stable output.
            let mut rows: Vec<_> = summary.by_severity.iter().collect();
            rows.sort_by(|a, b| {
                severity_rank(a.0)
                    .cmp(&severity_rank(b.0))
                    .then_with(|| a.0.cmp(b.0))
            });
            for (sev, count) in rows {
                out.push_str(&format!("  {}: {}\n", sev, count));
            }
        }
    }

    out.push_str(&format!("\nScan duration: {}ms\n", report.scan_duration_ms));
    out.push_str(&format!("Files scanned: {}\n", report.files_scanned));
    out
}
/// Converts a line or column number into a valid SARIF position by mapping
/// `0` to `1` and leaving every other value unchanged.
#[inline]
fn sarif_clamp_pos(value: u32) -> u32 {
    if value == 0 {
        1
    } else {
        value
    }
}
fn generate_sarif(report: &VulnReport) -> Value {
let results: Vec<Value> = report
.findings
.iter()
.map(|f| {
json!({
"ruleId": f.cwe_id,
"level": match f.severity {
Severity::Critical | Severity::High => "error",
Severity::Medium => "warning",
Severity::Low | Severity::Info => "note",
},
"message": {
"text": f.description
},
"locations": [{
"physicalLocation": {
"artifactLocation": {
"uri": f.file
},
"region": {
"startLine": sarif_clamp_pos(f.line),
"startColumn": sarif_clamp_pos(f.column)
}
}
}],
"codeFlows": if f.taint_flow.is_empty() { None } else {
Some(vec![{
json!({
"threadFlows": [{
"locations": f.taint_flow.iter().map(|tf| {
json!({
"location": {
"physicalLocation": {
"artifactLocation": {
"uri": tf.file
},
"region": {
"startLine": sarif_clamp_pos(tf.line),
"startColumn": sarif_clamp_pos(tf.column)
}
},
"message": {
"text": tf.description
}
}
})
}).collect::<Vec<_>>()
}]
})
}])
}
})
})
.collect();
let rules: Vec<Value> = report
.findings
.iter()
.map(|f| &f.vuln_type)
.collect::<HashSet<_>>()
.into_iter()
.map(|vt| {
json!({
"id": vt.cwe_id(),
"name": vuln_type_name(*vt),
"shortDescription": {
"text": vuln_type_name(*vt)
},
"defaultConfiguration": {
"level": match vt.default_severity() {
Severity::Critical | Severity::High => "error",
Severity::Medium => "warning",
Severity::Low | Severity::Info => "note",
}
}
})
})
.collect();
json!({
"$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
"version": "2.1.0",
"runs": [{
"tool": {
"driver": {
"name": "tldr-vuln",
"version": env!("CARGO_PKG_VERSION"),
"informationUri": "https://github.com/tldr-code/tldr-rs",
"rules": rules
}
},
"results": results
}]
})
}
#[cfg(test)]
mod tests {
    //! Unit tests for the vulnerability scan: type metadata, file collection,
    //! the Rust line scanner, CLI filtering, test-file detection, summary
    //! counting and SARIF position clamping.
    use super::*;
    use tempfile::TempDir;

    #[test]
    fn test_vuln_type_cwe_mapping() {
        assert_eq!(VulnType::SqlInjection.cwe_id(), "CWE-89");
        assert_eq!(VulnType::Xss.cwe_id(), "CWE-79");
        assert_eq!(VulnType::CommandInjection.cwe_id(), "CWE-78");
    }

    #[test]
    fn test_vuln_type_severity() {
        assert_eq!(
            VulnType::SqlInjection.default_severity(),
            Severity::Critical
        );
        assert_eq!(VulnType::Xss.default_severity(), Severity::High);
        assert_eq!(VulnType::OpenRedirect.default_severity(), Severity::Medium);
    }

    /// Without a language, a directory walk picks up `.py` and `.rs` files only.
    #[test]
    fn test_collect_files_includes_rust() {
        let temp = TempDir::new().unwrap();
        std::fs::write(temp.path().join("a.py"), "print('ok')").unwrap();
        std::fs::write(temp.path().join("b.rs"), "fn main() {}").unwrap();
        std::fs::write(temp.path().join("c.txt"), "ignore").unwrap();
        let files = collect_files(temp.path(), None, false).unwrap();
        assert!(files.iter().any(|f| f.ends_with("a.py")));
        assert!(files.iter().any(|f| f.ends_with("b.rs")));
        assert!(!files.iter().any(|f| f.ends_with("c.txt")));
    }

    #[test]
    fn test_analyze_rust_detects_unsafe_without_safety_comment() {
        let source = r#"
pub fn raw_copy(ptr: *mut u8) {
unsafe { *ptr = 7; }
}
"#;
        let findings = analyze_rust_file(Path::new("src/lib.rs"), source);
        assert!(findings.iter().any(|f| f.vuln_type == VulnType::UnsafeCode));
    }

    #[test]
    fn test_analyze_rust_detects_command_and_sql_patterns() {
        let source = r#"
use std::process::Command;
pub fn run(user: &str, name: &str) {
let q = format!("SELECT * FROM users WHERE name = '{}'", name);
let _ = Command::new("sh").arg(user).output();
}
"#;
        let findings = analyze_rust_file(Path::new("src/lib.rs"), source);
        assert!(findings
            .iter()
            .any(|f| f.vuln_type == VulnType::SqlInjection));
        assert!(findings
            .iter()
            .any(|f| f.vuln_type == VulnType::CommandInjection));
    }

    #[test]
    fn test_analyze_rust_detects_transmute_usage() {
        let source = r#"
use std::mem;
pub fn cast(x: u32) -> i32 {
unsafe { mem::transmute(x) }
}
"#;
        let findings = analyze_rust_file(Path::new("src/lib.rs"), source);
        assert!(findings
            .iter()
            .any(|f| f.vuln_type == VulnType::MemorySafety && f.title.contains("transmute")));
    }

    #[test]
    fn test_analyze_rust_detects_raw_pointer_operation() {
        let source = r#"
pub unsafe fn read_ptr(p: *const u8) -> u8 {
std::ptr::read(p)
}
"#;
        let findings = analyze_rust_file(Path::new("src/lib.rs"), source);
        assert!(findings
            .iter()
            .any(|f| f.vuln_type == VulnType::MemorySafety && f.title.contains("Raw Pointer")));
    }

    #[test]
    fn test_analyze_rust_detects_unwrap_in_non_test_code() {
        let source = r#"
pub fn parse(s: &str) -> i32 {
s.parse::<i32>().unwrap()
}
"#;
        let findings = analyze_rust_file(Path::new("src/lib.rs"), source);
        assert!(findings
            .iter()
            .any(|f| f.vuln_type == VulnType::Panic && f.title.contains("unwrap")));
    }

    #[test]
    fn test_analyze_rust_detects_unchecked_bytes_patterns() {
        let source = r#"
pub fn from_raw(bytes: &[u8]) -> &str {
unsafe { std::str::from_utf8_unchecked(bytes) }
}
"#;
        let findings = analyze_rust_file(Path::new("src/lib.rs"), source);
        assert!(findings
            .iter()
            .any(|f| f.vuln_type == VulnType::MemorySafety
                && f.title.contains("Unchecked Byte/String Conversion")));
    }

    /// Builds `VulnArgs` for a Rust scan of `path` that writes JSON to `output`.
    fn make_vuln_args_for_test(
        path: PathBuf,
        output: PathBuf,
        include_smells: bool,
    ) -> VulnArgs {
        VulnArgs {
            path,
            lang: Some(Language::Rust),
            severity: None,
            vuln_type: None,
            include_informational: false,
            include_smells,
            include_tests: false,
            output: Some(output),
            no_default_ignore: false,
        }
    }

    /// Runs the scan and returns the `findings` array of the JSON report.
    ///
    /// A failing run is reported directly instead of surfacing later as a
    /// confusing "cannot read output file" panic.
    fn run_and_parse_findings(args: &VulnArgs) -> Vec<serde_json::Value> {
        args.run(OutputFormat::Json)
            .expect("vuln scan should succeed");
        let output_path = args.output.as_ref().unwrap();
        let raw = std::fs::read_to_string(output_path).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&raw).unwrap();
        parsed["findings"].as_array().cloned().unwrap_or_default()
    }

    #[test]
    fn test_vulnargs_run_default_suppresses_panic() {
        let temp = TempDir::new().unwrap();
        let fixture_path = temp.path().join("smelly.rs");
        std::fs::write(
            &fixture_path,
            "pub fn process(s: &str) -> i32 {\n    let n: i32 = s.parse().unwrap();\n    unsafe { *(0xdead as *mut u8) = 0; }\n    n\n}\n",
        )
        .unwrap();
        let output_path = temp.path().join("out.json");
        let args = make_vuln_args_for_test(fixture_path, output_path, false);
        let findings = run_and_parse_findings(&args);
        let panic_count = findings
            .iter()
            .filter(|f| f["vuln_type"].as_str() == Some("panic"))
            .count();
        assert_eq!(
            panic_count, 0,
            "default --include-smells=false must suppress Panic findings; got {} in {:?}",
            panic_count, findings
        );
        let unsafe_count = findings
            .iter()
            .filter(|f| f["vuln_type"].as_str() == Some("unsafe_code"))
            .count();
        assert!(
            unsafe_count >= 1,
            "UnsafeCode emission must NOT be suppressed by --include-smells=false; got {} in {:?}",
            unsafe_count,
            findings
        );
    }

    #[test]
    fn test_vulnargs_run_include_smells_emits_panic() {
        let temp = TempDir::new().unwrap();
        let fixture_path = temp.path().join("smelly.rs");
        std::fs::write(
            &fixture_path,
            "pub fn process(s: &str) -> i32 {\n    let n: i32 = s.parse().unwrap();\n    unsafe { *(0xdead as *mut u8) = 0; }\n    n\n}\n",
        )
        .unwrap();
        let output_path = temp.path().join("out.json");
        let args = make_vuln_args_for_test(fixture_path, output_path, true);
        let findings = run_and_parse_findings(&args);
        let panic_count = findings
            .iter()
            .filter(|f| f["vuln_type"].as_str() == Some("panic"))
            .count();
        assert!(
            panic_count >= 1,
            "--include-smells=true must restore Panic emission; got {} in {:?}",
            panic_count,
            findings
        );
    }

    #[test]
    fn test_is_js_test_file_path_components() {
        assert!(is_js_test_file(Path::new("test/foo.js")));
        assert!(is_js_test_file(Path::new("project/test/bar.ts")));
        assert!(is_js_test_file(Path::new("tests/foo.ts")));
        assert!(is_js_test_file(Path::new("project/tests/bar.js")));
        assert!(is_js_test_file(Path::new("src/__tests__/x.js")));
        assert!(is_js_test_file(Path::new("src/__tests__/y.tsx")));
    }

    #[test]
    fn test_is_js_test_file_filename_suffixes() {
        assert!(is_js_test_file(Path::new("src/foo.test.js")));
        assert!(is_js_test_file(Path::new("src/foo.test.ts")));
        assert!(is_js_test_file(Path::new("src/foo.test.jsx")));
        assert!(is_js_test_file(Path::new("src/foo.test.tsx")));
        assert!(is_js_test_file(Path::new("src/foo.spec.js")));
        assert!(is_js_test_file(Path::new("src/foo.spec.ts")));
        assert!(is_js_test_file(Path::new("src/foo.spec.tsx")));
        assert!(is_js_test_file(Path::new("e2e/login.e2e.js")));
        assert!(is_js_test_file(Path::new("src/login.e2e.ts")));
    }

    #[test]
    fn test_is_js_test_file_negatives() {
        assert!(!is_js_test_file(Path::new("src/foo.js")));
        assert!(!is_js_test_file(Path::new("src/foo.ts")));
        assert!(!is_js_test_file(Path::new("lib/index.js")));
        assert!(!is_js_test_file(Path::new("lib/test_helper.js")));
        assert!(!is_js_test_file(Path::new("test/foo.py")));
        assert!(!is_js_test_file(Path::new("tests/foo.rs")));
        assert!(!is_js_test_file(Path::new("test/foo.go")));
        assert!(!is_js_test_file(Path::new("src/testimony.js")));
        assert!(!is_js_test_file(Path::new("src/contest.js")));
    }

    /// Fixture files live under a `tests` directory but must still be scanned.
    #[test]
    fn test_is_js_test_file_fixture_exemption() {
        assert!(!is_js_test_file(Path::new(
            "crates/tldr-cli/tests/fixtures/vuln_migration_v1/javascript/path_traversal_positive.js"
        )));
        assert!(!is_js_test_file(Path::new(
            "crates/tldr-cli/tests/fixtures/vuln_migration_v1/typescript/sql_injection_positive.ts"
        )));
        assert!(!is_js_test_file(Path::new(
            "/abs/path/crates/tldr-cli/tests/fixtures/vuln_migration_v1/javascript/xss_positive.js"
        )));
    }

    /// Builds a synthetic SQL-injection finding at the given location.
    fn make_finding(file: &str, line: u32, column: u32) -> VulnFinding {
        VulnFinding {
            vuln_type: VulnType::SqlInjection,
            severity: Severity::High,
            cwe_id: "CWE-89".to_string(),
            title: "Synthetic finding".to_string(),
            description: "Test fixture".to_string(),
            file: file.to_string(),
            line,
            column,
            function: None,
            taint_flow: vec![],
            remediation: "Test remediation".to_string(),
            confidence: 0.9,
            direct_sink: false,
        }
    }

    #[test]
    fn test_vuln_summary_files_with_vulns_unique_count() {
        let findings = vec![
            make_finding("src/a.rs", 10, 1),
            make_finding("src/a.rs", 20, 1),
            make_finding("src/a.rs", 30, 1),
            make_finding("src/b.rs", 5, 1),
            make_finding("src/b.rs", 15, 1),
        ];
        let unique_files: HashSet<&str> =
            findings.iter().map(|f| f.file.as_str()).collect();
        let summary = build_summary(&findings, unique_files.len() as u32);
        assert_eq!(
            summary.files_with_vulns, 2,
            "5 findings across 2 unique files must yield files_with_vulns=2; \
             got {} (full summary: {:?})",
            summary.files_with_vulns, summary
        );
        assert_eq!(
            summary.total_findings, 5,
            "total_findings must equal findings.len()"
        );
        assert!(
            summary.files_with_vulns <= summary.total_findings,
            "files_with_vulns ({}) must never exceed total_findings ({})",
            summary.files_with_vulns,
            summary.total_findings
        );
    }

    #[test]
    fn test_vuln_summary_zero_findings_zero_files_with_vulns() {
        let findings: Vec<VulnFinding> = vec![];
        let unique_files: HashSet<&str> =
            findings.iter().map(|f| f.file.as_str()).collect();
        let summary = build_summary(&findings, unique_files.len() as u32);
        assert_eq!(summary.total_findings, 0);
        assert_eq!(
            summary.files_with_vulns, 0,
            "zero findings MUST yield files_with_vulns=0; got {}",
            summary.files_with_vulns
        );
    }

    /// Every `region` in the SARIF output must have line and column >= 1,
    /// even when the finding itself carries zeros.
    #[test]
    fn test_vuln_sarif_startcolumn_at_least_one() {
        let finding_with_zero_pos = VulnFinding {
            vuln_type: VulnType::SqlInjection,
            severity: Severity::High,
            cwe_id: "CWE-89".to_string(),
            title: "Synthetic finding".to_string(),
            description: "Test fixture".to_string(),
            file: "src/x.py".to_string(),
            line: 0,
            column: 0,
            function: None,
            taint_flow: vec![TaintFlow {
                file: "src/x.py".to_string(),
                line: 0,
                column: 0,
                code_snippet: "x = input()".to_string(),
                description: "source".to_string(),
            }],
            remediation: "Sanitize input".to_string(),
            confidence: 0.9,
            direct_sink: false,
        };
        let report = VulnReport {
            findings: vec![finding_with_zero_pos],
            summary: None,
            scan_duration_ms: 0,
            files_scanned: 1,
            files_skipped: 0,
            warnings: Vec::new(),
        };
        let sarif = generate_sarif(&report);

        // Recursively visits every JSON object and checks its `region`, if any.
        fn walk_regions(value: &Value, violations: &mut Vec<String>) {
            match value {
                Value::Object(map) => {
                    if let Some(region) = map.get("region") {
                        if let Some(line) = region.get("startLine").and_then(|v| v.as_u64()) {
                            if line < 1 {
                                violations.push(format!("startLine={} < 1", line));
                            }
                        }
                        if let Some(col) = region.get("startColumn").and_then(|v| v.as_u64()) {
                            if col < 1 {
                                violations.push(format!("startColumn={} < 1", col));
                            }
                        }
                    }
                    for v in map.values() {
                        walk_regions(v, violations);
                    }
                }
                Value::Array(arr) => {
                    for v in arr {
                        walk_regions(v, violations);
                    }
                }
                _ => {}
            }
        }

        let mut violations: Vec<String> = Vec::new();
        walk_regions(&sarif, &mut violations);
        assert!(
            violations.is_empty(),
            "SARIF emitter must clamp all startLine/startColumn values to >= 1 \
             (SARIF 2.1.0 §3.30.5/§3.30.6); violations: {:?}\nSARIF: {}",
            violations,
            serde_json::to_string_pretty(&sarif).unwrap()
        );
    }
}