use crate::deep::candidate::Candidate;
use crate::scanner::matcher::compute_finding_id;
use crate::types::{AuthCategory, Confidence, Finding, ScanPass, Surface};
use serde::Deserialize;
use std::path::Path;
/// A single finding as reported by the semantic pass, before it is turned
/// into a [`Finding`] by [`into_finding`].
///
/// The type derives `Deserialize`, so it can be decoded from serialized data.
#[derive(Debug, Clone, Deserialize)]
pub struct SemanticFinding {
/// First line of the flagged range. The snippet helpers in this file treat
/// it as 1-based and return no snippet when it is `0`.
pub line_start: usize,
/// Last line of the flagged range, inclusive. The snippet helpers return no
/// snippet when it is smaller than `line_start`.
pub line_end: usize,
/// Category assigned to the finding. It is copied onto the resulting
/// `Finding`, and its slug is used to build the `semantic-<slug>` rule id
/// when no seed rule is inherited.
pub category: AuthCategory,
/// Confidence assigned to the finding; copied onto the resulting `Finding`.
pub confidence: Confidence,
/// Description of the finding; moved onto the resulting `Finding`.
pub description: String,
/// Free-form reasoning text. `into_finding` only logs its length and does
/// not store it on the resulting `Finding`.
pub reasoning: String,
/// False-positive flag. `into_finding` logs it but does not act on it.
/// NOTE(review): presumably callers filter on this before converting — confirm.
pub is_false_positive: bool,
}
/// Converts a [`SemanticFinding`] into a [`Finding`] tagged with
/// [`ScanPass::Semantic`].
///
/// * `sem` - the semantic result to convert; consumed.
/// * `candidate` - the candidate the result was produced for. It supplies the
///   file path, the language, and a fallback snippet.
/// * `seed` - an optional earlier finding. When it carries a `pattern_rule`
///   and its line range overlaps `sem`'s range, the new rule id becomes
///   `<pattern_rule>-semantic`.
/// * `scan_root` - directory against which `candidate.file` is resolved when
///   reading the snippet from disk.
///
/// Without an overlapping seed rule, the rule id is `semantic-<category slug>`.
/// The code snippet is read from the file on disk, then from the candidate's
/// own snippet, and is left empty when neither yields text. `sem.reasoning`
/// and `sem.is_false_positive` are only logged here; they do not influence
/// the returned value.
pub fn into_finding(
    sem: SemanticFinding,
    candidate: &Candidate,
    seed: Option<&Finding>,
    scan_root: &Path,
) -> Finding {
    let first_line = sem.line_start;
    let last_line = sem.line_end;

    tracing::debug!(
        file = %candidate.file.display(),
        lines = format!("{}-{}", first_line, last_line),
        category = ?sem.category,
        confidence = ?sem.confidence,
        is_false_positive = sem.is_false_positive,
        reasoning_len = sem.reasoning.len(),
        "semantic finding"
    );

    // Lineage is inherited only from a seed whose range touches this finding
    // and which actually names a pattern rule.
    let inherited_rule = seed
        .filter(|s| ranges_overlap(s.line_start, s.line_end, first_line, last_line))
        .and_then(|s| s.pattern_rule.as_deref());
    let rule_id = match inherited_rule {
        Some(parent) => format!("{parent}-semantic"),
        None => format!("semantic-{}", sem.category.slug()),
    };

    // Prefer the text on disk; fall back to the candidate's captured snippet,
    // and finally to an empty string.
    let code_snippet = match extract_lines(scan_root, &candidate.file, first_line, last_line) {
        Some(text) => text,
        None => slice_candidate_snippet(candidate, first_line, last_line).unwrap_or_default(),
    };

    let id = compute_finding_id(
        &rule_id,
        &candidate.file,
        first_line,
        last_line,
        &code_snippet,
    );

    Finding {
        id,
        file: candidate.file.clone(),
        line_start: first_line,
        line_end: last_line,
        code_snippet,
        language: candidate.language,
        category: sem.category,
        confidence: sem.confidence,
        description: sem.description,
        pattern_rule: Some(rule_id),
        rego_stub: None,
        pass: ScanPass::Semantic,
        surface: Surface::classify(&candidate.file),
    }
}
/// Cuts the lines `sem_start..=sem_end` out of `candidate.source_snippet`.
///
/// Line numbers are in file coordinates; the first line of the snippet is
/// taken to be `candidate.line_start`.
///
/// Returns `None` when the snippet is empty, when the requested range is
/// invalid (`sem_start == 0` or `sem_end < sem_start`), when it is not fully
/// inside `candidate.line_start..=candidate.line_end`, or when the snippet
/// has no line at `sem_start`. If the snippet holds fewer lines than the
/// candidate window claims, the result is clamped to the lines available.
fn slice_candidate_snippet(
    candidate: &crate::deep::candidate::Candidate,
    sem_start: usize,
    sem_end: usize,
) -> Option<String> {
    let snippet = candidate.source_snippet.as_str();
    let range_is_valid = sem_start != 0 && sem_start <= sem_end;
    let inside_window = candidate.line_start <= sem_start && sem_end <= candidate.line_end;
    if snippet.is_empty() || !range_is_valid || !inside_window {
        return None;
    }

    // Both subtractions are safe: the guards above ensure
    // `candidate.line_start <= sem_start <= sem_end`.
    let offset = sem_start - candidate.line_start;
    let wanted = sem_end - sem_start + 1;

    // `take` stops at the end of the snippet, which clamps the range.
    let picked: Vec<&str> = snippet.lines().skip(offset).take(wanted).collect();
    if picked.is_empty() {
        None
    } else {
        Some(picked.join("\n"))
    }
}
/// Reads `scan_root/relative` and returns lines `start..=end` (1-based,
/// inclusive) joined with `\n`.
///
/// Returns `None` when `start` is `0`, when `end < start`, when the file
/// cannot be read as UTF-8 text, or when the file has no line numbered
/// `start`. An `end` past the last line is clamped to the end of the file.
/// The range is validated before the file is touched.
fn extract_lines(scan_root: &Path, relative: &Path, start: usize, end: usize) -> Option<String> {
    if start < 1 || start > end {
        return None;
    }

    let text = std::fs::read_to_string(scan_root.join(relative)).ok()?;

    // `start >= 1` and `end >= start`, so neither expression can underflow.
    // `take` stops at end of file, which provides the clamping.
    let selected: Vec<&str> = text
        .lines()
        .skip(start - 1)
        .take(end - start + 1)
        .collect();

    if selected.is_empty() {
        None
    } else {
        Some(selected.join("\n"))
    }
}
/// Reports whether the inclusive ranges `a_start..=a_end` and
/// `b_start..=b_end` share at least one value.
///
/// Ranges that merely touch at an endpoint count as overlapping.
fn ranges_overlap(a_start: usize, a_end: usize, b_start: usize, b_end: usize) -> bool {
    // Two ranges are disjoint exactly when one ends before the other begins.
    let a_entirely_before_b = a_end < b_start;
    let b_entirely_before_a = b_end < a_start;
    !(a_entirely_before_b || b_entirely_before_a)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::deep::candidate::CandidateKind;
    use crate::types::Language;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::tempdir;

    /// Relative path used by the tests that need a source file on disk.
    const AUTH_TS: &str = "src/auth.ts";

    /// Builds an escalation candidate spanning lines 1-100 with no snippet.
    fn make_candidate(file: &str, language: Language) -> Candidate {
        Candidate {
            kind: CandidateKind::Escalation,
            file: PathBuf::from(file),
            language,
            line_start: 1,
            line_end: 100,
            source_snippet: String::new(),
            imports: Vec::new(),
            original_finding_id: Some("structural-1".into()),
            seed_category: Some(AuthCategory::Custom),
        }
    }

    /// Builds a structural seed finding on line 5 with the given rule name.
    fn make_seed(pattern_rule: Option<&str>) -> Finding {
        Finding {
            id: "structural-1".into(),
            file: PathBuf::from(AUTH_TS),
            line_start: 5,
            line_end: 5,
            code_snippet: String::new(),
            language: Language::TypeScript,
            category: AuthCategory::Custom,
            confidence: Confidence::Low,
            description: "matched custom rule".into(),
            pattern_rule: pattern_rule.map(String::from),
            rego_stub: None,
            pass: ScanPass::Structural,
            surface: Surface::Backend,
        }
    }

    /// Builds a high-confidence RBAC semantic finding over the given lines.
    fn make_semantic(line_start: usize, line_end: usize) -> SemanticFinding {
        SemanticFinding {
            line_start,
            line_end,
            category: AuthCategory::Rbac,
            confidence: Confidence::High,
            description: "isAdmin role check".into(),
            reasoning: "function name + return value structure indicates rbac".into(),
            is_false_positive: false,
        }
    }

    /// Builds a cold-region candidate for `a.ts` whose window starts at line
    /// 10 and ends at `line_end`, carrying `snippet` as its source text.
    fn cold_region(line_end: usize, snippet: &str) -> Candidate {
        Candidate {
            kind: CandidateKind::ColdRegion,
            file: PathBuf::from("a.ts"),
            language: Language::TypeScript,
            line_start: 10,
            line_end,
            source_snippet: snippet.to_string(),
            imports: Vec::new(),
            original_finding_id: None,
            seed_category: None,
        }
    }

    /// Produces `line 1\nline 2\n...\nline <count>` without a trailing newline.
    fn numbered_lines(count: usize) -> String {
        (1..=count)
            .map(|i| format!("line {i}"))
            .collect::<Vec<_>>()
            .join("\n")
    }

    /// Writes `content` to `dir/name`, creating parent directories as needed.
    fn write_file(dir: &Path, name: &str, content: &str) -> PathBuf {
        let target = dir.join(name);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&target, content).unwrap();
        target
    }

    #[test]
    fn into_finding_marks_pass_semantic() {
        let root = tempdir().unwrap();
        write_file(root.path(), AUTH_TS, "line one\nline two\nline three\n");
        let candidate = make_candidate(AUTH_TS, Language::TypeScript);

        let finding = into_finding(make_semantic(1, 2), &candidate, None, root.path());

        assert_eq!(finding.pass, ScanPass::Semantic);
    }

    #[test]
    fn into_finding_marks_seed_lineage_when_ranges_overlap() {
        let root = tempdir().unwrap();
        write_file(root.path(), AUTH_TS, "line\n");
        let candidate = make_candidate(AUTH_TS, Language::TypeScript);
        let seed = make_seed(Some("ts-foo"));

        // Lines 4-7 enclose the seed's line 5.
        let finding = into_finding(make_semantic(4, 7), &candidate, Some(&seed), root.path());

        assert_eq!(finding.pattern_rule.as_deref(), Some("ts-foo-semantic"));
    }

    #[test]
    fn into_finding_drops_seed_lineage_when_ranges_disjoint() {
        let root = tempdir().unwrap();
        write_file(root.path(), AUTH_TS, "line\n");
        let candidate = make_candidate(AUTH_TS, Language::TypeScript);
        let seed = make_seed(Some("ts-ownership-check"));

        // Lines 17-23 are far away from the seed's line 5.
        let semantic = SemanticFinding {
            category: AuthCategory::FeatureGate,
            ..make_semantic(17, 23)
        };
        let finding = into_finding(semantic, &candidate, Some(&seed), root.path());

        assert_eq!(
            finding.pattern_rule.as_deref(),
            Some("semantic-feature_gate")
        );
    }

    #[test]
    fn into_finding_uses_synthetic_rule_id_for_cold_regions() {
        let root = tempdir().unwrap();
        write_file(root.path(), AUTH_TS, "line\n");
        let candidate = make_candidate(AUTH_TS, Language::TypeScript);

        let first = into_finding(make_semantic(1, 1), &candidate, None, root.path());
        assert_eq!(first.pattern_rule.as_deref(), Some("semantic-rbac"));

        // Converting the same input twice must yield the same id.
        let second = into_finding(make_semantic(1, 1), &candidate, None, root.path());
        assert_eq!(first.id, second.id);
    }

    #[test]
    fn into_finding_id_differs_when_lines_differ() {
        let root = tempdir().unwrap();
        write_file(root.path(), AUTH_TS, &numbered_lines(20));
        let candidate = make_candidate(AUTH_TS, Language::TypeScript);

        let at_line_one = into_finding(make_semantic(1, 1), &candidate, None, root.path());
        let at_line_five = into_finding(make_semantic(5, 5), &candidate, None, root.path());

        assert_ne!(at_line_one.id, at_line_five.id);
    }

    #[test]
    fn into_finding_extracts_code_snippet_from_file() {
        let root = tempdir().unwrap();
        write_file(root.path(), AUTH_TS, &numbered_lines(10));
        let candidate = make_candidate(AUTH_TS, Language::TypeScript);

        let finding = into_finding(make_semantic(3, 5), &candidate, None, root.path());
        let snippet = &finding.code_snippet;

        assert!(snippet.contains("line 3"));
        assert!(snippet.contains("line 4"));
        assert!(snippet.contains("line 5"));
        assert!(!snippet.contains("line 2"));
        assert!(!snippet.contains("line 6"));
    }

    #[test]
    fn into_finding_falls_back_to_empty_snippet_on_read_error() {
        let root = tempdir().unwrap();
        // No file is written, and the candidate carries no snippet either.
        let candidate = make_candidate("nonexistent.ts", Language::TypeScript);

        let finding = into_finding(make_semantic(1, 5), &candidate, None, root.path());

        assert_eq!(finding.code_snippet, "");
        assert_eq!(finding.pass, ScanPass::Semantic);
        assert_eq!(finding.line_start, 1);
        assert_eq!(finding.line_end, 5);
    }

    #[test]
    fn into_finding_falls_back_to_candidate_snippet_when_file_unreadable() {
        let root = tempdir().unwrap();
        let mut candidate = make_candidate("missing.ts", Language::TypeScript);
        candidate.line_start = 10;
        candidate.line_end = 14;
        candidate.source_snippet = "line 10\nline 11\nline 12\nline 13\nline 14".to_string();

        let finding = into_finding(make_semantic(11, 12), &candidate, None, root.path());
        let snippet = &finding.code_snippet;

        assert!(snippet.contains("line 11"));
        assert!(snippet.contains("line 12"));
        assert!(!snippet.contains("line 10"));
        assert!(!snippet.contains("line 13"));
    }

    #[test]
    fn slice_candidate_snippet_rejects_ranges_outside_window() {
        let candidate = cold_region(14, "line 10\nline 11\nline 12\nline 13\nline 14");

        // Before the window, after the window, inverted, and zero-based start.
        assert!(slice_candidate_snippet(&candidate, 5, 8).is_none());
        assert!(slice_candidate_snippet(&candidate, 20, 22).is_none());
        assert!(slice_candidate_snippet(&candidate, 12, 11).is_none());
        assert!(slice_candidate_snippet(&candidate, 0, 12).is_none());

        // A valid range still yields nothing when there is no snippet text.
        let without_text = cold_region(14, "");
        assert!(slice_candidate_snippet(&without_text, 11, 12).is_none());
    }

    #[test]
    fn slice_candidate_snippet_clamps_when_snippet_was_truncated() {
        // The window claims lines 10-20 but only three lines were captured.
        let candidate = cold_region(20, "line 10\nline 11\nline 12");

        let got = slice_candidate_snippet(&candidate, 11, 15).unwrap();

        assert!(got.contains("line 11"));
        assert!(got.contains("line 12"));
    }

    #[test]
    fn ranges_overlap_covers_inclusive_boundaries() {
        // Touching endpoints, containment in both directions, nesting.
        assert!(ranges_overlap(5, 10, 10, 15));
        assert!(ranges_overlap(10, 15, 5, 10));
        assert!(ranges_overlap(5, 10, 7, 7));
        assert!(ranges_overlap(7, 7, 5, 10));
        assert!(ranges_overlap(1, 100, 50, 60));

        // Adjacent but not touching.
        assert!(!ranges_overlap(5, 10, 11, 20));
        assert!(!ranges_overlap(11, 20, 5, 10));
        assert!(!ranges_overlap(5, 5, 6, 6));
    }

    #[test]
    fn category_slugs_round_trip() {
        let expectations = [
            (AuthCategory::Rbac, "rbac"),
            (AuthCategory::Abac, "abac"),
            (AuthCategory::Middleware, "middleware"),
            (AuthCategory::BusinessRule, "business_rule"),
            (AuthCategory::Ownership, "ownership"),
            (AuthCategory::FeatureGate, "feature_gate"),
            (AuthCategory::Custom, "custom"),
        ];
        for (category, expected) in expectations {
            assert_eq!(category.slug(), expected);
        }
    }
}