use std::path::Path;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use crate::analysis::walker::Language;
// `comments` supplies `scan_unknown`, which `extract_signals` uses for
// `Language::Unknown`.
pub mod comments;
// The remaining modules each supply an `extract` function that
// `extract_signals` dispatches to according to the requested language.
pub mod rust;
pub mod c;
pub mod cpp;
pub mod elixir;
pub mod go;
pub mod haskell;
pub mod java;
pub mod javascript;
pub mod python;
pub mod ruby;
pub mod scala;
pub mod typescript;
/// Priority tier assigned to a [`Signal`].
///
/// Serialized in `snake_case` (`"high"`, `"medium"`, `"low"`).
/// [`sort_canonical`] places `High` signals first and `Low` signals last.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SignalTier {
/// Sorted ahead of every other tier.
High,
/// Sorted between `High` and `Low`.
Medium,
/// Sorted after every other tier.
Low,
}
/// Category of a [`Signal`].
///
/// Serialized in `snake_case` (for example `WarnComment` becomes
/// `"warn_comment"`). Every kind has a default tier, available through
/// [`SignalKind::default_tier`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SignalKind {
/// Default tier: [`SignalTier::High`].
Panic,
/// Default tier: [`SignalTier::High`].
Assert,
/// Default tier: [`SignalTier::High`].
WarnComment,
/// Default tier: [`SignalTier::Medium`].
LinterDisable,
/// Default tier: [`SignalTier::Medium`].
UnwrapLike,
/// Default tier: [`SignalTier::Medium`].
Guard,
/// Default tier: [`SignalTier::Low`].
RawApi,
}
impl SignalKind {
    /// Returns the tier a signal of this kind is given by default.
    ///
    /// * `Panic`, `Assert` and `WarnComment` map to [`SignalTier::High`].
    /// * `LinterDisable`, `Guard` and `UnwrapLike` map to [`SignalTier::Medium`].
    /// * `RawApi` maps to [`SignalTier::Low`].
    pub fn default_tier(self) -> SignalTier {
        // The match is exhaustive on purpose: adding a variant must force a
        // decision about its tier here.
        match self {
            Self::Panic | Self::Assert | Self::WarnComment => SignalTier::High,
            Self::LinterDisable | Self::Guard | Self::UnwrapLike => SignalTier::Medium,
            Self::RawApi => SignalTier::Low,
        }
    }
}
/// A single finding: where it was seen, how it is classified, and the text
/// backing it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Signal {
/// Line of the finding within the file; secondary sort key in
/// [`sort_canonical`].
/// NOTE(review): presumably 1-based — confirm against the extractors.
pub file_line: u32,
/// Tier of the finding; primary sort key in [`sort_canonical`].
pub tier: SignalTier,
/// Category of the finding.
pub kind: SignalKind,
/// Text supporting the finding.
/// NOTE(review): presumably produced via `trim_evidence` — confirm against
/// the extractors.
pub evidence: String,
}
/// Signals collected for one file.
///
/// The field notes below describe reports built by [`extract_signals`];
/// because all fields are public, other constructors may fill them differently.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignalReport {
/// The analysed path, rendered with `Path::display`.
pub file: String,
/// Language label as returned by [`language_label`].
pub language: String,
/// Number of entries in `signals`; refreshed by [`SignalReport::truncate`]
/// whenever it actually drops entries.
pub signal_count: usize,
/// The findings, ordered with [`sort_canonical`].
pub signals: Vec<Signal>,
}
impl SignalReport {
    /// Keeps at most `limit` signals, dropping the ones at the end of the list.
    ///
    /// A `limit` of `0` means "no limit". When the report already holds
    /// `limit` signals or fewer, nothing is changed — including
    /// `signal_count`, which is only refreshed when entries are removed.
    pub fn truncate(&mut self, limit: usize) {
        let unlimited = limit == 0;
        if unlimited || self.signals.len() <= limit {
            return;
        }

        self.signals.truncate(limit);
        self.signal_count = self.signals.len();
    }
}
/// Maps a [`Language`] to the lowercase label stored in
/// [`SignalReport::language`].
///
/// The match is exhaustive, so a new `Language` variant must be given a
/// label here before the crate compiles again.
pub fn language_label(lang: Language) -> &'static str {
    match lang {
        // Arms are listed alphabetically by label; `Unknown` is kept last.
        Language::C => "c",
        Language::Cpp => "cpp",
        Language::Elixir => "elixir",
        Language::Go => "go",
        Language::Haskell => "haskell",
        Language::Java => "java",
        Language::JavaScript => "javascript",
        Language::Python => "python",
        Language::Ruby => "ruby",
        Language::Rust => "rust",
        Language::Scala => "scala",
        Language::TypeScript => "typescript",
        Language::Unknown => "unknown",
    }
}
/// Reads `path` from disk and collects the signals found in it for `language`.
///
/// The file contents are handed to the extractor module matching `language`;
/// `Language::Unknown` goes through `comments::scan_unknown` instead. The
/// collected signals are ordered with [`sort_canonical`] and wrapped in a
/// [`SignalReport`] whose `signal_count` equals the number of signals found.
///
/// # Errors
///
/// Returns an error when the file cannot be read (the error carries a
/// `failed to read <path>` context) or when the selected extractor fails.
pub fn extract_signals(path: &Path, language: Language) -> Result<SignalReport> {
    let source = std::fs::read_to_string(path)
        .with_context(|| format!("failed to read {}", path.display()))?;

    // Dispatch is order-independent; arms are listed alphabetically with the
    // fallback last.
    let mut signals = match language {
        Language::C => c::extract(&source)?,
        Language::Cpp => cpp::extract(&source)?,
        Language::Elixir => elixir::extract(&source)?,
        Language::Go => go::extract(&source)?,
        Language::Haskell => haskell::extract(&source)?,
        Language::Java => java::extract(&source)?,
        Language::JavaScript => javascript::extract(&source)?,
        Language::Python => python::extract(&source)?,
        Language::Ruby => ruby::extract(&source)?,
        Language::Rust => rust::extract(&source)?,
        Language::Scala => scala::extract(&source)?,
        Language::TypeScript => typescript::extract(&source)?,
        Language::Unknown => comments::scan_unknown(&source, language),
    };
    sort_canonical(&mut signals);

    let signal_count = signals.len();
    let report = SignalReport {
        file: path.display().to_string(),
        language: language_label(language).to_string(),
        signal_count,
        signals,
    };
    Ok(report)
}
/// Returns the text covered by `node` inside `source`.
///
/// The node's end offset is clamped to `source.len()`. Bytes that are not
/// valid UTF-8 are replaced lossily. An empty or inverted byte range yields
/// an empty string rather than a panic.
pub(crate) fn node_text(source: &[u8], node: tree_sitter::Node) -> String {
    let clamped_end = source.len().min(node.end_byte());

    // `get` returns `None` for an inverted range, which covers the
    // "start past the clamped end" case without an explicit comparison.
    source
        .get(node.start_byte()..clamped_end)
        .map(|bytes| String::from_utf8_lossy(bytes).into_owned())
        .unwrap_or_default()
}
/// Normalizes a source snippet into single-line evidence of bounded length.
///
/// Line breaks (`\r\n`, `\n`, and a lone `\r`) each become one space, then
/// surrounding whitespace is removed. If the result is longer than 200
/// characters it is cut to its first 200 characters, trailing whitespace at
/// the cut is dropped, and `…` is appended.
///
/// Trimming happens *before* the length check, so leading or trailing
/// padding neither consumes the 200-character budget nor causes a snippet
/// that fits to be marked as truncated.
pub(crate) fn trim_evidence(text: &str) -> String {
    const MAX_CHARS: usize = 200;

    // Collapse CRLF first so it yields one space rather than two.
    let one_line = text
        .replace("\r\n", " ")
        .replace(|c: char| matches!(c, '\r' | '\n'), " ");
    let trimmed = one_line.trim();

    // The byte offset of the (MAX_CHARS + 1)-th character, if there is one,
    // is the cut position; it is a char boundary, so slicing is safe.
    match trimmed.char_indices().nth(MAX_CHARS) {
        None => trimmed.to_string(),
        Some((cut, _)) => format!("{}…", trimmed[..cut].trim_end()),
    }
}
/// Sorts signals into report order: `High` tier first, then `Medium`, then
/// `Low`; within a tier, by ascending `file_line`.
///
/// The sort is stable, so signals with the same tier and line keep their
/// relative order.
pub fn sort_canonical(signals: &mut [Signal]) {
    /// Position of a tier in the output; smaller values sort earlier.
    fn tier_position(tier: SignalTier) -> u8 {
        match tier {
            SignalTier::High => 0,
            SignalTier::Medium => 1,
            SignalTier::Low => 2,
        }
    }

    signals.sort_by_key(|signal| (tier_position(signal.tier), signal.file_line));
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Signal` fixture from its four fields.
    fn signal(file_line: u32, tier: SignalTier, kind: SignalKind, evidence: &str) -> Signal {
        Signal {
            file_line,
            tier,
            kind,
            evidence: evidence.into(),
        }
    }

    #[test]
    fn signal_tier_default_mapping() {
        let expectations = [
            (SignalKind::Panic, SignalTier::High),
            (SignalKind::WarnComment, SignalTier::High),
            (SignalKind::Assert, SignalTier::High),
            (SignalKind::LinterDisable, SignalTier::Medium),
            (SignalKind::Guard, SignalTier::Medium),
            (SignalKind::UnwrapLike, SignalTier::Medium),
            (SignalKind::RawApi, SignalTier::Low),
        ];
        for (kind, tier) in expectations {
            assert_eq!(kind.default_tier(), tier);
        }
    }

    #[test]
    fn sort_canonical_orders_by_tier_then_line() {
        let mut signals = vec![
            signal(5, SignalTier::Low, SignalKind::RawApi, "a"),
            signal(2, SignalTier::High, SignalKind::Panic, "b"),
            signal(10, SignalTier::High, SignalKind::WarnComment, "c"),
            signal(1, SignalTier::Medium, SignalKind::Guard, "d"),
        ];

        sort_canonical(&mut signals);

        // High tier (lines 2, 10), then Medium (line 1), then Low (line 5).
        let lines: Vec<u32> = signals.iter().map(|s| s.file_line).collect();
        assert_eq!(lines, [2, 10, 1, 5]);
    }

    #[test]
    fn language_label_is_stable_snake_case() {
        let cases = [
            (Language::Rust, "rust"),
            (Language::TypeScript, "typescript"),
            (Language::Cpp, "cpp"),
            (Language::Haskell, "haskell"),
            (Language::Unknown, "unknown"),
        ];
        for (lang, label) in cases {
            assert_eq!(language_label(lang), label);
        }
    }

    #[test]
    fn truncate_respects_limit_zero_means_unlimited() {
        let signals = vec![signal(1, SignalTier::High, SignalKind::Panic, "a"); 3];
        let mut report = SignalReport {
            file: "x".into(),
            language: "rust".into(),
            signal_count: 3,
            signals,
        };

        // A limit of zero leaves the report untouched.
        report.truncate(0);
        assert_eq!(report.signal_count, 3);

        report.truncate(2);
        assert_eq!(report.signal_count, 2);
    }
}