use std::collections::HashMap;
use std::path::Path;
use std::process::{Command, Stdio};
use anyhow::{Context, Result};
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
/// Matches one line of non-pretty `tsc` output such as
/// `src/index.ts(10,5): error TS2322: message`.
///
/// Capture groups: 1 = file, 2 = line, 3 = column, 4 = severity
/// (`error` or `warning`), 5 = the `TS####` code, 6 = message.
static TSC_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(.+?)\((\d+),(\d+)\):\s*(error|warning)\s+(TS\d+):\s*(.+)")
.expect("Invalid TSC regex pattern")
});
/// Matches one `file:line:column: message` line, the shape parsed from `go vet`.
///
/// Capture groups: 1 = file, 2 = line, 3 = column, 4 = message. The pattern
/// carries no severity; `parse_go_vet_output` assigns one itself.
static GO_VET_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(.+?):(\d+):(\d+):\s*(.+)").expect("Invalid go vet regex pattern")
});
static GCC_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(.+?):(\d+):(\d+):\s*(error|warning):\s*(.+)")
.expect("Invalid GCC regex pattern")
});
/// Matches one `file:line: error|warning: message` line, the shape parsed from `javac`.
///
/// Capture groups: 1 = file, 2 = line, 3 = severity, 4 = message. There is no
/// column group in this pattern.
static JAVAC_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(.+?):(\d+):\s*(error|warning):\s*(.+)").expect("Invalid javac regex pattern")
});
/// Matches an `<error ...>` tag in cppcheck's XML report.
///
/// Capture groups: 1 = `id`, 2 = `severity`, 3 = `msg`. The attributes must
/// appear in exactly that order for the tag to match.
static CPPCHECK_ERROR_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"<error[^>]*id="([^"]*)"[^>]*severity="([^"]*)"[^>]*msg="([^"]*)"[^>]*>"#)
.expect("Invalid cppcheck error regex pattern")
});
/// Matches a `<location ...` tag in cppcheck's XML report.
///
/// Capture groups: 1 = `file`, 2 = `line`, 3 = `column`. The attributes must
/// appear in exactly that order, and a tag without `column` does not match.
static CPPCHECK_LOCATION_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"<location[^>]*file="([^"]*)"[^>]*line="(\d+)"[^>]*column="(\d+)""#)
.expect("Invalid cppcheck location regex pattern")
});
/// A single finding reported by one external tool, normalised to a common shape.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Diagnostic {
/// File path exactly as the tool reported it.
pub file: String,
/// Line number taken from the tool's output.
pub line: u32,
/// Column number taken from the tool's output; the javac parser always stores 0
/// because its pattern has no column.
pub column: u32,
/// Severity string as reported by the tool (or fixed by the parser), e.g.
/// `"error"` or `"warning"`. Other values are possible and are not normalised.
pub severity: String,
/// Human-readable message text.
pub message: String,
/// Rule or error code (e.g. `TS2322`, `F401`); empty when the tool gives none.
#[serde(default)]
pub rule: String,
/// Name of the tool that produced this entry (e.g. `"pyright"`, `"go vet"`).
pub source: String,
}
/// Aggregated output of a diagnostics run over a file or a project directory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiagnosticsResult {
/// Display form of the analysed path.
pub target: String,
/// Language identifier the run was dispatched on (given by the caller or detected).
pub language: String,
/// Names of the tools recorded as used for this run.
pub tools: Vec<String>,
/// All collected diagnostics, sorted by (file, line, column).
pub diagnostics: Vec<Diagnostic>,
/// Number of diagnostics whose severity is exactly `"error"`.
pub error_count: usize,
/// Number of diagnostics whose severity is exactly `"warning"`.
pub warning_count: usize,
/// Number of distinct files that have at least one diagnostic. Filled in by
/// `get_project_diagnostics`; `get_diagnostics` always stores 0.
#[serde(default)]
pub file_count: usize,
}
/// Maps a file's extension to a language identifier.
///
/// The comparison is case-sensitive. Paths with no extension, a non-UTF-8
/// extension, or an extension missing from the table yield `"unknown"`.
pub fn detect_language(file_path: &Path) -> &'static str {
    // (extension, language) pairs; `jsx` intentionally shares the `tsx` identifier.
    const EXTENSIONS: &[(&str, &str)] = &[
        ("py", "python"),
        ("ts", "typescript"),
        ("tsx", "tsx"),
        ("js", "javascript"),
        ("mjs", "javascript"),
        ("cjs", "javascript"),
        ("jsx", "tsx"),
        ("go", "go"),
        ("rs", "rust"),
        ("java", "java"),
        ("c", "c"),
        ("h", "c"),
        ("cpp", "cpp"),
        ("cc", "cpp"),
        ("cxx", "cpp"),
        ("hpp", "cpp"),
        ("rb", "ruby"),
        ("php", "php"),
        ("kt", "kotlin"),
        ("swift", "swift"),
        ("cs", "csharp"),
        ("scala", "scala"),
        ("ex", "elixir"),
        ("exs", "elixir"),
    ];

    let Some(extension) = file_path.extension().and_then(|raw| raw.to_str()) else {
        return "unknown";
    };
    for &(known, language) in EXTENSIONS {
        if known == extension {
            return language;
        }
    }
    "unknown"
}
/// Guesses the primary language of the project rooted at `path`.
///
/// Well-known marker files are checked first, in this order: `Cargo.toml`
/// (rust), `go.mod` (go), `pyproject.toml` / `setup.py` (python),
/// `package.json` (typescript when `tsconfig.json` is also present, otherwise
/// javascript), `pom.xml` / `build.gradle` (java).
///
/// Only when no marker exists is the directory listed (non-recursively) and
/// the language with the most recognised files returned. Ties are broken by
/// choosing the alphabetically first language name, so the result does not
/// depend on `HashMap` iteration order. Returns `"unknown"` when nothing is
/// recognised or the directory cannot be read.
pub fn detect_project_language(path: &Path) -> &'static str {
    let has = |name: &str| path.join(name).exists();

    if has("Cargo.toml") {
        return "rust";
    }
    if has("go.mod") {
        return "go";
    }
    if has("pyproject.toml") || has("setup.py") {
        return "python";
    }
    if has("package.json") {
        return if has("tsconfig.json") {
            "typescript"
        } else {
            "javascript"
        };
    }
    if has("pom.xml") || has("build.gradle") {
        return "java";
    }

    // No marker file: fall back to counting top-level files per language.
    let mut counts: HashMap<&'static str, usize> = HashMap::new();
    if let Ok(entries) = std::fs::read_dir(path) {
        for entry in entries.flatten() {
            let entry_path = entry.path();
            if !entry_path.is_file() {
                continue;
            }
            let lang = detect_language(&entry_path);
            if lang != "unknown" {
                *counts.entry(lang).or_insert(0) += 1;
            }
        }
    }

    counts
        .into_iter()
        // Highest count wins; on equal counts the reversed name comparison makes
        // the alphabetically smaller name the maximum.
        .max_by(|(lang_a, count_a), (lang_b, count_b)| {
            count_a.cmp(count_b).then_with(|| lang_b.cmp(lang_a))
        })
        .map(|(lang, _)| lang)
        .unwrap_or("unknown")
}
/// Returns `true` when `which::which` can resolve `cmd` to an executable.
fn command_exists(cmd: &str) -> bool {
    matches!(which::which(cmd), Ok(_))
}
/// Converts raw bytes to an owned `String`.
///
/// Valid UTF-8 (checked with `simdutf8`) is copied as-is; otherwise the bytes go
/// through `String::from_utf8_lossy`, which substitutes U+FFFD for invalid sequences.
#[inline]
fn fast_utf8_to_string(bytes: &[u8]) -> String {
    if let Ok(text) = simdutf8::basic::from_utf8(bytes) {
        return text.to_string();
    }
    String::from_utf8_lossy(bytes).into_owned()
}
fn run_command(
cmd: &str,
args: &[&str],
cwd: Option<&Path>,
_timeout_secs: u64,
) -> Result<(String, String, bool)> {
let mut command = Command::new(cmd);
command
.args(args)
.stdout(Stdio::piped())
.stderr(Stdio::piped());
if let Some(dir) = cwd {
command.current_dir(dir);
}
let child = command.spawn().context("Failed to spawn command")?;
let output = child
.wait_with_output()
.context("Failed to wait for command")?;
let stdout = fast_utf8_to_string(&output.stdout);
let stderr = fast_utf8_to_string(&output.stderr);
let success = output.status.success();
Ok((stdout, stderr, success))
}
/// Top-level JSON object parsed from `pyright --outputjson`; only the
/// `generalDiagnostics` array is read, and it defaults to empty when absent.
#[derive(Debug, Deserialize)]
struct PyrightOutput {
#[serde(default, rename = "generalDiagnostics")]
general_diagnostics: Vec<PyrightDiagnostic>,
}
/// One entry of pyright's `generalDiagnostics` array.
#[derive(Debug, Deserialize)]
struct PyrightDiagnostic {
// Required: an entry without `file`, `severity` or `message` fails deserialization.
file: String,
// Defaults to position 0/0 when the entry has no `range`.
#[serde(default)]
range: PyrightRange,
severity: String,
message: String,
// Empty when the entry carries no `rule`.
#[serde(default)]
rule: String,
}
/// Source range of a pyright diagnostic; only its `start` position is read.
#[derive(Debug, Deserialize, Default)]
struct PyrightRange {
start: PyrightPosition,
}
/// A line/character position as reported by pyright.
///
/// `parse_pyright_output` adds 1 to both fields, i.e. it treats them as
/// zero-based. NOTE(review): confirm against pyright's JSON output documentation.
#[derive(Debug, Deserialize, Default)]
struct PyrightPosition {
line: u32,
character: u32,
}
fn parse_pyright_output(stdout: &str) -> Vec<Diagnostic> {
let Ok(data) = serde_json::from_str::<PyrightOutput>(stdout) else {
return Vec::new();
};
data.general_diagnostics
.into_iter()
.map(|d| Diagnostic {
file: d.file,
line: d.range.start.line + 1, column: d.range.start.character + 1,
severity: d.severity,
message: d.message,
rule: d.rule,
source: "pyright".to_string(),
})
.collect()
}
/// One entry of the array printed by `ruff check --output-format=json`.
#[derive(Debug, Deserialize)]
struct RuffDiagnostic {
    filename: String,
    /// Defaults to row 0 / column 0 when the entry has no `location`.
    #[serde(default)]
    location: RuffLocation,
    message: String,
    /// Rule code such as `F401`.
    ///
    /// Modelled as `Option` because Ruff emits `"code": null` for entries that
    /// have no rule (syntax errors). A plain `String` rejects `null`, which made
    /// the whole array fail to deserialize and discarded every diagnostic.
    #[serde(default)]
    code: Option<String>,
}

/// Row/column of a Ruff diagnostic, stored as reported.
#[derive(Debug, Deserialize, Default)]
struct RuffLocation {
    row: u32,
    column: u32,
}

/// Converts Ruff's JSON array into [`Diagnostic`]s.
///
/// Every entry is reported with severity `"warning"`; the rule is empty when
/// Ruff gives no code. Input that does not deserialize as a Ruff report
/// produces an empty list.
fn parse_ruff_output(stdout: &str) -> Vec<Diagnostic> {
    let Ok(data) = serde_json::from_str::<Vec<RuffDiagnostic>>(stdout) else {
        return Vec::new();
    };
    data.into_iter()
        .map(|d| Diagnostic {
            file: d.filename,
            line: d.location.row,
            column: d.location.column,
            severity: "warning".to_string(),
            message: d.message,
            rule: d.code.unwrap_or_default(),
            source: "ruff".to_string(),
        })
        .collect()
}
/// Extracts [`Diagnostic`]s from `tsc` text output, one per line matching `TSC_RE`.
///
/// Lines that do not match, or whose line/column numbers do not fit in `u32`,
/// are skipped.
fn parse_tsc_output(stderr: &str) -> Vec<Diagnostic> {
    let mut found = Vec::new();
    for raw in stderr.lines() {
        let Some(caps) = TSC_RE.captures(raw) else {
            continue;
        };
        let (Ok(line), Ok(column)) = (caps[2].parse::<u32>(), caps[3].parse::<u32>()) else {
            continue;
        };
        found.push(Diagnostic {
            file: caps[1].to_string(),
            line,
            column,
            severity: caps[4].to_string(),
            message: caps[6].to_string(),
            rule: caps[5].to_string(),
            source: "tsc".to_string(),
        });
    }
    found
}
/// Extracts [`Diagnostic`]s from `go vet` text output, one per line matching `GO_VET_RE`.
///
/// Every finding gets severity `"error"` and an empty rule. Lines that do not
/// match, or whose numbers do not fit in `u32`, are skipped.
fn parse_go_vet_output(stderr: &str) -> Vec<Diagnostic> {
    stderr
        .lines()
        .filter_map(|raw| GO_VET_RE.captures(raw))
        .filter_map(|caps| {
            let line = caps[2].parse::<u32>().ok()?;
            let column = caps[3].parse::<u32>().ok()?;
            Some(Diagnostic {
                file: caps[1].to_string(),
                line,
                column,
                severity: "error".to_string(),
                message: caps[4].to_string(),
                rule: String::new(),
                source: "go vet".to_string(),
            })
        })
        .collect()
}
/// Top-level JSON object parsed from golangci-lint's JSON report; only the
/// `Issues` array is read, and it defaults to empty when absent.
#[derive(Debug, Deserialize)]
struct GolangciLintOutput {
#[serde(default, rename = "Issues")]
issues: Vec<GolangciLintIssue>,
}
/// One entry of golangci-lint's `Issues` array. Every field falls back to its
/// default value when missing from the JSON.
#[derive(Debug, Deserialize)]
struct GolangciLintIssue {
// Issue text; becomes the diagnostic message.
#[serde(default, rename = "Text")]
text: String,
// Name of the linter that raised the issue; becomes the diagnostic rule.
#[serde(default, rename = "FromLinter")]
from_linter: String,
#[serde(default, rename = "Pos")]
pos: GolangciLintPos,
}
/// Source position of a golangci-lint issue. Missing fields default to an empty
/// filename and 0 for line/column.
#[derive(Debug, Deserialize, Default)]
struct GolangciLintPos {
#[serde(default, rename = "Filename")]
filename: String,
#[serde(default, rename = "Line")]
line: u32,
#[serde(default, rename = "Column")]
column: u32,
}
fn parse_golangci_lint_output(stdout: &str) -> Vec<Diagnostic> {
let Ok(data) = serde_json::from_str::<GolangciLintOutput>(stdout) else {
return Vec::new();
};
data.issues
.into_iter()
.map(|issue| Diagnostic {
file: issue.pos.filename,
line: issue.pos.line,
column: issue.pos.column,
severity: "warning".to_string(),
message: issue.text,
rule: issue.from_linter,
source: "golangci-lint".to_string(),
})
.collect()
}
/// One JSON line parsed from `cargo ... --message-format=json` output.
#[derive(Debug, Deserialize)]
struct CargoMessage {
// Message kind; `parse_cargo_output` only uses lines whose reason is "compiler-message".
reason: String,
// Compiler payload; `None` when the line has no `message` field.
#[serde(default)]
message: Option<CargoCompilerMessage>,
}
/// The compiler diagnostic carried inside a [`CargoMessage`].
#[derive(Debug, Deserialize)]
struct CargoCompilerMessage {
// Severity string as emitted by the compiler; copied into `Diagnostic::severity`.
level: String,
message: String,
// Lint or error code, when the diagnostic has one.
#[serde(default)]
code: Option<CargoCode>,
// Source locations attached to the diagnostic; may be empty.
#[serde(default)]
spans: Vec<CargoSpan>,
}
/// Code object of a compiler diagnostic; only the code string itself is read,
/// defaulting to empty when absent.
#[derive(Debug, Deserialize)]
struct CargoCode {
#[serde(default)]
code: String,
}
/// One source span of a compiler diagnostic; only the file name and the start
/// line/column are read. All three fields are required for deserialization.
#[derive(Debug, Deserialize)]
struct CargoSpan {
file_name: String,
line_start: u32,
column_start: u32,
}
fn parse_cargo_output(stdout: &str, source: &str) -> Vec<Diagnostic> {
stdout
.lines()
.filter_map(|line| {
let msg: CargoMessage = serde_json::from_str(line).ok()?;
if msg.reason != "compiler-message" {
return None;
}
let message = msg.message?;
let span = message.spans.first()?;
Some(Diagnostic {
file: span.file_name.clone(),
line: span.line_start,
column: span.column_start,
severity: message.level,
message: message.message,
rule: message.code.map(|c| c.code).unwrap_or_default(),
source: source.to_string(),
})
})
.collect()
}
/// Extracts [`Diagnostic`]s from GCC-style text output, tagging each with `source`.
///
/// Only lines matching `GCC_RE` are used; lines whose numbers do not fit in
/// `u32` are skipped. The rule is always empty.
fn parse_gcc_output(stderr: &str, source: &str) -> Vec<Diagnostic> {
    let mut found = Vec::new();
    for raw in stderr.lines() {
        let Some(caps) = GCC_RE.captures(raw) else {
            continue;
        };
        let (Ok(line), Ok(column)) = (caps[2].parse::<u32>(), caps[3].parse::<u32>()) else {
            continue;
        };
        found.push(Diagnostic {
            file: caps[1].to_string(),
            line,
            column,
            severity: caps[4].to_string(),
            message: caps[5].to_string(),
            rule: String::new(),
            source: source.to_string(),
        });
    }
    found
}
/// Extracts [`Diagnostic`]s from `javac` text output, one per line matching `JAVAC_RE`.
///
/// The column is always 0 because the pattern has no column group, and the rule
/// is always empty. Lines whose line number does not fit in `u32` are skipped.
fn parse_javac_output(stderr: &str) -> Vec<Diagnostic> {
    let mut found = Vec::new();
    for raw in stderr.lines() {
        let Some(caps) = JAVAC_RE.captures(raw) else {
            continue;
        };
        let Ok(line) = caps[2].parse::<u32>() else {
            continue;
        };
        found.push(Diagnostic {
            file: caps[1].to_string(),
            line,
            column: 0,
            severity: caps[3].to_string(),
            message: caps[4].to_string(),
            rule: String::new(),
            source: "javac".to_string(),
        });
    }
    found
}
/// Extracts [`Diagnostic`]s from cppcheck's XML report.
///
/// For every `<error>` tag matched by `CPPCHECK_ERROR_RE`, the first
/// `<location>` that belongs to that same element supplies file, line and
/// column. The search stops at the element's `</error>` or at the next
/// `<error` tag, whichever comes first, so an error without a location (for
/// example a self-closing `<error .../>`) can no longer pick up the location
/// of a later, unrelated error. Errors without a location are skipped.
///
/// The XML entities `&lt;`, `&gt;`, `&quot;`, `&apos;` and `&amp;` are decoded
/// in the message and file name. Unparseable line/column numbers become 0.
fn parse_cppcheck_output(stderr: &str) -> Vec<Diagnostic> {
    /// Decodes the five predefined XML entities in an attribute value.
    fn unescape_xml(raw: &str) -> String {
        if !raw.contains('&') {
            return raw.to_string();
        }
        // `&amp;` is decoded last so that `&amp;lt;` yields the literal text `&lt;`.
        raw.replace("&lt;", "<")
            .replace("&gt;", ">")
            .replace("&quot;", "\"")
            .replace("&apos;", "'")
            .replace("&amp;", "&")
    }

    let mut diagnostics = Vec::new();
    for error_match in CPPCHECK_ERROR_RE.captures_iter(stderr) {
        let Some(tag) = error_match.get(0) else {
            continue;
        };
        let rule = error_match.get(1).map(|m| m.as_str()).unwrap_or("");
        let severity = error_match.get(2).map(|m| m.as_str()).unwrap_or("error");
        let message = error_match.get(3).map(|m| m.as_str()).unwrap_or("");

        // Limit the location search to this element's own children.
        let rest = &stderr[tag.end()..];
        let scope_end = match (rest.find("</error>"), rest.find("<error")) {
            (Some(close), Some(next)) => close.min(next),
            (Some(end), None) | (None, Some(end)) => end,
            (None, None) => rest.len(),
        };
        let Some(loc_match) = CPPCHECK_LOCATION_RE.captures(&rest[..scope_end]) else {
            continue;
        };

        diagnostics.push(Diagnostic {
            file: unescape_xml(loc_match.get(1).map(|m| m.as_str()).unwrap_or("")),
            line: loc_match
                .get(2)
                .and_then(|m| m.as_str().parse().ok())
                .unwrap_or(0),
            column: loc_match
                .get(3)
                .and_then(|m| m.as_str().parse().ok())
                .unwrap_or(0),
            severity: severity.to_string(),
            message: unescape_xml(message),
            rule: rule.to_string(),
            source: "cppcheck".to_string(),
        });
    }
    diagnostics
}
pub fn get_diagnostics(
file_path: &Path,
language: Option<&str>,
include_lint: bool,
) -> Result<DiagnosticsResult> {
let path = file_path
.canonicalize()
.unwrap_or_else(|_| file_path.to_path_buf());
if !path.exists() {
anyhow::bail!("File not found: {}", path.display());
}
let lang = language.unwrap_or_else(|| detect_language(&path));
let mut all_diagnostics = Vec::new();
let mut tools_used = Vec::new();
match lang {
"python" => {
if command_exists("pyright") {
if let Ok((stdout, _, _)) =
run_command("pyright", &["--outputjson", path.to_str().unwrap_or("")], None, 30)
{
all_diagnostics.extend(parse_pyright_output(&stdout));
tools_used.push("pyright".to_string());
}
}
if include_lint && command_exists("ruff") {
if let Ok((stdout, _, _)) = run_command(
"ruff",
&["check", "--output-format=json", path.to_str().unwrap_or("")],
None,
10,
) {
all_diagnostics.extend(parse_ruff_output(&stdout));
tools_used.push("ruff".to_string());
}
}
}
"typescript" | "tsx" | "javascript" => {
if command_exists("tsc") {
if let Ok((_, stderr, _)) = run_command(
"tsc",
&["--noEmit", "--pretty", "false", path.to_str().unwrap_or("")],
None,
30,
) {
all_diagnostics.extend(parse_tsc_output(&stderr));
tools_used.push("tsc".to_string());
}
}
}
"go" => {
if command_exists("go") {
if let Ok((_, stderr, _)) =
run_command("go", &["vet", path.to_str().unwrap_or("")], None, 30)
{
all_diagnostics.extend(parse_go_vet_output(&stderr));
tools_used.push("go vet".to_string());
}
}
if include_lint && command_exists("golangci-lint") {
if let Ok((stdout, _, _)) = run_command(
"golangci-lint",
&["run", "--out-format=json", path.to_str().unwrap_or("")],
None,
60,
) {
all_diagnostics.extend(parse_golangci_lint_output(&stdout));
tools_used.push("golangci-lint".to_string());
}
}
}
"rust" => {
let parent = path.parent();
if command_exists("cargo") {
if let Ok((stdout, _, _)) =
run_command("cargo", &["check", "--message-format=json"], parent, 120)
{
all_diagnostics.extend(parse_cargo_output(&stdout, "cargo check"));
tools_used.push("cargo check".to_string());
}
}
if include_lint && command_exists("cargo") {
if let Ok((stdout, _, _)) =
run_command("cargo", &["clippy", "--message-format=json"], parent, 120)
{
all_diagnostics.extend(parse_cargo_output(&stdout, "clippy"));
tools_used.push("clippy".to_string());
}
}
}
"java" => {
if command_exists("javac") {
if let Ok((_, stderr, _)) = run_command(
"javac",
&["-Xlint:all", path.to_str().unwrap_or("")],
None,
30,
) {
all_diagnostics.extend(parse_javac_output(&stderr));
tools_used.push("javac".to_string());
}
}
}
"c" | "cpp" => {
let compiler = if lang == "cpp" { "g++" } else { "gcc" };
if command_exists(compiler) {
if let Ok((_, stderr, _)) = run_command(
compiler,
&["-fsyntax-only", "-Wall", path.to_str().unwrap_or("")],
None,
30,
) {
all_diagnostics.extend(parse_gcc_output(&stderr, compiler));
tools_used.push(compiler.to_string());
}
}
if include_lint && command_exists("cppcheck") {
if let Ok((_, stderr, _)) = run_command(
"cppcheck",
&["--xml", "--enable=all", path.to_str().unwrap_or("")],
None,
30,
) {
all_diagnostics.extend(parse_cppcheck_output(&stderr));
tools_used.push("cppcheck".to_string());
}
}
}
_ => {
}
}
all_diagnostics.sort_by(|a, b| {
(&a.file, a.line, a.column).cmp(&(&b.file, b.line, b.column))
});
let error_count = all_diagnostics.iter().filter(|d| d.severity == "error").count();
let warning_count = all_diagnostics.iter().filter(|d| d.severity == "warning").count();
Ok(DiagnosticsResult {
target: path.display().to_string(),
language: lang.to_string(),
tools: tools_used,
diagnostics: all_diagnostics,
error_count,
warning_count,
file_count: 0,
})
}
pub fn get_project_diagnostics(
project_path: &Path,
language: Option<&str>,
include_lint: bool,
) -> Result<DiagnosticsResult> {
let path = project_path
.canonicalize()
.unwrap_or_else(|_| project_path.to_path_buf());
if !path.exists() {
anyhow::bail!("Path not found: {}", path.display());
}
let lang = language.unwrap_or_else(|| detect_project_language(&path));
let mut all_diagnostics = Vec::new();
let mut tools_used = Vec::new();
match lang {
"python" => {
if command_exists("pyright") {
if let Ok((stdout, _, _)) =
run_command("pyright", &["--outputjson", "."], Some(&path), 120)
{
all_diagnostics.extend(parse_pyright_output(&stdout));
tools_used.push("pyright".to_string());
}
}
if include_lint && command_exists("ruff") {
if let Ok((stdout, _, _)) =
run_command("ruff", &["check", "--output-format=json", "."], Some(&path), 60)
{
all_diagnostics.extend(parse_ruff_output(&stdout));
tools_used.push("ruff".to_string());
}
}
}
"typescript" | "tsx" | "javascript" => {
if command_exists("tsc") {
if let Ok((_, stderr, _)) =
run_command("tsc", &["--noEmit", "--pretty", "false"], Some(&path), 120)
{
all_diagnostics.extend(parse_tsc_output(&stderr));
tools_used.push("tsc".to_string());
}
}
}
"go" => {
if command_exists("go") {
if let Ok((_, stderr, _)) =
run_command("go", &["vet", "./..."], Some(&path), 120)
{
all_diagnostics.extend(parse_go_vet_output(&stderr));
tools_used.push("go vet".to_string());
}
}
if include_lint && command_exists("golangci-lint") {
if let Ok((stdout, _, _)) = run_command(
"golangci-lint",
&["run", "--out-format=json", "./..."],
Some(&path),
120,
) {
all_diagnostics.extend(parse_golangci_lint_output(&stdout));
tools_used.push("golangci-lint".to_string());
}
}
}
"rust" => {
if command_exists("cargo") {
if let Ok((stdout, _, _)) =
run_command("cargo", &["check", "--message-format=json"], Some(&path), 180)
{
all_diagnostics.extend(parse_cargo_output(&stdout, "cargo check"));
tools_used.push("cargo check".to_string());
}
}
if include_lint && command_exists("cargo") {
if let Ok((stdout, _, _)) =
run_command("cargo", &["clippy", "--message-format=json"], Some(&path), 180)
{
all_diagnostics.extend(parse_cargo_output(&stdout, "clippy"));
tools_used.push("clippy".to_string());
}
}
}
"java" => {
if command_exists("javac") {
tools_used.push("javac".to_string());
}
}
_ => {
}
}
all_diagnostics.sort_by(|a, b| {
(&a.file, a.line, a.column).cmp(&(&b.file, b.line, b.column))
});
let error_count = all_diagnostics.iter().filter(|d| d.severity == "error").count();
let warning_count = all_diagnostics.iter().filter(|d| d.severity == "warning").count();
let file_count = all_diagnostics
.iter()
.map(|d| &d.file)
.collect::<std::collections::HashSet<_>>()
.len();
Ok(DiagnosticsResult {
target: path.display().to_string(),
language: lang.to_string(),
tools: tools_used,
diagnostics: all_diagnostics,
error_count,
warning_count,
file_count,
})
}
/// Renders a [`DiagnosticsResult`] as plain text.
///
/// The output is three header lines (target and language, tools, counts), a
/// blank line, then one `S file:line:column: message [rule]` line per
/// diagnostic. `S` is `E` for severity `"error"` and `W` for anything else; the
/// ` [rule]` suffix is omitted when the rule is empty. Lines are joined with
/// `\n` and the last line has no trailing newline.
pub fn format_diagnostics_text(result: &DiagnosticsResult) -> String {
    let mut text = String::new();

    text.push_str(&format!(
        "Diagnostics for: {} ({})",
        result.target, result.language
    ));
    text.push('\n');
    text.push_str(&format!("Tools used: {}", result.tools.join(", ")));
    text.push('\n');
    text.push_str(&format!(
        "Found {} errors, {} warnings",
        result.error_count, result.warning_count
    ));
    // Separator that introduces the (possibly empty) blank line after the header.
    text.push('\n');

    for diag in &result.diagnostics {
        let marker = match diag.severity.as_str() {
            "error" => "E",
            _ => "W",
        };
        let rule_suffix = if diag.rule.is_empty() {
            String::new()
        } else {
            format!(" [{}]", diag.rule)
        };
        text.push('\n');
        text.push_str(&format!(
            "{} {}:{}:{}: {}{}",
            marker, diag.file, diag.line, diag.column, diag.message, rule_suffix
        ));
    }
    text
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that exactly one diagnostic was parsed and hands it back.
    fn single(mut parsed: Vec<Diagnostic>) -> Diagnostic {
        assert_eq!(parsed.len(), 1);
        parsed.remove(0)
    }

    #[test]
    fn test_detect_language() {
        let cases = [
            ("test.py", "python"),
            ("test.ts", "typescript"),
            ("test.tsx", "tsx"),
            ("test.js", "javascript"),
            ("test.go", "go"),
            ("test.rs", "rust"),
            ("test.java", "java"),
            ("test.c", "c"),
            ("test.cpp", "cpp"),
            ("test.unknown", "unknown"),
        ];
        for (file_name, expected) in cases.iter() {
            assert_eq!(detect_language(Path::new(file_name)), *expected);
        }
    }

    #[test]
    fn test_parse_pyright_output() {
        let sample = r#"{"generalDiagnostics":[{"file":"test.py","range":{"start":{"line":5,"character":10}},"severity":"error","message":"Type error","rule":"reportGeneralTypeIssues"}]}"#;
        let diag = single(parse_pyright_output(sample));
        assert_eq!(diag.file, "test.py");
        // Pyright positions are shifted by one by the parser.
        assert_eq!(diag.line, 6);
        assert_eq!(diag.column, 11);
        assert_eq!(diag.severity, "error");
        assert_eq!(diag.source, "pyright");
    }

    #[test]
    fn test_parse_ruff_output() {
        let sample = r#"[{"filename":"test.py","location":{"row":10,"column":5},"message":"Unused import","code":"F401"}]"#;
        let diag = single(parse_ruff_output(sample));
        assert_eq!(diag.file, "test.py");
        assert_eq!(diag.line, 10);
        assert_eq!(diag.column, 5);
        assert_eq!(diag.rule, "F401");
        assert_eq!(diag.source, "ruff");
    }

    #[test]
    fn test_parse_tsc_output() {
        let sample =
            "src/index.ts(10,5): error TS2322: Type 'string' is not assignable to type 'number'.";
        let diag = single(parse_tsc_output(sample));
        assert_eq!(diag.file, "src/index.ts");
        assert_eq!(diag.line, 10);
        assert_eq!(diag.column, 5);
        assert_eq!(diag.rule, "TS2322");
        assert_eq!(diag.source, "tsc");
    }

    #[test]
    fn test_parse_go_vet_output() {
        let sample = "main.go:15:3: unreachable code";
        let diag = single(parse_go_vet_output(sample));
        assert_eq!(diag.file, "main.go");
        assert_eq!(diag.line, 15);
        assert_eq!(diag.column, 3);
        assert_eq!(diag.source, "go vet");
    }

    #[test]
    fn test_parse_gcc_output() {
        let sample = "test.c:10:5: error: expected ';' before 'return'";
        let diag = single(parse_gcc_output(sample, "gcc"));
        assert_eq!(diag.file, "test.c");
        assert_eq!(diag.line, 10);
        assert_eq!(diag.column, 5);
        assert_eq!(diag.severity, "error");
        assert_eq!(diag.source, "gcc");
    }
}