use crate::checker::{Diagnostic, Severity};
use anyhow::Result;
use serde::Deserialize;
use std::collections::HashMap;
use tracing::{debug, warn};
use super::Engine;
/// Lint engine that shells out to the `vale` command-line tool.
#[derive(Debug)]
pub struct ValeEngine {
    /// Configuration file forwarded to Vale as `--config=<path>`; when `None`
    /// the flag is omitted from the command line.
    config_path: Option<String>,
}

impl ValeEngine {
    /// Creates an engine that optionally points Vale at `config_path`.
    ///
    /// Construction does no I/O; the `vale` binary is only looked up when a
    /// check is run.
    #[must_use]
    pub const fn new(config_path: Option<String>) -> Self {
        Self { config_path }
    }
}
/// One alert entry from Vale's JSON output.
///
/// `rename_all = "PascalCase"` means each field is read from the capitalized
/// JSON key (`message` from `"Message"`, `line` from `"Line"`, and so on).
/// JSON keys with no matching field are ignored.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
struct ValeAlert {
/// Human-readable description; copied verbatim into the diagnostic message.
message: String,
/// Vale severity name; translated by `map_severity`.
severity: String,
/// Line number of the match; the byte-range conversion treats it as 1-based.
line: u32,
/// Start and end column of the match, read from a two-element JSON array.
/// The byte-range conversion subtracts one from the start and uses the end as-is.
span: (u32, u32),
/// Name of the rule that fired; exposed as `vale.<check>` in the rule id.
check: String,
/// Suggested fix; falls back to an empty action when the key is absent.
#[serde(default)]
action: ValeAction,
}
/// The `Action` object attached to a Vale alert.
///
/// Both fields are optional in the JSON; the `Default` value is an empty name
/// with no parameters.
#[derive(Deserialize, Default)]
#[serde(rename_all = "PascalCase")]
struct ValeAction {
/// Action kind (for example `replace` or `remove`); empty when missing.
#[serde(default)]
name: String,
/// Action parameters; a missing key or an explicit JSON `null` both become
/// an empty vector.
#[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
params: Vec<String>,
}
/// Serde helper that accepts either a string array or `null`.
///
/// The value is first read as an `Option<Vec<String>>`; a `null` (i.e. `None`)
/// is then replaced with an empty vector. Any deserialization error is
/// returned unchanged.
fn deserialize_null_as_empty_vec<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let maybe_params = Option::<Vec<String>>::deserialize(deserializer)?;
    Ok(maybe_params.unwrap_or_default())
}
/// Maps a document language identifier to the file extension handed to Vale
/// via `--ext`.
///
/// Unrecognized identifiers fall back to Markdown (`.md`).
///
/// The result is always a string literal, so it is returned as `&'static str`
/// rather than being tied to the lifetime of `language_id`.
fn ext_for_language_id(language_id: &str) -> &'static str {
    match language_id {
        "html" => ".html",
        "latex" => ".tex",
        "typst" => ".typ",
        "restructuredtext" => ".rst",
        "org" => ".org",
        _ => ".md",
    }
}
/// Converts a Vale `(line, span)` location into a half-open byte range in `text`.
///
/// * `line` is 1-based (`0` is treated like `1`).
/// * `span.0` is the 1-based column of the first matched character and
///   `span.1` the 1-based column of the last one (inclusive).
///
/// Columns are counted in characters, not bytes, so each column is walked to
/// the byte offset of the corresponding `char`. This keeps both ends on UTF-8
/// boundaries even when the line contains multi-byte characters. Columns past
/// the end of the line clamp to the line end, and an inverted span collapses
/// to an empty range at its start.
///
/// If `line` is beyond the last line of `text`, an empty range at the end of
/// the text is returned.
// The only narrowing casts are in `narrow`, which clamps to `u32::MAX` first,
// so oversized inputs saturate instead of wrapping.
#[allow(clippy::cast_possible_truncation)]
fn line_span_to_byte_range(text: &str, line: u32, span: (u32, u32)) -> (u32, u32) {
    let narrow = |n: usize| n.min(u32::MAX as usize) as u32;
    let target_line = line.saturating_sub(1) as usize;

    // Byte offset at which the current line starts.
    let mut line_start = 0usize;
    for (idx, content) in text.split('\n').enumerate() {
        if idx == target_line {
            // Byte offset (within the line) of the character at 0-based
            // column `col`, or the line length when the column is past the end.
            let col_to_byte = |col: usize| {
                content
                    .char_indices()
                    .nth(col)
                    .map_or(content.len(), |(byte, _)| byte)
            };
            let start = col_to_byte(span.0.saturating_sub(1) as usize);
            // The inclusive 1-based end column equals the exclusive 0-based one.
            let end = col_to_byte(span.1 as usize).max(start);
            return (narrow(line_start + start), narrow(line_start + end));
        }
        // +1 accounts for the '\n' consumed by `split`.
        line_start += content.len() + 1;
    }

    // Line not found: point at the end of the text rather than one past it.
    let eof = narrow(text.len());
    (eof, eof)
}
fn map_severity(vale_severity: &str) -> i32 {
match vale_severity {
"error" => Severity::Error as i32,
"suggestion" => Severity::Hint as i32,
_ => Severity::Warning as i32,
}
}
/// Derives replacement suggestions from a Vale action.
///
/// * `replace` / `suggest`: the action's parameters are returned as-is.
/// * `remove`: a single empty string, i.e. "replace the match with nothing".
/// * anything else: no suggestions.
///
/// NOTE(review): for `suggest`, Vale's parameters may name a suggestion source
/// rather than literal replacement text — confirm against Vale's action docs.
fn suggestions_from_action(action: &ValeAction) -> Vec<String> {
    let kind = action.name.as_str();
    if matches!(kind, "replace" | "suggest") {
        action.params.clone()
    } else if kind == "remove" {
        vec![String::new()]
    } else {
        Vec::new()
    }
}
#[async_trait::async_trait]
impl Engine for ValeEngine {
fn name(&self) -> &'static str {
"vale"
}
async fn check(&mut self, text: &str, language_id: &str) -> Result<Vec<Diagnostic>> {
use tokio::io::AsyncWriteExt;
use tokio::process::Command;
let ext = ext_for_language_id(language_id);
let mut cmd = Command::new("vale");
cmd.arg("--output=JSON")
.arg("--no-exit")
.arg(format!("--ext={ext}"));
if let Some(cfg) = &self.config_path {
cmd.arg(format!("--config={cfg}"));
}
cmd.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped());
let output = match cmd.spawn() {
Ok(mut child) => {
if let Some(mut stdin) = child.stdin.take() {
let _ = stdin.write_all(text.as_bytes()).await;
let _ = stdin.shutdown().await;
}
child.wait_with_output().await?
}
Err(e) => {
warn!("Failed to spawn vale: {e}");
return Ok(vec![]);
}
};
if output.status.code() == Some(2) {
let stderr = String::from_utf8_lossy(&output.stderr);
warn!(stderr = stderr.trim(), "Vale runtime error");
return Ok(vec![]);
}
let stdout = String::from_utf8_lossy(&output.stdout);
if stdout.trim().is_empty() {
return Ok(vec![]);
}
let vale_output: HashMap<String, Vec<ValeAlert>> = match serde_json::from_str(&stdout) {
Ok(o) => o,
Err(e) => {
warn!("Failed to parse Vale JSON output: {e}");
debug!(stdout = %stdout, "Raw Vale output");
return Ok(vec![]);
}
};
let mut diagnostics = Vec::new();
for alerts in vale_output.into_values() {
for alert in alerts {
let (start_byte, end_byte) = line_span_to_byte_range(text, alert.line, alert.span);
diagnostics.push(Diagnostic {
start_byte,
end_byte,
message: alert.message,
suggestions: suggestions_from_action(&alert.action),
rule_id: format!("vale.{}", alert.check),
severity: map_severity(&alert.severity),
unified_id: String::new(),
confidence: 0.75,
});
}
}
Ok(diagnostics)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A span on the first line maps straight onto byte offsets.
    #[test]
    fn line_span_to_byte_range_first_line() {
        let text = "Hello world";
        let (start, end) = line_span_to_byte_range(text, 1, (7, 11));
        assert_eq!(&text[start as usize..end as usize], "world");
    }

    /// Offsets on later lines include the preceding lines and their newlines.
    #[test]
    fn line_span_to_byte_range_second_line() {
        let text = "First line\nSecond line here";
        let (start, end) = line_span_to_byte_range(text, 2, (8, 11));
        assert_eq!(&text[start as usize..end as usize], "line");
    }

    /// Columns past the end of the line clamp to the line length.
    #[test]
    fn line_span_to_byte_range_clamped() {
        let text = "short";
        let (start, end) = line_span_to_byte_range(text, 1, (1, 100));
        assert_eq!(start, 0);
        assert_eq!(end, 5);
    }

    /// Known severities map to their variants; anything else is a warning.
    #[test]
    fn map_severity_values() {
        assert_eq!(map_severity("error"), Severity::Error as i32);
        assert_eq!(map_severity("warning"), Severity::Warning as i32);
        assert_eq!(map_severity("suggestion"), Severity::Hint as i32);
        assert_eq!(map_severity("unknown"), Severity::Warning as i32);
    }

    /// `replace` returns its parameters unchanged.
    #[test]
    fn suggestions_from_replace_action() {
        let action = ValeAction {
            name: "replace".to_string(),
            params: vec!["use".to_string(), "utilize".to_string()],
        };
        assert_eq!(suggestions_from_action(&action), vec!["use", "utilize"]);
    }

    /// `remove` yields a single empty replacement.
    #[test]
    fn suggestions_from_remove_action() {
        let action = ValeAction {
            name: "remove".to_string(),
            params: vec![],
        };
        assert_eq!(suggestions_from_action(&action), vec![""]);
    }

    /// The default (empty) action produces no suggestions.
    #[test]
    fn suggestions_from_empty_action() {
        let action = ValeAction::default();
        assert!(suggestions_from_action(&action).is_empty());
    }

    /// Every explicit arm of the extension table, plus the Markdown fallback.
    #[test]
    fn ext_for_known_languages() {
        assert_eq!(ext_for_language_id("markdown"), ".md");
        assert_eq!(ext_for_language_id("html"), ".html");
        assert_eq!(ext_for_language_id("latex"), ".tex");
        assert_eq!(ext_for_language_id("typst"), ".typ");
        assert_eq!(ext_for_language_id("restructuredtext"), ".rst");
        assert_eq!(ext_for_language_id("org"), ".org");
    }

    /// A single alert parses, ignoring keys that have no matching field.
    #[test]
    fn vale_alert_deserializes() {
        let json = r#"{
"Action": {"Name": "replace", "Params": ["use"]},
"Span": [13, 20],
"Check": "Microsoft.Wordiness",
"Description": "",
"Link": "https://example.com",
"Message": "Consider using 'use' instead of 'utilize'.",
"Severity": "warning",
"Match": "utilize",
"Line": 5
}"#;
        let alert: ValeAlert = serde_json::from_str(json).unwrap();
        assert_eq!(alert.check, "Microsoft.Wordiness");
        assert_eq!(alert.severity, "warning");
        assert_eq!(alert.line, 5);
        assert_eq!(alert.span, (13, 20));
        assert_eq!(alert.action.name, "replace");
        assert_eq!(alert.action.params, vec!["use"]);
    }

    /// The top-level output is a map from file name to its alerts.
    #[test]
    fn vale_full_json_output_deserializes() {
        let json = r#"{
"stdin.md": [
{
"Action": {"Name": "replace", "Params": ["use"]},
"Span": [13, 20],
"Check": "Microsoft.Wordiness",
"Description": "",
"Link": "",
"Message": "Consider using 'use'.",
"Severity": "warning",
"Match": "utilize",
"Line": 1
}
]
}"#;
        let output: HashMap<String, Vec<ValeAlert>> = serde_json::from_str(json).unwrap();
        assert_eq!(output.len(), 1);
        let alerts = &output["stdin.md"];
        assert_eq!(alerts.len(), 1);
        assert_eq!(alerts[0].check, "Microsoft.Wordiness");
    }

    /// `"Params": null` must deserialize to an empty vector, not fail.
    #[test]
    fn vale_alert_null_params_deserializes() {
        let json = r#"{
"Action": {"Name": "", "Params": null},
"Span": [1, 2],
"Check": "Google.We",
"Message": "Avoid first-person plural.",
"Severity": "warning",
"Match": "We",
"Line": 1
}"#;
        let alert: ValeAlert = serde_json::from_str(json).unwrap();
        assert!(alert.action.params.is_empty());
        assert!(alert.action.name.is_empty());
    }

    /// `check` must return `Ok` whether or not a `vale` binary is available.
    ///
    /// NOTE(review): "en-US" is not an id that `ext_for_language_id`
    /// recognizes, so this exercises the `.md` fallback — confirm that is the
    /// intended input.
    #[tokio::test]
    async fn vale_engine_missing_binary() -> Result<()> {
        let mut engine = ValeEngine::new(None);
        let result = engine.check("test text", "en-US").await;
        assert!(result.is_ok());
        Ok(())
    }

    /// End-to-end run against a real Vale installation; opt in with
    /// `cargo test -- --ignored`.
    #[tokio::test]
    #[ignore = "requires a local vale install and /tmp/vale-test/.vale.ini"]
    async fn vale_engine_live() -> Result<()> {
        let mut engine = ValeEngine::new(Some("/tmp/vale-test/.vale.ini".to_string()));
        let text = "We would like to utilize this.";
        let diagnostics = engine.check(text, "en-US").await?;
        println!("Vale returned {} diagnostics:", diagnostics.len());
        for d in &diagnostics {
            println!(
                " [{}-{}] {} (rule: {}, suggestions: {:?})",
                d.start_byte, d.end_byte, d.message, d.rule_id, d.suggestions
            );
        }
        assert!(
            !diagnostics.is_empty(),
            "Expected at least 1 diagnostic from Vale"
        );
        assert!(diagnostics[0].rule_id.starts_with("vale."));
        Ok(())
    }
}