Skip to main content

lang_check/engines/
vale.rs

1use crate::checker::{Diagnostic, Severity};
2use anyhow::Result;
3use serde::Deserialize;
4use std::collections::HashMap;
5use tracing::{debug, warn};
6
7use super::Engine;
8
/// Prose-linting engine backed by the external `vale` command-line tool.
#[derive(Debug)]
pub struct ValeEngine {
    /// Path forwarded to Vale as `--config=<path>`; `None` omits the flag.
    config_path: Option<String>,
}
12
13impl ValeEngine {
14    #[must_use]
15    pub const fn new(config_path: Option<String>) -> Self {
16        Self { config_path }
17    }
18}
19
20/// A single alert from Vale's `--output=JSON` format.
21#[derive(Deserialize)]
22#[serde(rename_all = "PascalCase")]
23struct ValeAlert {
24    message: String,
25    severity: String,
26    line: u32,
27    span: (u32, u32),
28    check: String,
29    #[serde(default)]
30    action: ValeAction,
31}
32
33/// Fix action attached to a Vale alert.
34#[derive(Deserialize, Default)]
35#[serde(rename_all = "PascalCase")]
36struct ValeAction {
37    #[serde(default)]
38    name: String,
39    #[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
40    params: Vec<String>,
41}
42
43fn deserialize_null_as_empty_vec<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
44where
45    D: serde::Deserializer<'de>,
46{
47    Option::<Vec<String>>::deserialize(deserializer).map(Option::unwrap_or_default)
48}
49
/// Map a language ID to the file-extension hint passed to Vale.
///
/// The orchestrator passes a BCP-47 tag (e.g. "en-US"), but file-type IDs are
/// accepted as well for direct use in tests. Matching is exact and
/// case-sensitive; anything unrecognised falls back to Markdown.
///
/// Only string literals are returned, so the result is `'static` and does not
/// borrow from `language_id`.
fn ext_for_language_id(language_id: &str) -> &'static str {
    match language_id {
        "html" => ".html",
        "latex" => ".tex",
        "typst" => ".typ",
        "restructuredtext" => ".rst",
        "org" => ".org",
        // "markdown", BCP-47 tags, and anything unknown default to .md
        _ => ".md",
    }
}
64
/// Convert a 1-based line number and 1-based column span to byte offsets
/// into `text`.
///
/// * `line` – 1-based line number (`0` is treated like `1`). Lines are split
///   on `'\n'`.
/// * `span` – `(start, end)` columns on that line, 1-based with an inclusive
///   end. Columns are interpreted as *character* positions, not bytes (Vale is
///   understood to count characters — confirm against Vale's JSON output), so
///   multi-byte UTF-8 text maps correctly.
///
/// Returns a half-open `(start_byte, end_byte)` range. Both offsets always
/// fall on character boundaries within `text`:
/// columns past the end of the line clamp to the end of that line, a line
/// past the end of the text yields an empty range at `text.len()`, and an
/// inverted span collapses to an empty range at its start.
// `as u32` is applied only after saturating at `u32::MAX`, so nothing wraps.
#[allow(clippy::cast_possible_truncation)]
fn line_span_to_byte_range(text: &str, line: u32, span: (u32, u32)) -> (u32, u32) {
    /// Byte offset of the `n`-th character (0-based) of `s`, or `s.len()`
    /// when `s` has fewer than `n + 1` characters.
    fn char_to_byte(s: &str, n: usize) -> usize {
        s.char_indices().nth(n).map_or(s.len(), |(idx, _)| idx)
    }

    /// Saturating `usize` → `u32` conversion.
    fn to_u32(v: usize) -> u32 {
        v.min(u32::MAX as usize) as u32
    }

    let target_line = line.saturating_sub(1) as usize;
    let mut line_start: usize = 0;

    for (i, l) in text.split('\n').enumerate() {
        if i == target_line {
            let start = char_to_byte(l, span.0.saturating_sub(1) as usize);
            // The end column is inclusive, so the exclusive end is the start
            // of the character that follows it.
            let end = char_to_byte(l, span.1 as usize).max(start);
            return (to_u32(line_start + start), to_u32(line_start + end));
        }
        // +1 for the '\n' consumed by `split`.
        line_start += l.len() + 1;
    }

    // Requested line does not exist: point at the end of the text. The running
    // offset cannot be used here because it has counted a '\n' after the final
    // line that is not actually present.
    let eof = to_u32(text.len());
    (eof, eof)
}
84
85fn map_severity(vale_severity: &str) -> i32 {
86    match vale_severity {
87        "error" => Severity::Error as i32,
88        "suggestion" => Severity::Hint as i32,
89        // "warning" and anything unknown
90        _ => Severity::Warning as i32,
91    }
92}
93
94fn suggestions_from_action(action: &ValeAction) -> Vec<String> {
95    match action.name.as_str() {
96        "replace" | "suggest" => action.params.clone(),
97        "remove" => vec![String::new()],
98        _ => Vec::new(),
99    }
100}
101
102#[async_trait::async_trait]
103impl Engine for ValeEngine {
104    fn name(&self) -> &'static str {
105        "vale"
106    }
107
108    async fn check(&mut self, text: &str, language_id: &str) -> Result<Vec<Diagnostic>> {
109        use tokio::io::AsyncWriteExt;
110        use tokio::process::Command;
111
112        let ext = ext_for_language_id(language_id);
113        let mut cmd = Command::new("vale");
114        cmd.arg("--output=JSON")
115            .arg("--no-exit")
116            .arg(format!("--ext={ext}"));
117
118        if let Some(cfg) = &self.config_path {
119            cmd.arg(format!("--config={cfg}"));
120        }
121
122        cmd.stdin(std::process::Stdio::piped())
123            .stdout(std::process::Stdio::piped())
124            .stderr(std::process::Stdio::piped());
125
126        let output = match cmd.spawn() {
127            Ok(mut child) => {
128                if let Some(mut stdin) = child.stdin.take() {
129                    let _ = stdin.write_all(text.as_bytes()).await;
130                    let _ = stdin.shutdown().await;
131                }
132                child.wait_with_output().await?
133            }
134            Err(e) => {
135                warn!("Failed to spawn vale: {e}");
136                return Ok(vec![]);
137            }
138        };
139
140        // Vale exit code 2 = runtime error; 0 or 1 = normal
141        if output.status.code() == Some(2) {
142            let stderr = String::from_utf8_lossy(&output.stderr);
143            warn!(stderr = stderr.trim(), "Vale runtime error");
144            return Ok(vec![]);
145        }
146
147        let stdout = String::from_utf8_lossy(&output.stdout);
148        if stdout.trim().is_empty() {
149            return Ok(vec![]);
150        }
151
152        let vale_output: HashMap<String, Vec<ValeAlert>> = match serde_json::from_str(&stdout) {
153            Ok(o) => o,
154            Err(e) => {
155                warn!("Failed to parse Vale JSON output: {e}");
156                debug!(stdout = %stdout, "Raw Vale output");
157                return Ok(vec![]);
158            }
159        };
160
161        let mut diagnostics = Vec::new();
162        for alerts in vale_output.into_values() {
163            for alert in alerts {
164                let (start_byte, end_byte) = line_span_to_byte_range(text, alert.line, alert.span);
165
166                diagnostics.push(Diagnostic {
167                    start_byte,
168                    end_byte,
169                    message: alert.message,
170                    suggestions: suggestions_from_action(&alert.action),
171                    rule_id: format!("vale.{}", alert.check),
172                    severity: map_severity(&alert.severity),
173                    unified_id: String::new(),
174                    confidence: 0.75,
175                });
176            }
177        }
178
179        Ok(diagnostics)
180    }
181}
182
/// Unit tests for the pure helpers and the JSON model, plus two tests that
/// exercise the engine end to end (one of them ignored by default).
#[cfg(test)]
mod tests {
    use super::*;

    // ── line_span_to_byte_range ─────────────────────────────────────────

    #[test]
    fn line_span_to_byte_range_first_line() {
        let text = "Hello world";
        // Line 1, columns 7-11 (1-based) → "world"
        let (start, end) = line_span_to_byte_range(text, 1, (7, 11));
        assert_eq!(&text[start as usize..end as usize], "world");
    }

    #[test]
    fn line_span_to_byte_range_second_line() {
        let text = "First line\nSecond line here";
        // Line 2, columns 8-11 (1-based) → "line"
        let (start, end) = line_span_to_byte_range(text, 2, (8, 11));
        assert_eq!(&text[start as usize..end as usize], "line");
    }

    #[test]
    fn line_span_to_byte_range_clamped() {
        let text = "short";
        // Span extends beyond line length — should clamp
        let (start, end) = line_span_to_byte_range(text, 1, (1, 100));
        assert_eq!(start, 0);
        assert_eq!(end, 5);
    }

    // ── map_severity ────────────────────────────────────────────────────

    #[test]
    fn map_severity_values() {
        assert_eq!(map_severity("error"), Severity::Error as i32);
        assert_eq!(map_severity("warning"), Severity::Warning as i32);
        assert_eq!(map_severity("suggestion"), Severity::Hint as i32);
        assert_eq!(map_severity("unknown"), Severity::Warning as i32);
    }

    // ── suggestions_from_action ─────────────────────────────────────────

    #[test]
    fn suggestions_from_replace_action() {
        let action = ValeAction {
            name: "replace".to_string(),
            params: vec!["use".to_string(), "utilize".to_string()],
        };
        assert_eq!(suggestions_from_action(&action), vec!["use", "utilize"]);
    }

    #[test]
    fn suggestions_from_remove_action() {
        let action = ValeAction {
            name: "remove".to_string(),
            params: vec![],
        };
        assert_eq!(suggestions_from_action(&action), vec![""]);
    }

    #[test]
    fn suggestions_from_empty_action() {
        let action = ValeAction::default();
        assert!(suggestions_from_action(&action).is_empty());
    }

    // ── ext_for_language_id ─────────────────────────────────────────────

    #[test]
    fn ext_for_known_languages() {
        assert_eq!(ext_for_language_id("markdown"), ".md");
        assert_eq!(ext_for_language_id("html"), ".html");
        assert_eq!(ext_for_language_id("latex"), ".tex");
        assert_eq!(ext_for_language_id("typst"), ".typ");
        assert_eq!(ext_for_language_id("restructuredtext"), ".rst");
        assert_eq!(ext_for_language_id("org"), ".org");
    }

    #[test]
    fn ext_defaults_to_markdown() {
        // BCP-47 tags (what the orchestrator passes) and unknown IDs fall
        // back to Markdown.
        assert_eq!(ext_for_language_id("en-US"), ".md");
        assert_eq!(ext_for_language_id("something-else"), ".md");
        assert_eq!(ext_for_language_id(""), ".md");
    }

    // ── JSON model ──────────────────────────────────────────────────────

    #[test]
    fn vale_alert_deserializes() {
        // Includes keys the struct does not model (Description, Link, Match).
        let json = r#"{
            "Action": {"Name": "replace", "Params": ["use"]},
            "Span": [13, 20],
            "Check": "Microsoft.Wordiness",
            "Description": "",
            "Link": "https://example.com",
            "Message": "Consider using 'use' instead of 'utilize'.",
            "Severity": "warning",
            "Match": "utilize",
            "Line": 5
        }"#;
        let alert: ValeAlert = serde_json::from_str(json).unwrap();
        assert_eq!(alert.check, "Microsoft.Wordiness");
        assert_eq!(alert.severity, "warning");
        assert_eq!(alert.line, 5);
        assert_eq!(alert.span, (13, 20));
        assert_eq!(alert.action.name, "replace");
        assert_eq!(alert.action.params, vec!["use"]);
    }

    #[test]
    fn vale_full_json_output_deserializes() {
        // Top-level shape: file name → list of alerts.
        let json = r#"{
            "stdin.md": [
                {
                    "Action": {"Name": "replace", "Params": ["use"]},
                    "Span": [13, 20],
                    "Check": "Microsoft.Wordiness",
                    "Description": "",
                    "Link": "",
                    "Message": "Consider using 'use'.",
                    "Severity": "warning",
                    "Match": "utilize",
                    "Line": 1
                }
            ]
        }"#;
        let output: HashMap<String, Vec<ValeAlert>> = serde_json::from_str(json).unwrap();
        assert_eq!(output.len(), 1);
        let alerts = &output["stdin.md"];
        assert_eq!(alerts.len(), 1);
        assert_eq!(alerts[0].check, "Microsoft.Wordiness");
    }

    #[test]
    fn vale_alert_null_params_deserializes() {
        // Vale sends `"Params": null` when no action params exist
        let json = r#"{
            "Action": {"Name": "", "Params": null},
            "Span": [1, 2],
            "Check": "Google.We",
            "Message": "Avoid first-person plural.",
            "Severity": "warning",
            "Match": "We",
            "Line": 1
        }"#;
        let alert: ValeAlert = serde_json::from_str(json).unwrap();
        assert!(alert.action.params.is_empty());
        assert!(alert.action.name.is_empty());
    }

    // ── engine ──────────────────────────────────────────────────────────

    #[tokio::test]
    async fn vale_engine_missing_binary() -> Result<()> {
        let mut engine = ValeEngine::new(None);
        // If vale is not on PATH, should return empty (not error)
        let result = engine.check("test text", "en-US").await;
        assert!(result.is_ok());
        Ok(())
    }

    /// Live integration test — requires `vale` on PATH with Google style.
    /// Run with: `cargo test vale_engine_live -- --ignored --nocapture`
    #[tokio::test]
    #[ignore]
    async fn vale_engine_live() -> Result<()> {
        let mut engine = ValeEngine::new(Some("/tmp/vale-test/.vale.ini".to_string()));
        let text = "We would like to utilize this.";
        let diagnostics = engine.check(text, "en-US").await?;

        println!("Vale returned {} diagnostics:", diagnostics.len());
        for d in &diagnostics {
            println!(
                "  [{}-{}] {} (rule: {}, suggestions: {:?})",
                d.start_byte, d.end_byte, d.message, d.rule_id, d.suggestions
            );
        }

        assert!(
            !diagnostics.is_empty(),
            "Expected at least 1 diagnostic from Vale"
        );
        // Verify rule_id is namespaced with "vale."
        assert!(diagnostics[0].rule_id.starts_with("vale."));
        Ok(())
    }
}