Skip to main content

hyprcorrect_core/
languagetool.rs

1//! LanguageTool HTTP correction provider (M5).
2//!
3//! POSTs the text to a self-hosted LanguageTool server's
4//! `/v2/check` endpoint and turns the JSON `matches` array into
5//! [`crate::providers::Correction`]s.
6//!
7//! Off until the user enables it in Preferences → LanguageTool.
8//! Bring-your-own server — the project does not bundle LanguageTool
9//! itself (it's Java + dictionaries; would dwarf the crate).
10
11use std::time::Duration;
12
13use crate::LanguageToolConfig;
14use crate::providers::Correction;
15
/// Timeout handed to the `ureq` agent that performs each LanguageTool check.
const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
17
18/// Errors from a LanguageTool check.
19#[derive(Debug, thiserror::Error)]
20pub enum LanguageToolError {
21    /// The user hasn't enabled LanguageTool in the config.
22    #[error("LanguageTool is disabled in the config")]
23    Disabled,
24    /// `url` field is empty.
25    #[error("LanguageTool URL is empty")]
26    NoUrl,
27    /// Network or HTTP error reaching the server.
28    #[error("LanguageTool request failed: {0}")]
29    Request(String),
30    /// Couldn't make sense of the response body.
31    #[error("LanguageTool response was unparseable: {0}")]
32    Response(String),
33}
34
/// Correction provider backed by a LanguageTool HTTP server.
#[derive(Debug, Clone)]
pub struct LanguageToolProvider {
    /// Full URL of the server's `/v2/check` endpoint, derived from the
    /// configured base URL.
    endpoint: String,
}
40
41impl LanguageToolProvider {
42    /// Build the provider from the user's [`LanguageToolConfig`].
43    /// Returns `Err` cleanly when the config is disabled or empty —
44    /// the daemon treats either as "fall back to spellbook".
45    ///
46    /// # Errors
47    ///
48    /// See [`LanguageToolError`].
49    pub fn from_config(lt: &LanguageToolConfig) -> Result<Self, LanguageToolError> {
50        if !lt.enabled {
51            return Err(LanguageToolError::Disabled);
52        }
53        let url = lt.url.trim().trim_end_matches('/');
54        if url.is_empty() {
55            return Err(LanguageToolError::NoUrl);
56        }
57        Ok(Self {
58            endpoint: format!("{url}/v2/check"),
59        })
60    }
61
62    /// Check `text` against the LanguageTool server. Returns one
63    /// [`Correction`] per match (deduplicated implicitly by
64    /// LanguageTool's own ranking).
65    ///
66    /// # Errors
67    ///
68    /// See [`LanguageToolError`].
69    pub fn check_text(&self, text: &str) -> Result<Vec<Correction>, LanguageToolError> {
70        if text.trim().is_empty() {
71            return Ok(Vec::new());
72        }
73        let agent = ureq::AgentBuilder::new().timeout(REQUEST_TIMEOUT).build();
74        // `level=picky` turns on LanguageTool's extra grammar/style rules
75        // (more than the default set). Real-word confusions like
76        // wear/where still need the server's optional n-gram data loaded.
77        let response = agent
78            .post(&self.endpoint)
79            .send_form(&[("text", text), ("language", "en-US"), ("level", "picky")])
80            .map_err(|e| LanguageToolError::Request(e.to_string()))?;
81        let json: serde_json::Value = response
82            .into_json()
83            .map_err(|e| LanguageToolError::Response(e.to_string()))?;
84        Ok(parse_matches(&json, text))
85    }
86}
87
88fn parse_matches(json: &serde_json::Value, text: &str) -> Vec<Correction> {
89    let Some(matches) = json["matches"].as_array() else {
90        return Vec::new();
91    };
92    let mut out = Vec::with_capacity(matches.len());
93    for m in matches {
94        let offset = match m["offset"].as_u64() {
95            Some(n) => n as usize,
96            None => continue,
97        };
98        let length = match m["length"].as_u64() {
99            Some(n) => n as usize,
100            None => continue,
101        };
102        if length == 0 {
103            continue;
104        }
105        let end = offset.saturating_add(length);
106        if end > text.len() || !text.is_char_boundary(offset) || !text.is_char_boundary(end) {
107            continue;
108        }
109        let suggestions: Vec<String> = m["replacements"]
110            .as_array()
111            .into_iter()
112            .flat_map(|a| a.iter())
113            .filter_map(|r| r["value"].as_str().map(str::to_string))
114            .collect();
115        if suggestions.is_empty() {
116            continue;
117        }
118        out.push(Correction {
119            span: offset..end,
120            original: text[offset..end].to_string(),
121            suggestions,
122        });
123    }
124    out
125}
126
#[cfg(test)]
mod tests {
    use super::*;

    /// Canned `/v2/check` response with two matches over "the helo wrold".
    const SAMPLE: &str = r#"{
        "matches": [
            {
                "offset": 4,
                "length": 4,
                "replacements": [{"value": "hello"}, {"value": "helot"}]
            },
            {
                "offset": 9,
                "length": 5,
                "replacements": [{"value": "world"}]
            }
        ]
    }"#;

    /// Decode a JSON literal, panicking on malformed test fixtures.
    fn json(raw: &str) -> serde_json::Value {
        serde_json::from_str(raw).unwrap()
    }

    /// Build a config with the given switch and URL and no n-gram directory.
    fn config(enabled: bool, url: &str) -> LanguageToolConfig {
        LanguageToolConfig {
            enabled,
            url: url.into(),
            ngram_dir: None,
        }
    }

    #[test]
    fn parses_matches_into_corrections() {
        let corrections = parse_matches(&json(SAMPLE), "the helo wrold");
        assert_eq!(corrections.len(), 2);

        let first = &corrections[0];
        assert_eq!(first.span, 4..8);
        assert_eq!(first.original, "helo");
        assert_eq!(first.suggestions, vec!["hello", "helot"]);

        let second = &corrections[1];
        assert_eq!(second.span, 9..14);
        assert_eq!(second.original, "wrold");
        assert_eq!(second.suggestions, vec!["world"]);
    }

    #[test]
    fn ignores_matches_with_no_replacements() {
        let body = json(r#"{"matches":[{"offset":0,"length":3,"replacements":[]}]}"#);
        assert!(parse_matches(&body, "the").is_empty());
    }

    #[test]
    fn ignores_matches_with_out_of_range_spans() {
        let body =
            json(r#"{"matches":[{"offset":10,"length":5,"replacements":[{"value":"x"}]}]}"#);
        assert!(parse_matches(&body, "short").is_empty());
    }

    #[test]
    fn disabled_config_errors_cleanly() {
        let outcome = LanguageToolProvider::from_config(&config(false, "http://localhost:8081"));
        assert!(matches!(outcome, Err(LanguageToolError::Disabled)));
    }

    #[test]
    fn empty_url_errors_cleanly() {
        let outcome = LanguageToolProvider::from_config(&config(true, "  "));
        assert!(matches!(outcome, Err(LanguageToolError::NoUrl)));
    }
}