hyprcorrect_core/
languagetool.rs1use std::time::Duration;
12
13use crate::LanguageToolConfig;
14use crate::providers::Correction;
15
/// Timeout handed to the `ureq` agent that `LanguageToolProvider::check_text`
/// builds for each request.
const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
17
/// Errors produced while configuring a `LanguageToolProvider` or while
/// querying the LanguageTool server through it.
#[derive(Debug, thiserror::Error)]
pub enum LanguageToolError {
    /// The config has `enabled` set to `false`.
    #[error("LanguageTool is disabled in the config")]
    Disabled,
    /// The configured URL is empty once whitespace and trailing slashes are
    /// trimmed.
    #[error("LanguageTool URL is empty")]
    NoUrl,
    /// Sending the HTTP request failed; carries the stringified `ureq` error.
    #[error("LanguageTool request failed: {0}")]
    Request(String),
    /// The response body could not be decoded as JSON; carries the stringified
    /// decoding error.
    #[error("LanguageTool response was unparseable: {0}")]
    Response(String),
}
34
/// Handle for a LanguageTool server, built from the config via
/// `LanguageToolProvider::from_config`.
#[derive(Debug, Clone)]
pub struct LanguageToolProvider {
    /// Full URL that requests are posted to: the configured base URL with
    /// `/v2/check` appended.
    endpoint: String,
}
40
41impl LanguageToolProvider {
42 pub fn from_config(lt: &LanguageToolConfig) -> Result<Self, LanguageToolError> {
50 if !lt.enabled {
51 return Err(LanguageToolError::Disabled);
52 }
53 let url = lt.url.trim().trim_end_matches('/');
54 if url.is_empty() {
55 return Err(LanguageToolError::NoUrl);
56 }
57 Ok(Self {
58 endpoint: format!("{url}/v2/check"),
59 })
60 }
61
62 pub fn check_text(&self, text: &str) -> Result<Vec<Correction>, LanguageToolError> {
70 if text.trim().is_empty() {
71 return Ok(Vec::new());
72 }
73 let agent = ureq::AgentBuilder::new().timeout(REQUEST_TIMEOUT).build();
74 let response = agent
78 .post(&self.endpoint)
79 .send_form(&[("text", text), ("language", "en-US"), ("level", "picky")])
80 .map_err(|e| LanguageToolError::Request(e.to_string()))?;
81 let json: serde_json::Value = response
82 .into_json()
83 .map_err(|e| LanguageToolError::Response(e.to_string()))?;
84 Ok(parse_matches(&json, text))
85 }
86}
87
88fn parse_matches(json: &serde_json::Value, text: &str) -> Vec<Correction> {
89 let Some(matches) = json["matches"].as_array() else {
90 return Vec::new();
91 };
92 let mut out = Vec::with_capacity(matches.len());
93 for m in matches {
94 let offset = match m["offset"].as_u64() {
95 Some(n) => n as usize,
96 None => continue,
97 };
98 let length = match m["length"].as_u64() {
99 Some(n) => n as usize,
100 None => continue,
101 };
102 if length == 0 {
103 continue;
104 }
105 let end = offset.saturating_add(length);
106 if end > text.len() || !text.is_char_boundary(offset) || !text.is_char_boundary(end) {
107 continue;
108 }
109 let suggestions: Vec<String> = m["replacements"]
110 .as_array()
111 .into_iter()
112 .flat_map(|a| a.iter())
113 .filter_map(|r| r["value"].as_str().map(str::to_string))
114 .collect();
115 if suggestions.is_empty() {
116 continue;
117 }
118 out.push(Correction {
119 span: offset..end,
120 original: text[offset..end].to_string(),
121 suggestions,
122 });
123 }
124 out
125}
126
#[cfg(test)]
mod tests {
    //! Unit tests for response parsing and provider construction. None of
    //! them touch the network.

    use super::*;

    /// Two-match response for the text `"the helo wrold"`.
    const SAMPLE: &str = r#"{
        "matches": [
            {
                "offset": 4,
                "length": 4,
                "replacements": [{"value": "hello"}, {"value": "helot"}]
            },
            {
                "offset": 9,
                "length": 5,
                "replacements": [{"value": "world"}]
            }
        ]
    }"#;

    #[test]
    fn parses_matches_into_corrections() {
        let json: serde_json::Value = serde_json::from_str(SAMPLE).unwrap();
        let text = "the helo wrold";
        let cs = parse_matches(&json, text);
        assert_eq!(cs.len(), 2);
        assert_eq!(cs[0].span, 4..8);
        assert_eq!(cs[0].original, "helo");
        assert_eq!(cs[0].suggestions, vec!["hello", "helot"]);
        assert_eq!(cs[1].span, 9..14);
        assert_eq!(cs[1].original, "wrold");
        assert_eq!(cs[1].suggestions, vec!["world"]);
    }

    #[test]
    fn ignores_matches_with_no_replacements() {
        let json: serde_json::Value =
            serde_json::from_str(r#"{"matches":[{"offset":0,"length":3,"replacements":[]}]}"#)
                .unwrap();
        let cs = parse_matches(&json, "the");
        assert!(cs.is_empty());
    }

    #[test]
    fn ignores_matches_with_out_of_range_spans() {
        let json: serde_json::Value = serde_json::from_str(
            r#"{"matches":[{"offset":10,"length":5,"replacements":[{"value":"x"}]}]}"#,
        )
        .unwrap();
        let cs = parse_matches(&json, "short");
        assert!(cs.is_empty());
    }

    #[test]
    fn ignores_zero_length_matches() {
        let json: serde_json::Value = serde_json::from_str(
            r#"{"matches":[{"offset":0,"length":0,"replacements":[{"value":"x"}]}]}"#,
        )
        .unwrap();
        let cs = parse_matches(&json, "the");
        assert!(cs.is_empty());
    }

    #[test]
    fn disabled_config_errors_cleanly() {
        let lt = LanguageToolConfig {
            enabled: false,
            url: "http://localhost:8081".into(),
            ngram_dir: None,
        };
        assert!(matches!(
            LanguageToolProvider::from_config(&lt),
            Err(LanguageToolError::Disabled)
        ));
    }

    #[test]
    fn empty_url_errors_cleanly() {
        let lt = LanguageToolConfig {
            enabled: true,
            url: " ".into(),
            ngram_dir: None,
        };
        assert!(matches!(
            LanguageToolProvider::from_config(&lt),
            Err(LanguageToolError::NoUrl)
        ));
    }

    #[test]
    fn trailing_slash_is_trimmed_from_endpoint() {
        let lt = LanguageToolConfig {
            enabled: true,
            url: " http://localhost:8081/ ".into(),
            ngram_dir: None,
        };
        let provider = LanguageToolProvider::from_config(&lt).unwrap();
        assert_eq!(provider.endpoint, "http://localhost:8081/v2/check");
    }
}