use std::time::Duration;
use crate::LanguageToolConfig;
use crate::providers::Correction;
/// Timeout (10 seconds) configured on the HTTP agent used for each
/// LanguageTool check request.
const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
/// Failures that can occur while constructing a `LanguageToolProvider`
/// or while asking the LanguageTool server to check a piece of text.
#[derive(Debug, thiserror::Error)]
pub enum LanguageToolError {
/// The config has `enabled` set to `false`, so no provider is built.
#[error("LanguageTool is disabled in the config")]
Disabled,
/// The configured URL is empty once surrounding whitespace and trailing
/// slashes have been removed.
#[error("LanguageTool URL is empty")]
NoUrl,
/// Sending the HTTP request failed; carries the stringified
/// underlying error.
#[error("LanguageTool request failed: {0}")]
Request(String),
/// The response body could not be read as JSON; carries the stringified
/// underlying error.
#[error("LanguageTool response was unparseable: {0}")]
Response(String),
}
/// Correction provider that sends text to a LanguageTool HTTP server.
#[derive(Debug, Clone)]
pub struct LanguageToolProvider {
/// Full URL of the check endpoint: the configured base URL followed by
/// `/v2/check`.
endpoint: String,
}
impl LanguageToolProvider {
    /// Builds a provider from the LanguageTool section of the configuration.
    ///
    /// The configured URL is stripped of surrounding whitespace and of any
    /// trailing `/` characters before `/v2/check` is appended to form the
    /// endpoint.
    ///
    /// # Errors
    ///
    /// * [`LanguageToolError::Disabled`] when `lt.enabled` is `false`
    ///   (checked first, regardless of the URL).
    /// * [`LanguageToolError::NoUrl`] when nothing is left of the URL after
    ///   normalisation.
    pub fn from_config(lt: &LanguageToolConfig) -> Result<Self, LanguageToolError> {
        if !lt.enabled {
            return Err(LanguageToolError::Disabled);
        }

        match lt.url.trim().trim_end_matches('/') {
            "" => Err(LanguageToolError::NoUrl),
            base => Ok(Self {
                endpoint: format!("{base}/v2/check"),
            }),
        }
    }

    /// Submits `text` to the check endpoint and returns the corrections
    /// extracted from the response.
    ///
    /// Text that is empty or whitespace-only yields an empty list without
    /// any network traffic. The request is a form-encoded POST with
    /// `language=en-US` and `level=picky`, issued through an agent
    /// configured with [`REQUEST_TIMEOUT`].
    ///
    /// # Errors
    ///
    /// * [`LanguageToolError::Request`] when sending the form fails.
    /// * [`LanguageToolError::Response`] when the response body cannot be
    ///   read as JSON.
    pub fn check_text(&self, text: &str) -> Result<Vec<Correction>, LanguageToolError> {
        if text.trim().is_empty() {
            return Ok(Vec::new());
        }

        let form = [("text", text), ("language", "en-US"), ("level", "picky")];
        let agent = ureq::AgentBuilder::new().timeout(REQUEST_TIMEOUT).build();

        let reply = agent
            .post(&self.endpoint)
            .send_form(&form)
            .map_err(|err| LanguageToolError::Request(err.to_string()))?;
        let body = reply
            .into_json::<serde_json::Value>()
            .map_err(|err| LanguageToolError::Response(err.to_string()))?;

        Ok(parse_matches(&body, text))
    }
}
/// Converts the `matches` array of a LanguageTool response into
/// [`Correction`]s whose spans are byte ranges into `text`.
///
/// LanguageTool reports `offset` and `length` in UTF-16 code units (Java
/// string indices), not UTF-8 bytes, so both are translated to byte offsets
/// before `text` is sliced. For pure-ASCII text the two coincide.
///
/// A match is silently skipped when:
/// * `offset` or `length` is missing, not an unsigned integer, or does not
///   fit in `usize`;
/// * `length` is zero;
/// * the range falls outside `text` or splits a character;
/// * it carries no replacement with a string `value`.
///
/// Returns an empty vector when `json` has no `matches` array.
fn parse_matches(json: &serde_json::Value, text: &str) -> Vec<Correction> {
    let Some(matches) = json["matches"].as_array() else {
        return Vec::new();
    };
    matches
        .iter()
        .filter_map(|entry| parse_match(entry, text))
        .collect()
}

/// Parses one element of the `matches` array, or returns `None` if it
/// should be skipped (see [`parse_matches`] for the conditions).
fn parse_match(entry: &serde_json::Value, text: &str) -> Option<Correction> {
    // Checked conversion instead of `as usize`, which would silently
    // truncate on targets where `usize` is narrower than 64 bits. Written
    // fully qualified so it does not depend on the edition's prelude.
    let to_index = |v: &serde_json::Value| -> Option<usize> {
        v.as_u64()
            .and_then(|n| <usize as std::convert::TryFrom<u64>>::try_from(n).ok())
    };

    let offset = to_index(&entry["offset"])?;
    let length = to_index(&entry["length"])?;
    if length == 0 {
        return None;
    }

    let start = utf16_to_byte_offset(text, offset)?;
    // Measure the length from `start` so only the matched region is
    // rescanned; `start` is a char boundary, so the slice cannot panic.
    let end = start + utf16_to_byte_offset(&text[start..], length)?;

    let suggestions: Vec<String> = entry["replacements"]
        .as_array()
        .into_iter()
        .flatten()
        .filter_map(|r| r["value"].as_str().map(str::to_string))
        .collect();
    if suggestions.is_empty() {
        return None;
    }

    Some(Correction {
        span: start..end,
        original: text[start..end].to_string(),
        suggestions,
    })
}

/// Maps a position counted in UTF-16 code units to the matching byte offset
/// in `text`.
///
/// Returns `None` when `units` lies past the end of `text` or lands between
/// the two halves of a surrogate pair (i.e. inside a single `char`).
fn utf16_to_byte_offset(text: &str, units: usize) -> Option<usize> {
    let mut seen = 0usize;
    for (byte_idx, ch) in text.char_indices() {
        if seen == units {
            return Some(byte_idx);
        }
        if seen > units {
            // The previous char was two code units wide and `units`
            // pointed into the middle of it.
            return None;
        }
        seen += ch.len_utf16();
    }
    (seen == units).then_some(text.len())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Canned response with two matches, each carrying at least one
    /// replacement, lined up with the text `"the helo wrold"`.
    const SAMPLE: &str = r#"{
"matches": [
{
"offset": 4,
"length": 4,
"replacements": [{"value": "hello"}, {"value": "helot"}]
},
{
"offset": 9,
"length": 5,
"replacements": [{"value": "world"}]
}
]
}"#;

    /// Parses a JSON fixture, panicking if the fixture itself is malformed.
    fn json(raw: &str) -> serde_json::Value {
        serde_json::from_str(raw).unwrap()
    }

    /// Builds a config with the given switch and URL and no n-gram directory.
    fn config(enabled: bool, url: &str) -> LanguageToolConfig {
        LanguageToolConfig {
            enabled,
            url: url.into(),
            ngram_dir: None,
        }
    }

    #[test]
    fn parses_matches_into_corrections() {
        let corrections = parse_matches(&json(SAMPLE), "the helo wrold");
        assert_eq!(corrections.len(), 2);

        let (first, second) = (&corrections[0], &corrections[1]);
        assert_eq!(first.span, 4..8);
        assert_eq!(first.original, "helo");
        assert_eq!(first.suggestions, vec!["hello", "helot"]);
        assert_eq!(second.span, 9..14);
        assert_eq!(second.original, "wrold");
        assert_eq!(second.suggestions, vec!["world"]);
    }

    #[test]
    fn ignores_matches_with_no_replacements() {
        let response = json(r#"{"matches":[{"offset":0,"length":3,"replacements":[]}]}"#);
        assert!(parse_matches(&response, "the").is_empty());
    }

    #[test]
    fn ignores_matches_with_out_of_range_spans() {
        let response =
            json(r#"{"matches":[{"offset":10,"length":5,"replacements":[{"value":"x"}]}]}"#);
        assert!(parse_matches(&response, "short").is_empty());
    }

    #[test]
    fn disabled_config_errors_cleanly() {
        let outcome = LanguageToolProvider::from_config(&config(false, "http://localhost:8081"));
        assert!(matches!(outcome, Err(LanguageToolError::Disabled)));
    }

    #[test]
    fn empty_url_errors_cleanly() {
        let outcome = LanguageToolProvider::from_config(&config(true, " "));
        assert!(matches!(outcome, Err(LanguageToolError::NoUrl)));
    }
}