use std::ops::Range;
use async_trait::async_trait;
/// Asynchronous interface implemented by anything that can check a piece of
/// text and report corrections for it.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait CorrectionProvider: Send + Sync {
/// Checks `text` and returns one [`Correction`] per problem found.
///
/// `ctx` carries optional information about where the text came from; how
/// (or whether) it is used is up to the implementation.
///
/// # Errors
/// Returns an [`Error`] when the provider cannot produce a result.
async fn check(&self, text: &str, ctx: &Context) -> Result<Vec<Correction>, Error>;
}
/// A single flagged piece of text together with proposed replacements.
#[derive(Debug, Clone)]
pub struct Correction {
/// Location of the flagged text within the checked input.
/// `OfflineProvider::check_text` fills this with a byte range
/// (`offset..offset + word.len()`).
pub span: Range<usize>,
/// The flagged text exactly as it appeared in the input.
pub original: String,
/// Candidate replacements for `original`. `OfflineProvider::check_text`
/// stores whatever the dictionary suggested, which may be nothing.
pub suggestions: Vec<String>,
}
/// Optional information passed alongside the text to
/// [`CorrectionProvider::check`]. Both fields default to `None`.
#[derive(Debug, Clone, Default)]
pub struct Context {
/// NOTE(review): presumably identifies the application the text came
/// from; no code in this file reads it, so confirm against callers.
pub app_id: Option<String>,
/// NOTE(review): presumably a locale tag for the text; the expected format
/// is not established in this file, so confirm against callers.
pub locale: Option<String>,
}
/// Failures a correction provider can report. Each variant carries a
/// human-readable detail string that is appended to the variant's message.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The provider could not be constructed. `OfflineProvider::from_hunspell`
/// returns this when the dictionary data fails to parse.
#[error("could not initialize correction provider: {0}")]
Init(String),
/// A correction request failed.
/// NOTE(review): not constructed anywhere in this file; presumably used by
/// providers defined elsewhere.
#[error("correction request failed: {0}")]
Request(String),
/// A provider's response could not be interpreted.
/// NOTE(review): not constructed anywhere in this file; presumably used by
/// providers defined elsewhere.
#[error("malformed correction response: {0}")]
Response(String),
}
/// A [`CorrectionProvider`] that spell-checks against an in-memory
/// `spellbook` dictionary.
pub struct OfflineProvider {
// Parsed dictionary used for both lookups and suggestions.
dictionary: spellbook::Dictionary,
}
impl OfflineProvider {
pub fn from_hunspell(aff: &str, dic: &str) -> Result<Self, Error> {
let dictionary =
spellbook::Dictionary::new(aff, dic).map_err(|e| Error::Init(format!("{e:?}")))?;
Ok(Self { dictionary })
}
pub fn en_us() -> Result<Self, Error> {
Self::from_hunspell(
include_str!("../dictionaries/en_US/en_US.aff"),
include_str!("../dictionaries/en_US/en_US.dic"),
)
}
pub fn check_text(&self, text: &str) -> Vec<Correction> {
let mut corrections = Vec::new();
for (offset, word) in words(text) {
if self.dictionary.check(word) {
continue;
}
let mut suggestions = Vec::new();
self.dictionary.suggest(word, &mut suggestions);
corrections.push(Correction {
span: offset..offset + word.len(),
original: word.to_string(),
suggestions,
});
}
corrections
}
}
/// Exposes the synchronous dictionary check through the async provider trait.
#[async_trait]
impl CorrectionProvider for OfflineProvider {
/// Delegates to [`OfflineProvider::check_text`]. The context is ignored and
/// the call never returns an error.
async fn check(&self, text: &str, _ctx: &Context) -> Result<Vec<Correction>, Error> {
Ok(self.check_text(text))
}
}
/// Splits `text` into words, returning each word with the byte offset at
/// which it starts in `text`.
///
/// Tokens are delimited by Unicode whitespace. Non-alphanumeric characters at
/// either edge of a token (quotes, brackets, commas, full stops, ...) are
/// stripped, so `"hello,"` yields `"hello"` and `"(helo)"` yields `"helo"` at
/// offset 1. Characters inside a word, such as the apostrophe in `don't` or
/// the hyphen in `well-known`, are kept. Tokens made up only of punctuation
/// are dropped.
fn words(text: &str) -> Vec<(usize, &str)> {
    let mut out = Vec::new();
    // Byte offset of the token currently being scanned, if any.
    let mut start: Option<usize> = None;
    for (i, c) in text.char_indices() {
        if c.is_whitespace() {
            if let Some(s) = start.take() {
                out.extend(trim_punctuation(s, &text[s..i]));
            }
        } else if start.is_none() {
            start = Some(i);
        }
    }
    // Flush a token that runs to the end of the input.
    if let Some(s) = start {
        out.extend(trim_punctuation(s, &text[s..]));
    }
    out
}

/// Strips non-alphanumeric characters from both edges of `token`, which starts
/// at byte `offset`, and returns the remaining word with its adjusted offset.
/// Returns `None` when nothing alphanumeric is left.
fn trim_punctuation(offset: usize, token: &str) -> Option<(usize, &str)> {
    let without_lead = token.trim_start_matches(|c: char| !c.is_alphanumeric());
    // Bytes removed from the front shift the word's starting offset.
    let lead = token.len() - without_lead.len();
    let word = without_lead.trim_end_matches(|c: char| !c.is_alphanumeric());
    (!word.is_empty()).then_some((offset + lead, word))
}
// Unit tests for `OfflineProvider`: the first group uses a tiny inline
// dictionary, the second group uses the bundled en_US dictionary.
#[cfg(test)]
mod tests {
use super::*;
// Empty affix data for the inline test dictionary.
const TEST_AFF: &str = "";
// Inline word list: a leading "5" line followed by five words, one per line.
const TEST_DIC: &str = "5\nhello\nworld\nthe\nquick\nveneer\n";
// Builds a provider from the inline fixtures; panics if they fail to parse.
fn provider() -> OfflineProvider {
OfflineProvider::from_hunspell(TEST_AFF, TEST_DIC).unwrap()
}
// Words present in the dictionary must not be reported.
#[test]
fn correct_words_produce_no_corrections() {
assert!(provider().check_text("hello world").is_empty());
}
// A one-letter-off misspelling is reported and the intended word is offered.
#[test]
fn a_misspelling_is_flagged_with_suggestions() {
let corrections = provider().check_text("helo");
assert_eq!(corrections.len(), 1);
assert_eq!(corrections[0].original, "helo");
assert!(
corrections[0].suggestions.iter().any(|s| s == "hello"),
"expected 'hello' among suggestions, got {:?}",
corrections[0].suggestions,
);
}
// The span is the byte range of the misspelled word within the input.
#[test]
fn correction_span_locates_the_word() {
let corrections = provider().check_text("the helo");
assert_eq!(corrections.len(), 1);
assert_eq!(corrections[0].span, 4..8);
}
// Correct words mixed with a misspelling yield only the misspelling.
#[test]
fn only_misspelled_words_are_reported() {
let corrections = provider().check_text("the quick fakeword");
assert_eq!(corrections.len(), 1);
assert_eq!(corrections[0].original, "fakeword");
}
// The bundled en_US provider is built once, on first use, and shared by the
// tests below; construction failure panics with the `expect` message.
static EN_US: std::sync::LazyLock<OfflineProvider> =
std::sync::LazyLock::new(|| OfflineProvider::en_us().expect("bundled en_US parses"));
// Common English words must be accepted by the bundled dictionary.
#[test]
fn en_us_accepts_common_words() {
assert!(EN_US.check_text("the quick brown fox").is_empty());
}
// A transposition typo is flagged and the intended word is suggested.
#[test]
fn en_us_flags_a_misspelling_with_the_right_fix() {
let corrections = EN_US.check_text("teh");
assert_eq!(corrections.len(), 1);
assert!(
corrections[0].suggestions.iter().any(|s| s == "the"),
"expected 'the' among suggestions, got {:?}",
corrections[0].suggestions,
);
}
// Only requires that some suggestion exists for this typo, not which one.
#[test]
fn en_us_suggests_for_the_motivating_typo() {
let corrections = EN_US.check_text("vernuer");
assert_eq!(corrections.len(), 1);
assert!(
!corrections[0].suggestions.is_empty(),
"expected suggestions for 'vernuer'",
);
}
}