Skip to main content

hyprcorrect_core/
providers.rs

1//! The correction-provider interface and the bundled offline provider.
2//!
3//! [`CorrectionProvider`] is the interface; [`OfflineProvider`] is the
4//! bundled default — a Hunspell-compatible spell-checker (`spellbook`)
5//! that runs fully in-process. Network providers (an LLM backend, a
6//! LanguageTool HTTP client) land in milestones M4 and M5. See the
7//! "Correction providers" section of `DESIGN.md`.
8
9use std::ops::Range;
10
11use async_trait::async_trait;
12
13/// A spelling/typo correction backend.
14#[async_trait]
15pub trait CorrectionProvider: Send + Sync {
16    /// Check `text` and return the corrections this provider would make.
17    ///
18    /// `ctx` carries the focused-application id and the text's locale,
19    /// which contextual providers may use.
20    async fn check(&self, text: &str, ctx: &Context) -> Result<Vec<Correction>, Error>;
21}
22
/// A single suggested fix for one span of the checked text.
///
/// Corrections compare by value (`PartialEq`/`Eq`), so callers and tests
/// can assert on a whole correction rather than field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Correction {
    /// Byte range of the flagged word within the checked text.
    pub span: Range<usize>,
    /// The original (flagged) text covered by `span`.
    pub original: String,
    /// Replacement candidates, best first.
    pub suggestions: Vec<String>,
}
33
/// Context passed to a provider alongside the text to check.
///
/// Both fields are optional; [`Context::default`] is the "nothing known"
/// context. Contexts compare by value (`PartialEq`/`Eq`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Context {
    /// The focused application's identifier, when known — the Wayland
    /// app id or the macOS bundle id.
    pub app_id: Option<String>,
    /// BCP-47 locale of the text, e.g. `en-US`.
    pub locale: Option<String>,
}
43
44/// An error returned by a [`CorrectionProvider`].
45#[derive(Debug, thiserror::Error)]
46pub enum Error {
47    /// A provider could not be initialized — e.g. a malformed dictionary.
48    #[error("could not initialize correction provider: {0}")]
49    Init(String),
50    /// The provider could not be reached, or the request itself failed.
51    #[error("correction request failed: {0}")]
52    Request(String),
53    /// The provider's response could not be understood.
54    #[error("malformed correction response: {0}")]
55    Response(String),
56}
57
58/// The bundled offline correction provider.
59///
60/// Wraps [`spellbook`], a pure-Rust, Hunspell-compatible spell-checker,
61/// over an English dictionary. Fully local and instant — this is the
62/// provider behind `fix-word`. Contextual fixes route elsewhere.
63pub struct OfflineProvider {
64    dictionary: spellbook::Dictionary,
65}
66
67impl OfflineProvider {
68    /// Build the provider from Hunspell `.aff` and `.dic` data.
69    ///
70    /// # Errors
71    ///
72    /// Returns [`Error::Init`] if the dictionary fails to parse.
73    pub fn from_hunspell(aff: &str, dic: &str) -> Result<Self, Error> {
74        let dictionary =
75            spellbook::Dictionary::new(aff, dic).map_err(|e| Error::Init(format!("{e:?}")))?;
76        Ok(Self { dictionary })
77    }
78
79    /// Build the provider from the bundled `en_US` dictionary.
80    ///
81    /// The dictionary is vendored from wooorm/dictionaries (the `en`
82    /// dictionary, derived from SCOWL) and embedded at compile time; its
83    /// license is at `dictionaries/en_US/LICENSE`.
84    ///
85    /// # Errors
86    ///
87    /// Returns [`Error::Init`] if the bundled dictionary fails to parse,
88    /// which would indicate a packaging bug.
89    pub fn en_us() -> Result<Self, Error> {
90        Self::from_hunspell(
91            include_str!("../dictionaries/en_US/en_US.aff"),
92            include_str!("../dictionaries/en_US/en_US.dic"),
93        )
94    }
95
96    /// Spell-check `text`, returning one [`Correction`] per misspelled
97    /// word. This is the synchronous core behind the async trait method.
98    pub fn check_text(&self, text: &str) -> Vec<Correction> {
99        let mut corrections = Vec::new();
100        for (offset, word) in words(text) {
101            if self.dictionary.check(word) {
102                continue;
103            }
104            let mut suggestions = Vec::new();
105            self.dictionary.suggest(word, &mut suggestions);
106            corrections.push(Correction {
107                span: offset..offset + word.len(),
108                original: word.to_string(),
109                suggestions,
110            });
111        }
112        corrections
113    }
114}
115
116#[async_trait]
117impl CorrectionProvider for OfflineProvider {
118    async fn check(&self, text: &str, _ctx: &Context) -> Result<Vec<Correction>, Error> {
119        Ok(self.check_text(text))
120    }
121}
122
/// Split `text` into the words to spell-check, returned as
/// `(byte offset, word)` pairs in order of appearance.
///
/// Tokens are delimited by whitespace. Each token is then narrowed to its
/// word core: anything before its first alphanumeric character and after
/// its last one is dropped, so surrounding punctuation (`helo,`, `(teh)`,
/// `"wrold"`) is neither handed to the dictionary nor included in the
/// reported span. Punctuation inside the core (`don't`, `well-known`) is
/// untouched. Periods directly following the core are kept, because they
/// can be part of the word itself (`e.g.`, `etc.`); how a trailing period
/// is judged is left to the dictionary.
///
/// Tokens with no alphanumeric character at all (`--`, `&`, `...`) are
/// not words and are skipped.
///
/// The offset is the byte index of the returned slice within `text`.
fn words(text: &str) -> Vec<(usize, &str)> {
    let mut out = Vec::new();
    // Byte index where the token currently being scanned began, if any.
    let mut start: Option<usize> = None;
    for (i, c) in text.char_indices() {
        if c.is_whitespace() {
            if let Some(s) = start.take() {
                out.extend(word_core(s, &text[s..i]));
            }
        } else if start.is_none() {
            start = Some(i);
        }
    }
    // A token that runs to the end of the text has no closing whitespace.
    if let Some(s) = start {
        out.extend(word_core(s, &text[s..]));
    }
    out
}

/// Narrow a whitespace-free `token` that begins at byte `offset` to its
/// word core, or return `None` if it has no alphanumeric character.
///
/// NOTE: `char::is_alphanumeric` does not cover combining marks, so a
/// decomposed accent at the very end of a token falls outside the core.
fn word_core(offset: usize, token: &str) -> Option<(usize, &str)> {
    let first = token.find(char::is_alphanumeric)?;
    let (last, last_char) = token
        .char_indices()
        .rev()
        .find(|&(_, c)| c.is_alphanumeric())?;
    let mut end = last + last_char.len_utf8();
    // Keep periods that immediately follow the core; `.` is a single
    // byte, so counting bytes keeps `end` on a character boundary.
    end += token[end..].bytes().take_while(|&b| b == b'.').count();
    Some((offset + first, &token[first..end]))
}
142
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal Hunspell data for the unit tests: no affix rules at all, and
    // a word list whose leading line declares how many entries follow.
    const TEST_AFF: &str = "";
    const TEST_DIC: &str = "5\nhello\nworld\nthe\nquick\nveneer\n";

    /// A provider backed by the tiny test dictionary.
    fn provider() -> OfflineProvider {
        OfflineProvider::from_hunspell(TEST_AFF, TEST_DIC).unwrap()
    }

    /// Check `text` with `checker` and return its one correction, failing
    /// the test unless exactly one was reported.
    fn sole_correction(checker: &OfflineProvider, text: &str) -> Correction {
        let mut found = checker.check_text(text);
        assert_eq!(found.len(), 1);
        found.remove(0)
    }

    #[test]
    fn correct_words_produce_no_corrections() {
        let found = provider().check_text("hello world");
        assert!(found.is_empty());
    }

    #[test]
    fn a_misspelling_is_flagged_with_suggestions() {
        let correction = sole_correction(&provider(), "helo");
        assert_eq!(correction.original, "helo");
        assert!(
            correction.suggestions.contains(&"hello".to_string()),
            "expected 'hello' among suggestions, got {:?}",
            correction.suggestions,
        );
    }

    #[test]
    fn correction_span_locates_the_word() {
        let correction = sole_correction(&provider(), "the helo");
        // In "the helo", the misspelling occupies bytes 4..8.
        assert_eq!(correction.span, 4..8);
    }

    #[test]
    fn only_misspelled_words_are_reported() {
        let correction = sole_correction(&provider(), "the quick fakeword");
        assert_eq!(correction.original, "fakeword");
    }

    /// The real bundled en_US dictionary, built lazily and shared by the
    /// tests below so it is parsed only once.
    static EN_US: std::sync::LazyLock<OfflineProvider> =
        std::sync::LazyLock::new(|| OfflineProvider::en_us().expect("bundled en_US parses"));

    #[test]
    fn en_us_accepts_common_words() {
        let found = EN_US.check_text("the quick brown fox");
        assert!(found.is_empty());
    }

    #[test]
    fn en_us_flags_a_misspelling_with_the_right_fix() {
        let correction = sole_correction(&EN_US, "teh");
        assert!(
            correction.suggestions.contains(&"the".to_string()),
            "expected 'the' among suggestions, got {:?}",
            correction.suggestions,
        );
    }

    #[test]
    fn en_us_suggests_for_the_motivating_typo() {
        // The example from the prototype: a genuine typo must come back
        // with at least one suggestion.
        let correction = sole_correction(&EN_US, "vernuer");
        assert!(
            !correction.suggestions.is_empty(),
            "expected suggestions for 'vernuer'",
        );
    }
}