Skip to main content

hyprcorrect_core/
definitions.rs

1//! Word definitions for the review popup's suggestion dropdown.
2//!
3//! Two sources, selected by [`crate::DefinitionSource`]:
4//!
5//! - **Local** (default): a bundled WordNet 3.1 gloss set
6//!   (`assets/definitions-en.tsv.gz` — the sense-1 gloss of each
7//!   single-word lemma), lazily gunzipped into an in-memory map on first
8//!   lookup. Fully offline; ~83k words.
9//! - **Online**: `api.dictionaryapi.dev` (no API key). The request blocks
10//!   on DNS/TLS/HTTP, so [`define_online`] is meant to run on a worker
11//!   thread — never the egui loop — and sends the looked-up word to a
12//!   third party.
13//!
14//! Words with no entry (proper nouns, most function words, misspellings)
15//! return `None`; the UI shows that gracefully.
16
17use std::collections::HashMap;
18use std::io::Read;
19use std::sync::OnceLock;
20use std::time::Duration;
21
22use crate::DefinitionSource;
23
/// Bundled, gzipped `word\tdefinition\n` set derived from WordNet 3.1.
/// See `assets/WORDNET-LICENSE.txt`.
///
/// Embedded into the binary at compile time by `include_bytes!` (the path
/// is resolved relative to this source file). The bytes stay compressed
/// until [`local_map`] gunzips them on the first local lookup.
static DEFS_GZ: &[u8] = include_bytes!("../assets/definitions-en.tsv.gz");
27
28/// Lazily-decompressed `word -> definition` map (lowercased keys). Built
29/// once per process on the first local lookup.
30fn local_map() -> &'static HashMap<String, String> {
31    static MAP: OnceLock<HashMap<String, String>> = OnceLock::new();
32    MAP.get_or_init(|| {
33        let mut text = String::new();
34        if flate2::read::GzDecoder::new(DEFS_GZ)
35            .read_to_string(&mut text)
36            .is_err()
37        {
38            return HashMap::new();
39        }
40        text.lines()
41            .filter_map(|line| line.split_once('\t'))
42            .map(|(w, d)| (w.to_string(), d.to_string()))
43            .collect()
44    })
45}
46
47/// A bundled offline definition for `word`, if WordNet has one.
48/// Case-insensitive.
49pub fn define_local(word: &str) -> Option<String> {
50    let key = word.trim().to_ascii_lowercase();
51    if key.is_empty() {
52        return None;
53    }
54    local_map().get(&key).cloned()
55}
56
57/// A definition from the configured `source`, for the synchronous paths.
58/// `Online` returns `None` here because it blocks — callers fetch it via
59/// [`define_online`] on a worker thread instead.
60pub fn define(word: &str, source: DefinitionSource) -> Option<String> {
61    match source {
62        DefinitionSource::Local => define_local(word),
63        DefinitionSource::Off | DefinitionSource::Online => None,
64    }
65}
66
67/// Fetch a one-line definition from `api.dictionaryapi.dev`. Blocking, so
68/// run it on a background thread. Returns `None` on any error or an
69/// unknown word.
70pub fn define_online(word: &str) -> Option<String> {
71    let word = word.trim();
72    if word.is_empty() {
73        return None;
74    }
75    let url = format!(
76        "https://api.dictionaryapi.dev/api/v2/entries/en/{}",
77        urlencode(word)
78    );
79    let agent: ureq::Agent = ureq::AgentBuilder::new()
80        .timeout(Duration::from_secs(8))
81        .build();
82    let json: serde_json::Value = agent.get(&url).call().ok()?.into_json().ok()?;
83    // Shape: [ { "meanings": [ { "definitions": [ { "definition": "…" } ] } ] } ]
84    let def = json
85        .get(0)?
86        .get("meanings")?
87        .as_array()?
88        .iter()
89        .find_map(|m| {
90            m.get("definitions")?
91                .as_array()?
92                .iter()
93                .find_map(|d| d.get("definition").and_then(serde_json::Value::as_str))
94        })?;
95    Some(def.trim().to_string())
96}
97
/// Percent-encode a single URL path segment (the looked-up word).
///
/// Bytes in the RFC 3986 "unreserved" set (ASCII letters, digits, `-`,
/// `_`, `.`, `~`) are copied through unchanged; every other byte of the
/// UTF-8 input — including `/`, space and each byte of a multi-byte
/// character — becomes `%XX` with uppercase hex digits.
fn urlencode(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(s.len());
    for b in s.bytes() {
        if b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_' | b'.' | b'~') {
            out.push(char::from(b));
        } else {
            // Push the escape directly; going through `format!` would
            // allocate a throwaway `String` for every encoded byte.
            out.push('%');
            out.push(char::from(HEX[usize::from(b >> 4)]));
            out.push(char::from(HEX[usize::from(b & 0x0F)]));
        }
    }
    out
}
113
#[cfg(test)]
mod tests {
    use super::*;

    /// The bundled table answers for ordinary words (regardless of
    /// letter case) and returns `None` for everything else.
    #[test]
    fn local_covers_common_words_and_misses_gracefully() {
        // Real words WordNet covers; "Veneer" checks case-insensitivity.
        for known in ["difference", "acquiesce", "Veneer"] {
            assert!(define_local(known).is_some());
        }
        // Not in WordNet (joke word) or blank input → graceful None.
        for missing in ["recombobulate", "", "   "] {
            assert_eq!(define_local(missing), None);
        }
    }

    /// `define` only answers synchronously for the local source.
    #[test]
    fn define_routes_by_source() {
        assert!(define("acquiesce", DefinitionSource::Local).is_some());
        // Online is resolved off-thread, Off is silent — both None here.
        assert_eq!(define("acquiesce", DefinitionSource::Online), None);
        assert_eq!(define("acquiesce", DefinitionSource::Off), None);
    }
}