// owl_spell/engine.rs

use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::env;
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::vec::Vec;
use thiserror::Error;
10
11// TODO: enhance this error class for better messages
12
13#[derive(Error, Debug)]
14pub enum OwlError {
15    #[error("File error: {0}")]
16    FileError(#[from] std::io::Error),
17    #[error("File error: {0} reading '{1}'")]
18    FilenameError(std::io::Error, PathBuf),
19    #[error("TOML error: {0}")]
20    TomlSerError(#[from] toml::ser::Error),
21    #[error("TOML error: {0}")]
22    TomlDeError(#[from] toml::de::Error),
23    #[error("Failed to load dictionary: {0}")]
24    InvalidDict(String),
25    #[error("Missing .owl.toml, run with --init")]
26    MissingConfig(),
27}
28
/// How a word that failed the dictionary check should be treated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Status {
    /// The word is on the project-local ignore list.
    Ignored,
    /// The word is on the global ignore list.
    GlobalIgnored,
    /// The word is not ignored anywhere.
    Misspelled,
}

impl Status {
    /// Returns the fixed lowercase identifier for this status.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match self {
            Status::Ignored => "ignored",
            Status::GlobalIgnored => "global_ignored",
            Status::Misspelled => "misspelled",
        }
    }
}
45
/// One word that failed the spell check, with where it was found.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WordBad {
    /// The word exactly as it appeared in the text.
    pub word: String,
    /// Name of the file the word was found in, as passed by the caller.
    pub file: String,
    /// Zero-based index of the line containing the word.
    #[allow(dead_code)] // for now
    pub line_num: usize,
}
53
/// Tokenizer pattern used to pull candidate words out of a line.
///
/// Alternatives, in priority order:
/// 1. `<...>` spans and
/// 2. `http://` / `https://` URLs, which are matched as a single token so
///    that they stay intact and can be excluded from spell checking;
/// 3. runs of ASCII letters, optionally joined by apostrophes (`don't`).
///
/// NOTE(review): the character class `['']` lists the same apostrophe twice;
/// possibly one of them was meant to be a typographic apostrophe -- confirm.
const WORD_PATTERN: &str = r"<[^>]+>|https?://[^\s]+|\b[a-zA-Z]+(?:[''][a-zA-Z]+)*\b";
56
57#[derive(Deserialize, Debug, Serialize, Default)]
58pub struct Config {
59    #[serde(default = "default_lang")]
60    pub lang: String,
61    #[serde(default = "default_fts")]
62    pub extensions: Vec<String>,
63    #[serde(default)]
64    pub ignore: Vec<String>,
65}
66
/// Default for `Config::lang` when the key is absent: `"en"`.
fn default_lang() -> String {
    String::from("en")
}
70
/// Default for `Config::extensions` when the key is absent:
/// `txt`, `md` and `qmd`, in that order.
fn default_fts() -> Vec<String> {
    ["txt", "md", "qmd"]
        .iter()
        .map(|&ext| ext.to_owned())
        .collect()
}
74
/// Returns `true` for tokens that must not be spell checked:
/// anything wrapped in `<` ... `>` and anything starting with
/// `http://` or `https://`.
fn skip_spellcheck(s: &str) -> bool {
    let is_bracketed = s.starts_with('<') && s.ends_with('>');
    let is_url = s.starts_with("http://") || s.starts_with("https://");
    is_bracketed || is_url
}
80
/// Searches `dir` and then each of its ancestors for `filename`.
///
/// For every directory visited, nearest first:
/// * if `<directory>/<filename>` is an existing file, that path is returned;
/// * otherwise, if the directory contains a `.git` directory, the search
///   stops there and `<directory>/<filename>` is returned **even though the
///   file does not exist** (the caller may want to create it there).
///
/// Returns `None` when the ancestors run out without either condition holding.
///
/// Ancestors are derived lexically from the path, so a relative `dir` is
/// never searched above the current working directory.
pub fn find_file_upward_stop_at_git(filename: &str, dir: &Path) -> Option<PathBuf> {
    for ancestor in dir.ancestors() {
        let candidate = ancestor.join(filename);
        // `is_file` / `is_dir` already return false for paths that do not
        // exist, so no separate `exists` probe is needed.
        if candidate.is_file() || ancestor.join(".git").is_dir() {
            return Some(candidate);
        }
    }
    None
}
97
98/// Get config file to use
99///
100/// First, starting at current directory and traversing up until found,
101///  a la .gitignore, etc.
102///
103/// If either is found, will assume/create a file in the initial directory
104///  passed.
105pub fn get_config_file(path: &Path) -> PathBuf {
106    let filename = ".owl.toml";
107    match find_file_upward_stop_at_git(filename, path) {
108        Some(found) => found,
109        _ => {
110            if path.is_file() {
111                path.parent()
112                    .unwrap_or(Path::new("."))
113                    .join(filename)
114                    .clone()
115            } else {
116                path.join(filename).clone()
117            }
118        }
119    }
120}
121
122pub fn get_home_dir() -> Result<PathBuf, OwlError> {
123    let owl_dir = env::home_dir()
124        .unwrap_or(PathBuf::from("."))
125        .join(".config/owl");
126    let dict_dir = owl_dir.join("dicts");
127    // create ~/.config/owl/dicts/ and all parent dirs
128    fs::create_dir_all(&dict_dir)?;
129
130    Ok(owl_dir)
131}
132
133pub fn save_config(config: &Config, path: &Path) -> Result<(), OwlError> {
134    let toml_string = toml::to_string(&config)?;
135    fs::write(&path, toml_string)?;
136    Ok(())
137}
138
/// Loads a word list (one word per line) from `filename` into a set.
///
/// Each line is trimmed; blank lines are dropped and duplicates collapse.
/// This is deliberately best-effort: if the file is missing or cannot be
/// read, an empty set is returned instead of an error.
pub fn load_words(filename: &Path) -> HashSet<String> {
    let content = fs::read_to_string(filename).unwrap_or_default();
    content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .map(str::to_owned)
        .collect()
}
148
149#[derive(Debug)]
150pub struct Engine {
151    dict: spellbook::Dictionary,     // star of the show
152    pub word_regex: Regex,           // for splitting words
153    pub config_file: PathBuf,        // .owl.toml
154    pub global_ignore_file: PathBuf, // ~/.config/owl/owlignore
155    pub config: Config,
156    pub local_ignored_words: HashSet<String>,
157    pub global_ignored_words: HashSet<String>,
158}
159
160impl Engine {
161    /// Instantiate owl engine for checking words with ignores.
162    #[must_use]
163    pub fn new(path: &Path) -> Result<Self, OwlError> {
164        let home_dir = get_home_dir()?;
165
166        let global_ignore_file = home_dir.join("spellignore").clone();
167        let global_ignored_words = load_words(&global_ignore_file);
168        let config_file = get_config_file(path);
169        let maybe_config_content = fs::read_to_string(&config_file);
170        let config_content = maybe_config_content.map_err(|_| OwlError::MissingConfig())?;
171        let config: Config = toml::from_str(&config_content)?;
172
173        let local_ignored_words: HashSet<_> = config.ignore.iter().cloned().collect();
174
175        // load dictionaries
176        let dict_dir = home_dir.join("dicts");
177        let aff_path = dict_dir.join(format!("{}.aff", &config.lang));
178        let aff = std::fs::read_to_string(&aff_path);
179        let dic_path = dict_dir.join(format!("{}.dic", &config.lang));
180        let dic = std::fs::read_to_string(&dic_path);
181        let dict = match (aff, dic) {
182            (Err(err), _) => return Err(OwlError::FilenameError(err, aff_path)),
183            (_, Err(err)) => return Err(OwlError::FilenameError(err, dic_path)),
184            (Ok(aff_txt), Ok(dic_txt)) => spellbook::Dictionary::new(&aff_txt, &dic_txt)
185                .map_err(|e| OwlError::InvalidDict(format!("{:?}", e)))?,
186        };
187
188        Ok(Self {
189            word_regex: Regex::new(WORD_PATTERN).expect("Invalid hard-coded regex!"),
190            dict,
191            config,
192            global_ignore_file,
193            global_ignored_words,
194            config_file,
195            local_ignored_words,
196        })
197    }
198
199    /// Run spellcheck on file, returning `WordBad` per match.
200    ///
201    /// Return values are not deduped yet.
202    #[must_use]
203    pub fn get_misspelled(&self, path: &str) -> Result<Vec<WordBad>, OwlError> {
204        let content = fs::read_to_string(path)?;
205        let lines: Vec<_> = content
206            .lines()
207            .map(std::string::ToString::to_string)
208            .collect();
209        Ok(self.get_misspelled_from_lines(&lines, path))
210    }
211
212    pub fn get_misspelled_from_lines(&self, lines: &[String], filename: &str) -> Vec<WordBad> {
213        let mut bad = vec![];
214        for (line_num, line) in lines.iter().enumerate() {
215            for word in self.word_regex.find_iter(line) {
216                if !skip_spellcheck(word.as_str()) && !self.dict.check(word.as_str()) {
217                    bad.push(WordBad {
218                        word: word.as_str().to_string(),
219                        line_num,
220                        file: filename.to_string(),
221                    });
222                }
223            }
224        }
225        bad
226    }
227
228    pub fn add_global_ignore(&mut self, word: &str) -> Result<(), OwlError> {
229        let mut file = fs::OpenOptions::new()
230            .create(true)
231            .append(true)
232            .open(&self.global_ignore_file)?;
233        self.global_ignored_words.insert(word.to_string());
234        writeln!(file, "{}", word.trim())?;
235        Ok(())
236    }
237
238    pub fn delete_global_ignore(&mut self, word: &str) -> Result<(), OwlError> {
239        self.global_ignored_words.remove(word);
240        let mut words: Vec<String> = self.global_ignored_words.iter().cloned().collect();
241        words.sort();
242        let content = words.join("\n");
243        fs::write(&self.global_ignore_file, content + "\n")?;
244        Ok(())
245    }
246
247    pub fn add_local_ignore(&mut self, word: &str) -> Result<(), OwlError> {
248        self.local_ignored_words.insert(word.to_string());
249        self.config.ignore.push(word.to_string());
250        self.config.ignore.sort();
251        save_config(&self.config, &self.config_file)?;
252        Ok(())
253    }
254
255    pub fn delete_local_ignore(&mut self, word: &str) -> Result<(), OwlError> {
256        // filter from list and
257        self.local_ignored_words.remove(word);
258        self.config.ignore.retain(|w| w != word);
259        save_config(&self.config, &self.config_file)?;
260        Ok(())
261    }
262
263    /// Check if a word is ignored.
264    ///
265    #[must_use]
266    pub fn get_ignore_status(&self, word: &str) -> Status {
267        if self.local_ignored_words.contains(word) {
268            Status::Ignored
269        } else if self.global_ignored_words.contains(word) {
270            Status::GlobalIgnored
271        } else {
272            Status::Misspelled
273        }
274    }
275
276    /// Make suggestions using underlying library.
277    ///
278    /// TODO: incorporate ignores?
279    pub fn suggest(&self, word: &str, suggestions: &mut Vec<String>) {
280        self.dict.suggest(word, suggestions);
281    }
282
283    #[must_use]
284    pub fn check(&self, word: &str) -> bool {
285        self.dict.check(word)
286    }
287}