use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use serde::Deserialize;
use tokio::sync::mpsc;
/// Upper bound on the total number of suggestions returned by `SpellChecker::suggest`.
const MAX_SUGGESTIONS: usize = 4;
/// At most this many raw suggestions are taken from each individual dictionary.
const MAX_PER_DICT: usize = 6;
/// Words shorter than this are never flagged as misspelled.
const MIN_WORD_LEN: usize = 2;
/// Scheme prefixes that mark a word as a URL, which the checker skips.
const URL_PREFIXES: &[&str] = &[
    "http:", "https:", "ftp:", "ftps:", "ssh:", "irc:", "ircs:", "git:", "svn:", "file:", "telnet:",
];
/// One loaded language dictionary together with the language code it was loaded for.
struct LangDict {
    #[expect(dead_code, reason = "stored for diagnostics and /spellcheck status")]
    lang: String,
    /// Parsed dictionary used for word checks and suggestions.
    dict: Arc<spellbook::Dictionary>,
}
/// File stem of the optional computing/IT dictionary (`computing.aff` / `computing.dic`).
const COMPUTING_DICT_STEM: &str = "computing";
/// Spell checker backed by zero or more language dictionaries plus an
/// optional computing/IT terms dictionary.
pub struct SpellChecker {
    /// Language dictionaries, in the order the languages were configured.
    dicts: Vec<LangDict>,
    /// Optional computing/IT dictionary, consulted before the language dictionaries.
    computing_dict: Option<Arc<spellbook::Dictionary>>,
}
impl SpellChecker {
    /// Loads the dictionaries for `languages` from `dict_dir`, plus the
    /// optional computing/IT dictionary when `computing` is true.
    ///
    /// Languages whose dictionary files are missing or unparsable are
    /// skipped with a warning; the checker remains usable with whatever
    /// loaded successfully (possibly nothing).
    pub fn load(languages: &[String], dict_dir: &Path, computing: bool) -> Self {
        let mut dicts = Vec::new();
        for lang in languages {
            match load_dictionary(lang, dict_dir) {
                Ok(dict) => {
                    tracing::info!(lang = %lang, "spellcheck dictionary loaded");
                    dicts.push(LangDict {
                        lang: lang.clone(),
                        dict: Arc::new(dict),
                    });
                }
                Err(e) => {
                    tracing::warn!(lang = %lang, error = %e, "failed to load spellcheck dictionary");
                }
            }
        }
        let computing_dict = if computing {
            load_dictionary(COMPUTING_DICT_STEM, dict_dir).map_or_else(
                |_| {
                    // Missing computing dict is expected on first run; point
                    // the user at the download command instead of warning.
                    tracing::info!(
                        "computing dictionary not found — run /spellcheck get computing"
                    );
                    None
                },
                |dict| {
                    tracing::info!("computing/IT dictionary loaded");
                    Some(Arc::new(dict))
                },
            )
        } else {
            None
        };
        Self {
            dicts,
            computing_dict,
        }
    }

    /// Returns `true` when `word` should be treated as correctly spelled.
    ///
    /// Exemptions (always accepted): no dictionaries loaded, words shorter
    /// than `MIN_WORD_LEN` characters, URLs, number-like tokens, words
    /// containing `_` (identifiers), and known nicks (case-insensitive).
    /// Otherwise the word must be accepted by the computing dictionary or
    /// any language dictionary.
    pub fn check(&self, word: &str, nicks: &HashSet<String>) -> bool {
        // Count characters, not bytes: a single multi-byte letter (e.g. "é",
        // 2 bytes but 1 char) must still fall under the minimum-length
        // exemption. `take` bounds the scan for long words.
        if (self.dicts.is_empty() && self.computing_dict.is_none())
            || word.chars().take(MIN_WORD_LEN).count() < MIN_WORD_LEN
        {
            return true;
        }
        if is_url(word) {
            return true;
        }
        if is_number_like(word) {
            return true;
        }
        // Underscores mark code identifiers (snake_case); never flag them.
        if word.contains('_') {
            return true;
        }
        // Nicks are matched case-insensitively.
        let word_lower = word.to_lowercase();
        if nicks.iter().any(|n| n.to_lowercase() == word_lower) {
            return true;
        }
        // Computing/IT terms are accepted before consulting language dicts.
        if let Some(ref cd) = self.computing_dict
            && cd.check(word)
        {
            return true;
        }
        self.dicts.iter().any(|ld| ld.dict.check(word))
    }

    /// Collects up to `MAX_SUGGESTIONS` suggestions for `word`, taking at
    /// most `MAX_PER_DICT` from each language dictionary and de-duplicating
    /// case-insensitively across dictionaries. The computing dictionary is
    /// not consulted for suggestions.
    pub fn suggest(&self, word: &str) -> Vec<String> {
        let mut all: Vec<String> = Vec::new();
        let mut seen = HashSet::new();
        for ld in &self.dicts {
            let mut dict_suggestions = Vec::new();
            ld.dict.suggest(word, &mut dict_suggestions);
            for s in dict_suggestions.into_iter().take(MAX_PER_DICT) {
                // De-dupe on the lowercased form but keep the original casing.
                let lower = s.to_lowercase();
                if seen.contains(&lower) {
                    continue;
                }
                seen.insert(lower);
                all.push(s);
                if all.len() >= MAX_SUGGESTIONS {
                    return all;
                }
            }
        }
        all
    }

    /// `true` when at least one dictionary (language or computing) is loaded.
    pub const fn is_active(&self) -> bool {
        !self.dicts.is_empty() || self.computing_dict.is_some()
    }

    /// Number of language dictionaries loaded (excludes the computing dict).
    pub const fn dict_count(&self) -> usize {
        self.dicts.len()
    }

    /// `true` when the computing/IT dictionary is loaded.
    pub const fn has_computing(&self) -> bool {
        self.computing_dict.is_some()
    }

    /// Resolves the dictionary directory: the configured path when non-empty,
    /// otherwise the application default from `crate::constants`.
    pub fn resolve_dict_dir(configured: &str) -> PathBuf {
        if configured.is_empty() {
            crate::constants::dicts_dir()
        } else {
            PathBuf::from(configured)
        }
    }
}
/// Top-level manifest listing the dictionaries available for download.
#[derive(Debug, Clone, Deserialize)]
pub struct DictManifest {
    #[expect(dead_code, reason = "reserved for future manifest format changes")]
    pub version: u32,
    /// Available dictionaries, keyed by language code (e.g. `en_US`).
    pub dictionaries: HashMap<String, DictInfo>,
}
/// Per-dictionary metadata from the manifest.
#[derive(Debug, Clone, Deserialize)]
pub struct DictInfo {
    /// Human-readable dictionary name (e.g. "English (US)").
    pub name: String,
}
/// One row of the dictionary list shown to the user.
#[derive(Debug)]
pub struct DictListEntry {
    /// Language code (manifest key), e.g. `en_US`.
    pub code: String,
    /// Human-readable dictionary name.
    pub name: String,
    /// Whether the dictionary files are already present locally.
    pub installed: bool,
}
/// Events sent from the background dictionary tasks back to the receiver.
#[derive(Debug)]
pub enum DictEvent {
    /// Manifest fetched: the sorted list of available dictionaries.
    ListResult {
        entries: Vec<DictListEntry>,
    },
    /// A dictionary's files were downloaded and saved successfully.
    Downloaded { lang: String },
    /// A fetch or download failed; `message` is user-facing.
    Error { message: String },
}
/// Spawns a background task that fetches the dictionary manifest and sends
/// either a sorted `DictEvent::ListResult` (with per-dictionary installed
/// status) or a `DictEvent::Error` over `tx`.
pub fn spawn_fetch_manifest(
    client: reqwest::Client,
    dict_dir: PathBuf,
    tx: mpsc::Sender<DictEvent>,
) {
    tokio::spawn(async move {
        let event = match fetch_manifest(&client).await {
            Ok(manifest) => {
                let mut entries: Vec<DictListEntry> = manifest
                    .dictionaries
                    .into_iter()
                    .map(|(code, info)| {
                        // A dictionary is only usable when BOTH files exist:
                        // `load_dictionary` reads `.aff` and `.dic`. Checking
                        // only `.dic` would report a half-downloaded dict as
                        // installed even though it cannot be loaded.
                        let installed = dict_dir.join(format!("{code}.dic")).exists()
                            && dict_dir.join(format!("{code}.aff")).exists();
                        DictListEntry {
                            code,
                            name: info.name,
                            installed,
                        }
                    })
                    .collect();
                // Stable ordering by language code for display.
                entries.sort_by(|a, b| a.code.cmp(&b.code));
                DictEvent::ListResult { entries }
            }
            Err(e) => DictEvent::Error {
                message: format!("Failed to fetch dictionary list: {e}"),
            },
        };
        // The receiver may have been dropped (e.g. on shutdown); that's fine.
        let _ = tx.send(event).await;
    });
}
pub fn spawn_download_dict(
lang: String,
client: reqwest::Client,
dict_dir: PathBuf,
tx: mpsc::Sender<DictEvent>,
) {
tokio::spawn(async move {
let base = crate::constants::DICTS_REPO_URL;
let event = match download_dict_files(&client, base, &lang, &dict_dir).await {
Ok(()) => DictEvent::Downloaded { lang },
Err(e) => DictEvent::Error {
message: format!("Failed to download {lang}: {e}"),
},
};
let _ = tx.send(event).await;
});
}
/// Fetches the dictionary manifest JSON from the configured URL and
/// deserializes it into a `DictManifest`.
async fn fetch_manifest(client: &reqwest::Client) -> color_eyre::eyre::Result<DictManifest> {
    let response = client
        .get(crate::constants::DICTS_MANIFEST_URL)
        .send()
        .await?
        .error_for_status()?;
    Ok(response.json::<DictManifest>().await?)
}
/// Downloads the `.aff` and `.dic` files for `lang` from `base_url` and
/// saves them into `dict_dir`.
///
/// Both files are fetched before either is written, so a failed second
/// download cannot leave a half-installed dictionary on disk. The target
/// directory is created if it does not exist yet (e.g. on first run).
///
/// # Errors
/// Returns an error on HTTP failure, non-success status, or filesystem
/// failure while creating the directory or writing the files.
async fn download_dict_files(
    client: &reqwest::Client,
    base_url: &str,
    lang: &str,
    dict_dir: &Path,
) -> color_eyre::eyre::Result<()> {
    // `resolve_dict_dir` may point at a directory that was never created;
    // writing into it would otherwise fail with NotFound.
    tokio::fs::create_dir_all(dict_dir).await?;
    // Fetch both files into memory first.
    let mut downloaded = Vec::with_capacity(2);
    for ext in &["aff", "dic"] {
        let url = format!("{base_url}/{lang}.{ext}");
        let resp = client.get(&url).send().await?.error_for_status()?;
        downloaded.push((ext, resp.bytes().await?));
    }
    // Only write once both downloads succeeded.
    for (ext, bytes) in downloaded {
        let path = dict_dir.join(format!("{lang}.{ext}"));
        tokio::fs::write(&path, &bytes).await?;
        tracing::info!(lang = %lang, ext = %ext, bytes = bytes.len(), "dictionary file saved");
    }
    Ok(())
}
/// Trims leading and trailing non-alphanumeric characters from `word`.
///
/// Returns the trimmed slice together with its byte offsets `(start, end)`
/// within the original string. Interior punctuation is preserved, so
/// `"don't"` stays intact while `"'test'"` becomes `"test"`. A word with no
/// alphanumeric characters at all yields `("", 0, 0)`.
pub fn strip_word_punctuation(word: &str) -> (&str, usize, usize) {
    // Byte offset of the first alphanumeric character, or the full length
    // when there is none.
    let start = word
        .char_indices()
        .find(|(_, c)| c.is_alphanumeric())
        .map_or(word.len(), |(i, _)| i);
    if start >= word.len() {
        return ("", 0, 0);
    }
    // Byte offset just past the last alphanumeric character. The guard above
    // guarantees one exists, so the `map_or(start, ...)` fallback is never
    // taken; it only keeps the expression total.
    let end = word
        .char_indices()
        .rev()
        .find(|(_, c)| c.is_alphanumeric())
        .map_or(start, |(i, c)| i + c.len_utf8());
    (&word[start..end], start, end)
}
/// Returns `true` when `word` starts with a known URL scheme prefix
/// (matched case-insensitively), so the spell checker can skip it.
fn is_url(word: &str) -> bool {
    // ASCII-case-insensitive prefix match without allocating a lowercased
    // copy of every checked word. `get` yields `None` when the word is
    // shorter than the prefix or the cut would split a multi-byte character,
    // in which case the prefix cannot match anyway.
    URL_PREFIXES.iter().any(|prefix| {
        word.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
/// Returns `true` for non-empty tokens made up entirely of ASCII digits
/// and/or ASCII punctuation — times ("10:30"), prices ("$5.99"), plain
/// numbers — which the spell checker should skip.
fn is_number_like(word: &str) -> bool {
    let mut saw_any = false;
    for ch in word.chars() {
        if !ch.is_ascii_digit() && !ch.is_ascii_punctuation() {
            return false;
        }
        saw_any = true;
    }
    saw_any
}
fn load_dictionary(lang: &str, dir: &Path) -> color_eyre::eyre::Result<spellbook::Dictionary> {
let aff_path = dir.join(format!("{lang}.aff"));
let dic_path = dir.join(format!("{lang}.dic"));
let aff_content = std::fs::read_to_string(&aff_path)
.map_err(|e| color_eyre::eyre::eyre!("{}: {e}", aff_path.display()))?;
let dic_content = std::fs::read_to_string(&dic_path)
.map_err(|e| color_eyre::eyre::eyre!("{}: {e}", dic_path.display()))?;
let dict = spellbook::Dictionary::new(&aff_content, &dic_content)
.map_err(|e| color_eyre::eyre::eyre!("parse error for {lang}: {e}"))?;
Ok(dict)
}
#[cfg(test)]
mod tests {
    use super::*;

    // With no dictionaries loaded, the checker must accept every word.
    #[test]
    fn empty_checker_accepts_everything() {
        let checker = SpellChecker { dicts: vec![], computing_dict: None };
        assert!(checker.check("anything", &HashSet::new()));
        assert!(checker.check("xyzzy", &HashSet::new()));
    }

    // Words under MIN_WORD_LEN (including the empty string) are never flagged.
    #[test]
    fn short_words_always_accepted() {
        let checker = SpellChecker { dicts: vec![], computing_dict: None };
        assert!(checker.check("a", &HashSet::new()));
        assert!(checker.check("", &HashSet::new()));
    }

    // Number-like tokens (digits/punctuation only) are skipped.
    #[test]
    fn words_with_digits_skipped() {
        let checker = SpellChecker { dicts: vec![], computing_dict: None };
        assert!(checker.check("123", &HashSet::new()));
        assert!(checker.check("10:30", &HashSet::new()));
    }

    // Underscores mark identifiers; those are skipped too.
    #[test]
    fn words_with_underscore_skipped() {
        let checker = SpellChecker { dicts: vec![], computing_dict: None };
        assert!(checker.check("foo_bar", &HashSet::new()));
    }

    // Anything starting with a known URL scheme prefix is skipped.
    #[test]
    fn urls_skipped() {
        let checker = SpellChecker { dicts: vec![], computing_dict: None };
        assert!(checker.check("https://example.com", &HashSet::new()));
        assert!(checker.check("irc://server", &HashSet::new()));
    }

    // Nick matching is case-insensitive.
    #[test]
    fn nicks_skipped() {
        let checker = SpellChecker { dicts: vec![], computing_dict: None };
        let nicks: HashSet<String> = ["kofany", "ferris"].iter().map(ToString::to_string).collect();
        assert!(checker.check("kofany", &nicks));
        assert!(checker.check("Kofany", &nicks));
    }

    // is_number_like: digits + punctuation only, non-empty.
    #[test]
    fn number_like_detection() {
        assert!(is_number_like("123"));
        assert!(is_number_like("10:30"));
        assert!(is_number_like("$5.99"));
        assert!(!is_number_like("hello"));
        assert!(!is_number_like("test123"));
        assert!(!is_number_like(""));
    }

    // Trailing punctuation is trimmed; offsets are byte positions.
    #[test]
    fn strip_punctuation_trailing() {
        let (word, start, end) = strip_word_punctuation("hello!");
        assert_eq!(word, "hello");
        assert_eq!(start, 0);
        assert_eq!(end, 5);
    }

    #[test]
    fn strip_punctuation_question() {
        let (word, _, _) = strip_word_punctuation("do?");
        assert_eq!(word, "do");
    }

    // Leading and trailing quotes are both trimmed.
    #[test]
    fn strip_punctuation_quotes() {
        let (word, start, end) = strip_word_punctuation("'test'");
        assert_eq!(word, "test");
        assert_eq!(start, 1);
        assert_eq!(end, 5);
    }

    // Interior apostrophes are preserved.
    #[test]
    fn strip_punctuation_apostrophe_inside() {
        let (word, _, _) = strip_word_punctuation("don't");
        assert_eq!(word, "don't");
    }

    // Interior hyphens are preserved; only the outer dashes are trimmed.
    #[test]
    fn strip_punctuation_hyphen_inside() {
        let (word, start, end) = strip_word_punctuation("--well-known--");
        assert_eq!(word, "well-known");
        assert_eq!(start, 2);
        assert_eq!(end, 12);
    }

    // A token with no alphanumerics collapses to the empty result.
    #[test]
    fn strip_punctuation_empty() {
        let (word, _, _) = strip_word_punctuation("...");
        assert_eq!(word, "");
    }

    // A clean word passes through unchanged.
    #[test]
    fn strip_punctuation_clean_word() {
        let (word, start, end) = strip_word_punctuation("hello");
        assert_eq!(word, "hello");
        assert_eq!(start, 0);
        assert_eq!(end, 5);
    }

    #[test]
    fn is_active_empty() {
        let checker = SpellChecker { dicts: vec![], computing_dict: None };
        assert!(!checker.is_active());
    }

    // Empty configured path falls back to the application default dir.
    #[test]
    fn resolve_dict_dir_default() {
        let path = SpellChecker::resolve_dict_dir("");
        assert!(path.ends_with("dicts"));
    }

    #[test]
    fn resolve_dict_dir_custom() {
        let path = SpellChecker::resolve_dict_dir("/custom/path");
        assert_eq!(path, PathBuf::from("/custom/path"));
    }

    // Loading from a missing directory yields an inactive checker, not a panic.
    #[test]
    fn load_nonexistent_directory() {
        let checker = SpellChecker::load(
            &["nonexistent_XX".to_string()],
            Path::new("/tmp/repartee_test_no_dicts"),
            false,
        );
        assert!(!checker.is_active());
        assert_eq!(checker.dict_count(), 0);
    }

    // No dictionaries means no suggestions.
    #[test]
    fn suggest_empty_checker() {
        let checker = SpellChecker { dicts: vec![], computing_dict: None };
        let suggestions = checker.suggest("hello");
        assert!(suggestions.is_empty());
    }

    // URL detection is case-insensitive and requires the scheme colon.
    #[test]
    fn url_detection() {
        assert!(is_url("https://example.com"));
        assert!(is_url("HTTP://FOO.BAR"));
        assert!(is_url("ftp://files"));
        assert!(!is_url("hello"));
        assert!(!is_url("httpwhat"));
    }

    // Words in the computing dictionary are accepted even with no language dicts.
    #[test]
    fn computing_dict_check() {
        let aff = "SET UTF-8\n";
        let dic = "2\nKubernetes\nIRCnet\n";
        let dict = spellbook::Dictionary::new(aff, dic).unwrap();
        let checker = SpellChecker {
            dicts: vec![],
            computing_dict: Some(Arc::new(dict)),
        };
        assert!(checker.check("Kubernetes", &HashSet::new()));
        assert!(checker.check("IRCnet", &HashSet::new()));
        assert!(!checker.check("xyzzyplugh", &HashSet::new()));
    }

    #[test]
    fn has_computing_false_when_none() {
        let checker = SpellChecker { dicts: vec![], computing_dict: None };
        assert!(!checker.has_computing());
    }

    #[test]
    fn has_computing_true_when_loaded() {
        let aff = "SET UTF-8\n";
        let dic = "1\ntest\n";
        let dict = spellbook::Dictionary::new(aff, dic).unwrap();
        let checker = SpellChecker {
            dicts: vec![],
            computing_dict: Some(Arc::new(dict)),
        };
        assert!(checker.has_computing());
    }

    // The computing dict alone is enough to make the checker active.
    #[test]
    fn computing_dict_is_active() {
        let aff = "SET UTF-8\n";
        let dic = "1\ntokio\n";
        let dict = spellbook::Dictionary::new(aff, dic).unwrap();
        let checker = SpellChecker {
            dicts: vec![],
            computing_dict: Some(Arc::new(dict)),
        };
        assert!(checker.is_active());
    }

    // Integration-style test against the real computing dictionary; skipped
    // gracefully when the dict files have not been generated locally.
    #[test]
    fn load_real_computing_dict() {
        let dict_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("dicts");
        if !dict_dir.join("computing.dic").exists() {
            eprintln!("skipping: computing.dic not found (run scripts/build-computing-dict.sh)");
            return;
        }
        let checker = SpellChecker::load(&[], &dict_dir, true);
        assert!(checker.has_computing(), "computing dict should be loaded");
        assert!(checker.is_active());
        let empty = HashSet::new();
        assert!(checker.check("IRCnet", &empty));
        assert!(checker.check("netsplit", &empty));
        assert!(checker.check("WeeChat", &empty));
        assert!(checker.check("Kubernetes", &empty));
        assert!(checker.check("PRIVMSG", &empty));
        assert!(checker.check("chanserv", &empty));
    }

    // Manifest JSON deserializes into the typed structure.
    #[test]
    fn manifest_deserialize() {
        let json = r#"{
"version": 1,
"dictionaries": {
"en_US": { "name": "English (US)" },
"pl_PL": { "name": "Polish" }
}
}"#;
        let manifest: DictManifest = serde_json::from_str(json).unwrap();
        assert_eq!(manifest.dictionaries.len(), 2);
        assert_eq!(manifest.dictionaries["en_US"].name, "English (US)");
        assert_eq!(manifest.dictionaries["pl_PL"].name, "Polish");
    }

    // An empty dictionaries map is valid.
    #[test]
    fn manifest_empty_dictionaries() {
        let json = r#"{ "version": 1, "dictionaries": {} }"#;
        let manifest: DictManifest = serde_json::from_str(json).unwrap();
        assert!(manifest.dictionaries.is_empty());
    }
}