use codebook::Codebook;
use codebook_config::{CodebookConfig, CodebookConfigFile};
use globset::Glob;
use ignore::WalkBuilder;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use string_offsets::{AllConfig, StringOffsets};
/// Print a formatted error message to stderr, prefixed with "error: ".
/// Accepts the same argument forms as `format!`.
macro_rules! err {
($($arg:tt)*) => {
eprintln!("error: {}", format_args!($($arg)*))
};
}
/// Overall outcome of a lint run; callers typically map this to an exit code.
pub enum LintResult {
/// No spelling errors found and no operational failures occurred.
Clean,
/// At least one spelling error was reported.
Errors,
/// A config, I/O, pattern, or walk failure occurred (takes precedence over `Errors`).
Failure,
}
/// Render `path` relative to the (already canonicalized) root directory.
///
/// Falls back to the path's own lossy string form when no root is given,
/// when the path cannot be canonicalized, or when it lies outside the root.
fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String {
    if let Some(root) = root_canonical {
        if let Ok(canonical) = path.canonicalize() {
            if let Ok(relative) = canonical.strip_prefix(root) {
                return relative.to_string_lossy().into_owned();
            }
        }
    }
    path.to_string_lossy().into_owned()
}
/// Entry point for a lint run: load config, spell-check every resolved file,
/// print a summary line on stderr, and report the overall outcome.
///
/// * `files` — user-supplied paths, directories, or glob patterns.
/// * `root` — project root the patterns are resolved against.
/// * `unique` — report each (case-insensitive) word at most once across all files.
/// * `suggest` — include correction suggestions in the output.
pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> LintResult {
    let config = match CodebookConfigFile::load(Some(root)) {
        Ok(loaded) => Arc::new(loaded),
        Err(e) => {
            err!("failed to load config: {e}");
            return LintResult::Failure;
        }
    };
    print_config_source(&config);
    eprintln!();
    let codebook = match Codebook::new(config.clone()) {
        Ok(cb) => cb,
        Err(e) => {
            err!("failed to initialize: {e}");
            return LintResult::Failure;
        }
    };
    // Canonical root lets us display workspace-relative paths; None if it fails.
    let canonical_root = root.canonicalize().ok();
    let (targets, mut had_failure) = resolve_paths(files, root);
    let mut seen_words = HashSet::new();
    let mut total_errors = 0usize;
    let mut files_with_errors = 0usize;
    for target in &targets {
        let relative = relative_to_root(canonical_root.as_deref(), target);
        let rel_path = Path::new(&relative);
        // Apply both config filters: explicit ignores and the include list.
        if config.should_ignore_path(rel_path) || !config.should_include_path(rel_path) {
            continue;
        }
        let (errors, file_failure) =
            check_file(target, &relative, &codebook, &mut seen_words, unique, suggest);
        had_failure |= file_failure;
        if errors > 0 {
            total_errors += errors;
            files_with_errors += 1;
        }
    }
    let unique_label = if unique { "unique " } else { "" };
    eprintln!(
        "Found {total_errors} {unique_label}spelling error(s) in {files_with_errors} file(s)."
    );
    // Operational failures outrank spelling errors in the final verdict.
    if had_failure {
        LintResult::Failure
    } else if total_errors > 0 {
        LintResult::Errors
    } else {
        LintResult::Clean
    }
}
/// Spell-check one file and print any findings to stdout.
///
/// Returns `(error_count, had_failure)`; `had_failure` flags an I/O problem,
/// not a spelling error. Files that are not valid UTF-8 are skipped silently
/// (treated as binary), which matches typical linter behavior.
fn check_file(
    path: &Path,
    relative: &str,
    codebook: &Codebook,
    seen_words: &mut HashSet<String>,
    unique: bool,
    suggest: bool,
) -> (usize, bool) {
    let text = match std::fs::read_to_string(path) {
        Ok(t) => t,
        // Non-UTF-8 content: skip quietly instead of reporting a failure.
        Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
            return (0, false);
        }
        Err(e) => {
            err!("{}: {e}", path.display());
            return (0, true);
        }
    };
    let display = relative.strip_prefix("./").unwrap_or(relative);
    // Offset table converting byte offsets into line/column positions.
    let offsets = StringOffsets::<AllConfig>::new(&text);
    let mut locations = codebook.spell_check(&text, None, Some(relative));
    // Sort so findings are printed in file order.
    for wl in &mut locations {
        wl.locations.sort_by_key(|r| r.start_byte);
    }
    locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0));
    let mut hits: Vec<(String, &str, Option<Vec<String>>)> = Vec::new();
    for wl in &locations {
        // Unique mode: report each case-insensitive word once across the run.
        if unique && !seen_words.insert(wl.word.to_lowercase()) {
            continue;
        }
        let mut suggestions = if suggest {
            codebook.get_suggestions(wl.word.as_str())
        } else {
            None
        };
        let ranges: &[_] = if unique {
            // Only the first occurrence is shown; `get(..1)` yields an empty
            // slice when there are no locations, where `[..1]` would panic.
            wl.locations.get(..1).unwrap_or(&wl.locations)
        } else {
            &wl.locations
        };
        for (i, range) in ranges.iter().enumerate() {
            // Clamp in case the checker reports an offset past the end of text.
            let pos = offsets.utf8_to_char_pos(range.start_byte.min(text.len()));
            // Clone suggestions for all but the last occurrence; the final
            // occurrence takes ownership to avoid one clone.
            let sugg = if i + 1 < ranges.len() {
                suggestions.clone()
            } else {
                suggestions.take()
            };
            hits.push((
                format!("{}:{}", pos.line + 1, pos.col + 1),
                wl.word.as_str(),
                sugg,
            ));
        }
    }
    if hits.is_empty() {
        return (0, false);
    }
    // Pad the line:col column so the flagged words align vertically.
    let pad_len = hits.iter().map(|(lc, _, _)| lc.len()).max().unwrap_or(0);
    println!("{display}");
    for (linecol, word, suggestions) in &hits {
        let pad = " ".repeat(pad_len - linecol.len());
        if let Some(s) = suggestions {
            println!(" {display}:{linecol}{pad} {word} -> {}", s.join(", "));
        } else {
            println!(" {display}:{linecol}{pad} {word}");
        }
    }
    println!();
    (hits.len(), false)
}
/// Announce on stderr which configuration is in effect: the project config,
/// the global config, or the built-in defaults.
fn print_config_source(config: &CodebookConfigFile) {
    let project = config.project_config_path().filter(|p| p.is_file());
    let global = config.global_config_path().filter(|p| p.is_file());
    // Project config wins over global; neither present means defaults.
    let (label, path) = if let Some(p) = project {
        ("using config", p)
    } else if let Some(g) = global {
        ("using global config", g)
    } else {
        eprintln!("No config found, using default config");
        return;
    };
    let cwd = std::env::current_dir().unwrap_or_default();
    // Prefer a path relative to the working directory when it applies.
    let shown = path
        .strip_prefix(&cwd)
        .unwrap_or(&path)
        .display()
        .to_string();
    eprintln!("{label} {shown}");
}
/// Expand user-supplied arguments into a concrete file list.
///
/// Each argument (resolved against `root`) may be a directory (walked
/// recursively), an existing file, or — failing both — a glob pattern
/// matched against a walk of `root`. Returns the sorted, de-duplicated
/// paths plus a flag set when any pattern was invalid, matched nothing,
/// or a directory walk reported errors.
fn resolve_paths(patterns: &[String], root: &Path) -> (Vec<PathBuf>, bool) {
    let mut collected: Vec<PathBuf> = Vec::new();
    let mut had_failure = false;
    for raw in patterns {
        let candidate = root.join(raw);
        if candidate.is_dir() {
            had_failure |= collect_walk(&mut WalkBuilder::new(&candidate), &mut collected);
            continue;
        }
        if candidate.is_file() {
            collected.push(candidate);
            continue;
        }
        // Neither a file nor a directory: interpret the argument as a glob.
        let pattern_str = candidate.to_string_lossy();
        let matcher = match Glob::new(&pattern_str) {
            Ok(glob) => glob.compile_matcher(),
            Err(e) => {
                err!("invalid pattern '{pattern_str}': {e}");
                had_failure = true;
                continue;
            }
        };
        let count_before = collected.len();
        for entry in WalkBuilder::new(root).follow_links(false).build() {
            match entry {
                Ok(item) if item.file_type().is_some_and(|ft| ft.is_file()) => {
                    if matcher.is_match(item.path()) {
                        collected.push(item.into_path());
                    }
                }
                Ok(_) => {}
                Err(e) => {
                    err!("walk error: {e}");
                    had_failure = true;
                }
            }
        }
        // A glob that matched no files is reported as a failure.
        if collected.len() == count_before {
            err!("no match for '{pattern_str}'");
            had_failure = true;
        }
    }
    collected.sort();
    collected.dedup();
    (collected, had_failure)
}
/// Walk the builder's roots (without following symlinks) and append every
/// regular file to `out`. Returns true when any walk entry produced an error.
fn collect_walk(walker: &mut WalkBuilder, out: &mut Vec<PathBuf>) -> bool {
    let mut saw_error = false;
    walker.follow_links(false);
    for entry in walker.build() {
        match entry {
            Err(e) => {
                err!("walk error: {e}");
                saw_error = true;
            }
            Ok(item) => {
                if item.file_type().is_some_and(|ft| ft.is_file()) {
                    out.push(item.into_path());
                }
            }
        }
    }
    saw_error
}
#[cfg(test)]
mod tests {
use super::*;
use codebook::Codebook;
use codebook_config::CodebookConfigMemory;
use std::collections::HashSet;
use std::fs;
use std::sync::Arc;
use tempfile::tempdir;
// Covers root-relative display paths, glob expansion across subdirectories,
// and the failure flag when a pattern matches nothing.
#[test]
fn test_path_and_dir_resolution() {
let dir = tempdir().unwrap();
let sub = dir.path().join("sub");
fs::create_dir_all(&sub).unwrap();
let f1 = dir.path().join("a.rs");
let f2 = sub.join("b.txt");
fs::write(&f1, "").unwrap();
fs::write(&f2, "").unwrap();
let root_canon = dir.path().canonicalize().unwrap();
assert_eq!(relative_to_root(Some(&root_canon), &f1), "a.rs");
// Recursive glob should pick up both files regardless of depth.
let pattern = format!("{}/**/*.*", dir.path().display());
let (paths, err) = resolve_paths(&[pattern], dir.path());
assert!(!err);
assert_eq!(paths.len(), 2);
let path_strs: HashSet<_> = paths.iter().map(|p| p.to_string_lossy()).collect();
assert!(path_strs.iter().any(|s| s.ends_with("a.rs")));
assert!(path_strs.iter().any(|s| s.ends_with("b.txt")));
// A name that is neither file, dir, nor matching glob must set the flag.
let (_, err_missing) = resolve_paths(&["nonexistent.rs".into()], dir.path());
assert!(err_missing);
}
// Exercises per-file error counting, the unique-word suppression across
// files, and I/O failure reporting for a missing path.
#[test]
fn test_check_file_logic() {
let dir = tempdir().unwrap();
let f = dir.path().join("test.txt");
// Two occurrences of the same misspelling, plus a multibyte char to
// stress byte-offset handling.
fs::write(&f, "actualbad\n🦀 actualbad").unwrap();
let cb = Codebook::new(Arc::new(CodebookConfigMemory::default())).unwrap();
let mut seen = HashSet::new();
let (count, err) = check_file(&f, "test.txt", &cb, &mut seen, false, false);
assert_eq!(count, 2);
assert!(!err);
// In unique mode the shared `seen` set suppresses repeats across files.
let mut seen_unique = HashSet::new();
let (c1, _) = check_file(&f, "f1.txt", &cb, &mut seen_unique, true, false);
let (c2, _) = check_file(&f, "f2.txt", &cb, &mut seen_unique, true, false);
assert_eq!(c1, 1, "Should flag word once");
assert_eq!(c2, 0, "Should skip already-seen word in second file");
// A nonexistent path must surface as an I/O failure, not a spelling error.
let (_, err_io) = check_file(
&dir.path().join("missing"),
"!",
&cb,
&mut seen,
false,
false,
);
assert!(err_io);
}
// Verifies byte-offset to 1-based line/column conversion, including
// multibyte UTF-8 (accented chars and an emoji) before the target word.
#[test]
fn test_unicode_line_col() {
// Each case: (text, byte offset, expected 1-based line, expected 1-based col).
let cases = [
("actualbad", 0, 1, 1), ("ok\nactualbad", 3, 2, 1), ("résumé actualbad", 9, 1, 8), ("🦀 actualbad", 5, 1, 3), ];
for (text, offset, line, col) in cases {
let table = StringOffsets::<AllConfig>::new(text);
let pos = table.utf8_to_char_pos(offset);
assert_eq!(
(pos.line + 1, pos.col + 1),
(line, col),
"Failed on: {}",
text
);
}
}
}