use crate::{
Lint, Token, TokenKind, TokenStringExt,
expr::{All, Expr, OwnedExprExt, SequenceExpr},
linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
spell::Dictionary,
};
/// Linter that flags a prefix written apart from the word it belongs to
/// (separated by a space or a hyphen) when the dictionary knows a joined or
/// hyphenated spelling of the combination.
pub struct DisjointPrefixes<D> {
// Pattern built in `new`; handed out by `ExprLinter::expr`.
expr: All,
// Dictionary queried with `contains_word_str` to validate candidate spellings.
dict: D,
}
// Per-prefix exception lists. `DisjointPrefixes::new` places each list after its
// prefix inside the `and_not(...)` exclusion set, so the prefix followed by one of
// these words is not matched by the linter's pattern.
// NOTE(review): presumably these are ordinary two-word phrases (e.g. "out boxes",
// "over time") that must not be merged — confirm against the project's tests.

/// Words excluded from matching when they follow the prefix "out".
const OUT_EXCEPTIONS: &[&str] = &["boxes", "facing", "live", "numbers", "playing"];
/// Words excluded from matching when they follow the prefix "over".
const OVER_EXCEPTIONS: &[&str] = &["all", "joy", "long", "night", "reading", "steps", "time"];
/// Words excluded from matching when they follow the prefix "under".
const UNDER_EXCEPTIONS: &[&str] = &["development", "mine"];
/// Words excluded from matching when they follow the prefix "up".
const UP_EXCEPTIONS: &[&str] = &["loading", "right", "state", "time", "trend"];
impl<D> DisjointPrefixes<D>
where
    D: Dictionary,
{
    /// Creates the linter, using `dict` to decide which joined or hyphenated
    /// spellings are real words.
    ///
    /// The stored pattern is "known prefix, separator, verb-or-noun, optional
    /// hyphen", minus a set of exclusions for combinations that must be left
    /// alone.
    pub fn new(dict: D) -> Self {
        // What we are looking for: one of the listed prefixes, then a separator
        // (`t_ws_h` — presumably whitespace or a hyphen), then a token that is a
        // verb or a noun, optionally followed by a hyphen.
        let candidate = SequenceExpr::word_set(&[
            "anti", "auto", "bi", "counter", "de", "dis", "extra", "fore", "hyper", "il", "im",
            "inter", "ir", "macro", "mal", "micro", "mid", "mini", "mis", "mono", "multi",
            "non", "omni", "post", "pre", "pro", "re", "semi", "sub", "super", "trans", "tri",
            "ultra", "un", "uni",
            "co",
            "out", "over", "under",
            "up",
        ])
        .t_ws_h()
        .then_kind_either(TokenKind::is_verb, TokenKind::is_noun)
        .then_optional_hyphen();

        // What must never be reported, even though it fits the shape above.
        let exclusions = SequenceExpr::any_of(vec![
            // Three tokens of any kind followed by a hyphen: the word after the
            // prefix continues into another hyphenated part.
            Box::new(SequenceExpr::anything().t_any().t_any().then_hyphen()),
            // "co" + any separator + "op"/"ops".
            Box::new(SequenceExpr::aco("co").t_any().t_set(&["op", "ops"])),
            // Prefix-specific exception lists (see the `*_EXCEPTIONS` constants).
            Box::new(SequenceExpr::aco("out").t_ws().t_set(OUT_EXCEPTIONS)),
            Box::new(SequenceExpr::aco("over").t_ws().t_set(OVER_EXCEPTIONS)),
            Box::new(SequenceExpr::aco("under").t_ws().t_set(UNDER_EXCEPTIONS)),
            Box::new(SequenceExpr::aco("up").t_ws().t_set(UP_EXCEPTIONS)),
        ]);

        Self {
            expr: candidate.and_not(exclusions),
            dict,
        }
    }
}
impl<D> ExprLinter for DisjointPrefixes<D>
where
    D: Dictionary,
{
    type Unit = Chunk;

    /// Returns the pattern assembled in `DisjointPrefixes::new`.
    fn expr(&self) -> &dyn Expr {
        &self.expr
    }

    /// Turns a matched "prefix, separator, word" sequence into a lint.
    ///
    /// Returns `None` when the match has no span, when no context is supplied,
    /// when the token just before the match is a hyphen, when the text as
    /// written is already a dictionary word, or when neither the joined nor the
    /// hyphenated spelling is in the dictionary. Otherwise the lint offers the
    /// hyphenated spelling first (if valid) and the joined spelling second
    /// (if valid).
    fn match_to_lint_with_context(
        &self,
        toks: &[Token],
        src: &[char],
        ctx: Option<(&[Token], &[Token])>,
    ) -> Option<Lint> {
        let toks_span = toks.span()?;
        let (before, _) = ctx?;

        // A hyphen right before the prefix means the match is the tail of a
        // longer hyphenated compound; leave it alone.
        if before.last().is_some_and(|tok| tok.kind.is_hyphen()) {
            return None;
        }

        let separator_is_hyphen = toks[1].kind.is_hyphen();
        let prefix = toks[0].get_str(src);
        let stem = toks[2].get_str(src);

        // Rebuild the text from the individual tokens with a normalized
        // separator instead of slicing the whole span out of the source.
        let separator = if separator_is_hyphen { '-' } else { ' ' };
        let as_written = format!("{prefix}{separator}{stem}");
        if self.dict.contains_word_str(&as_written) {
            return None;
        }

        // The joined form is always a candidate; the hyphenated form only when
        // the text is not hyphenated already.
        let joined = format!("{prefix}{stem}");
        let joined_valid = self.dict.contains_word_str(&joined);

        let hyphenated = (!separator_is_hyphen).then(|| format!("{prefix}-{stem}"));
        let hyphenated_valid = hyphenated
            .as_ref()
            .is_some_and(|word| self.dict.contains_word_str(word));

        if !(joined_valid || hyphenated_valid) {
            return None;
        }

        // Hyphenated first, joined second; keep only the dictionary-backed ones.
        let template = toks_span.get_content(src);
        let suggestions = [(hyphenated, hyphenated_valid), (Some(joined), joined_valid)]
            .into_iter()
            .filter(|(_, valid)| *valid)
            .filter_map(|(word, _)| word)
            .map(|word| Suggestion::replace_with_match_case(word.chars().collect(), template))
            .collect();

        Some(Lint {
            span: toks_span,
            lint_kind: LintKind::Spelling,
            suggestions,
            message: "This looks like a prefix that can be joined with the rest of the word."
                .to_string(),
            ..Default::default()
        })
    }

    /// Short human-readable summary of what this linter checks.
    fn description(&self) -> &str {
        "Looks for words with their prefixes written with a space or hyphen between instead of joined."
    }
}
#[cfg(test)]
mod tests {
    use super::DisjointPrefixes;
    use crate::{
        linting::tests::{assert_no_lints, assert_suggestion_result},
        spell::FstDictionary,
    };

    /// A hyphenated prefix is merged when the joined word is in the dictionary.
    #[test]
    fn fix_hyphenated_to_joined() {
        let linter = DisjointPrefixes::new(FstDictionary::curated());
        let input = "Download pre-built binaries or build from source.";
        let expected = "Download prebuilt binaries or build from source.";
        assert_suggestion_result(input, linter, expected);
    }

    /// A space-separated prefix is merged when the joined word is in the dictionary.
    #[test]
    fn fix_open_to_joined() {
        let linter = DisjointPrefixes::new(FstDictionary::curated());
        let input = "Advanced Nginx configuration available for super users";
        let expected = "Advanced Nginx configuration available for superusers";
        assert_suggestion_result(input, linter, expected);
    }

    /// "co op" written with a space is left untouched.
    #[test]
    fn dont_join_open_co_op() {
        let linter = DisjointPrefixes::new(FstDictionary::curated());
        let input = "They are cheaper at the co op.";
        assert_no_lints(input, linter);
    }

    /// "co-op" written with a hyphen is left untouched.
    #[test]
    fn dont_join_hyphenated_co_op() {
        let linter = DisjointPrefixes::new(FstDictionary::curated());
        let input = "Almost everything is cheaper at the co-op.";
        assert_no_lints(input, linter);
    }

    /// A space-separated prefix becomes hyphenated when that is the first valid suggestion.
    #[test]
    fn fix_open_to_hyphenated() {
        let linter = DisjointPrefixes::new(FstDictionary::curated());
        let input = "My hobby is de extinction of the dinosaurs.";
        let expected = "My hobby is de-extinction of the dinosaurs.";
        assert_suggestion_result(input, linter, expected);
    }
}