Skip to main content

harper_core/linting/
expr_linter.rs

1use crate::expr::{Expr, ExprExt};
2use blanket::blanket;
3
4use crate::{Document, LSend, Token, TokenStringExt};
5
6use super::{Lint, Linter};
7
8pub trait DocumentIterator {
9    type Unit;
10
11    fn iter_units<'a>(document: &'a Document) -> Box<dyn Iterator<Item = &'a [Token]> + 'a>;
12}
13
14/// Process text in chunks (clauses between commas)
15pub struct Chunk;
16/// Process text in full sentences
17pub struct Sentence;
18
19impl DocumentIterator for Chunk {
20    type Unit = Chunk;
21
22    fn iter_units<'a>(document: &'a Document) -> Box<dyn Iterator<Item = &'a [Token]> + 'a> {
23        Box::new(document.iter_chunks())
24    }
25}
26
27impl DocumentIterator for Sentence {
28    type Unit = Sentence;
29
30    fn iter_units<'a>(document: &'a Document) -> Box<dyn Iterator<Item = &'a [Token]> + 'a> {
31        Box::new(document.iter_sentences())
32    }
33}
34
35/// A trait that searches for tokens that fulfil [`Expr`]s in a [`Document`].
36///
37/// Makes use of [`TokenStringExt::iter_chunks`] by default, or [`TokenStringExt::iter_sentences`] to process either
38/// a chunk (clause) or a sentence at a time.
39#[blanket(derive(Box))]
40pub trait ExprLinter: LSend {
41    type Unit: DocumentIterator;
42
43    /// A simple getter for the expression you want Harper to search for.
44    fn expr(&self) -> &dyn Expr;
45    /// If any portions of a [`Document`] match [`Self::expr`], they are passed through [`ExprLinter::match_to_lint`]
46    /// or [`ExprLinter::match_to_lint_with_context`] to be transformed into a [`Lint`] for editor consumption.
47    ///
48    /// Transform matched tokens into a [`Lint`] for editor consumption.
49    ///
50    /// This is the simple version that only sees the matched tokens. For context-aware linting,
51    /// implement `match_to_lint_with_context` instead.
52    ///
53    /// Return `None` to skip producing a lint for this match.
54    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
55        self.match_to_lint_with_context(matched_tokens, source, None)
56    }
57
58    /// Transform matched tokens into a [`Lint`] with access to surrounding context.
59    ///
60    /// The context provides access to tokens before and after the match. When implementing
61    /// this method, you can call `self.match_to_lint()` as a fallback if the context isn't needed.
62    ///
63    /// Return `None` to skip producing a lint for this match.
64    fn match_to_lint_with_context(
65        &self,
66        matched_tokens: &[Token],
67        source: &[char],
68        _context: Option<(&[Token], &[Token])>,
69    ) -> Option<Lint> {
70        // Default implementation falls back to the simple version
71        self.match_to_lint(matched_tokens, source)
72    }
73    /// A user-facing description of what kinds of grammatical errors this rule looks for.
74    /// It is usually shown in settings menus.
75    fn description(&self) -> &str;
76}
77
78/// Helper function to find the only occurrence of a token matching a predicate
79///
80/// Returns `Some(token)` if exactly one token matches the predicate, `None` otherwise.
81/// TODO: This can be used in the [`ThenThan`] linter when #1819 is merged.
82pub fn find_the_only_token_matching<'a, F>(
83    tokens: &'a [Token],
84    source: &[char],
85    predicate: F,
86) -> Option<&'a Token>
87where
88    F: Fn(&Token, &[char]) -> bool,
89{
90    let mut matches = tokens.iter().filter(|&tok| predicate(tok, source));
91    match (matches.next(), matches.next()) {
92        (Some(tok), None) => Some(tok),
93        _ => None,
94    }
95}
96
97impl<L, U> Linter for L
98where
99    L: ExprLinter<Unit = U>,
100    U: DocumentIterator,
101{
102    fn lint(&mut self, document: &Document) -> Vec<Lint> {
103        let mut lints = Vec::new();
104        let source = document.get_source();
105
106        for unit in U::iter_units(document) {
107            lints.extend(run_on_chunk(self, unit, source));
108        }
109
110        lints
111    }
112
113    fn description(&self) -> &str {
114        self.description()
115    }
116}
117
118pub fn run_on_chunk<'a>(
119    linter: &'a impl ExprLinter,
120    unit: &'a [Token],
121    source: &'a [char],
122) -> impl Iterator<Item = Lint> + 'a {
123    linter
124        .expr()
125        .iter_matches(unit, source)
126        .filter_map(|match_span| {
127            linter.match_to_lint_with_context(
128                &unit[match_span.start..match_span.end],
129                source,
130                Some((&unit[..match_span.start], &unit[match_span.end..])),
131            )
132        })
133}
134
135/// Check for sentence continuation after a matched span.
136///
137/// Validates that the "after" context starts with whitespace followed by a word token,
138/// allowing flexible inspection of that word's properties (POS tags, etc.) via the predicate.
139/// The predicate can be used to confirm matches, suppress false positives, or apply conditional logic.
140///
141/// Returns `false` if context is `None`, missing tokens, or the structure is malformed.
142pub fn followed_by_word(
143    context: Option<(&[Token], &[Token])>,
144    predicate: impl Fn(&Token) -> bool,
145) -> bool {
146    if let Some((_, after)) = context
147        && let [ws, word, ..] = after
148        && ws.kind.is_whitespace()
149    {
150        return predicate(word);
151    }
152    false
153}
154
155pub fn followed_by_hyphen(context: Option<(&[Token], &[Token])>) -> bool {
156    context
157        .and_then(|(_, after)| after.first())
158        .is_some_and(|hy| hy.kind.is_hyphen())
159}
160
161/// Counterintuitively, a sentence includes the whitespace after
162/// the sentence-final punctuation.
163pub fn at_start_of_sentence(context: Option<(&[Token], &[Token])>) -> bool {
164    if let Some((before, _)) = context
165        && (before.is_empty() || (before.len() == 1 && before[0].kind.is_whitespace()))
166    {
167        return true;
168    }
169    false
170}
171
172pub fn preceded_by_word(
173    context: Option<(&[Token], &[Token])>,
174    predicate: impl Fn(&Token) -> bool,
175) -> bool {
176    if let Some((before, _)) = context
177        && let [.., word, ws] = before
178        && ws.kind.is_whitespace()
179    {
180        return predicate(word);
181    }
182    false
183}
184
#[cfg(test)]
mod tests_context {
    use crate::expr::{Expr, FixedPhrase};
    use crate::linting::expr_linter::{Chunk, Sentence};
    use crate::linting::tests::assert_suggestion_result;
    use crate::linting::{ExprLinter, Suggestion};
    use crate::token_string_ext::TokenStringExt;
    use crate::{Lint, Token};

    /// Boxes a [`FixedPhrase`] built from `text` so it can be stored as `Box<dyn Expr>`.
    fn boxed_phrase(text: &str) -> Box<dyn Expr> {
        Box::new(FixedPhrase::from_phrase(text))
    }

    /// Builds a lint spanning `toks` with one `ReplaceWith` suggestion.
    ///
    /// Returns `None` when `toks` has no span.
    fn replacement_lint(toks: &[Token], message: &str, replacement: &[char]) -> Option<Lint> {
        let span = toks.span()?;

        Some(Lint {
            span,
            message: message.to_string(),
            suggestions: vec![Suggestion::ReplaceWith(replacement.to_vec())],
            ..Default::default()
        })
    }

    /// Chunk-based linter that only overrides the context-free `match_to_lint`.
    pub struct TestSimpleLinter {
        expr: Box<dyn Expr>,
    }

    impl Default for TestSimpleLinter {
        fn default() -> Self {
            let expr = boxed_phrase("two");
            Self { expr }
        }
    }

    impl ExprLinter for TestSimpleLinter {
        type Unit = Chunk;

        fn expr(&self) -> &dyn Expr {
            self.expr.as_ref()
        }

        fn match_to_lint(&self, toks: &[Token], _src: &[char]) -> Option<Lint> {
            replacement_lint(toks, "simple", &['2'])
        }

        fn description(&self) -> &str {
            "test linter"
        }
    }

    /// Chunk-based linter that only overrides `match_to_lint_with_context` and picks its
    /// suggestion from the text surrounding the match.
    pub struct TestContextLinter {
        expr: Box<dyn Expr>,
    }

    impl Default for TestContextLinter {
        fn default() -> Self {
            let expr = boxed_phrase("two");
            Self { expr }
        }
    }

    impl ExprLinter for TestContextLinter {
        type Unit = Chunk;

        fn expr(&self) -> &dyn Expr {
            self.expr.as_ref()
        }

        fn match_to_lint_with_context(
            &self,
            toks: &[Token],
            src: &[char],
            context: Option<(&[Token], &[Token])>,
        ) -> Option<Lint> {
            // No lint without context, or when either side of the match has no span.
            let (before, after) = context?;
            let before = before.span()?.get_content_string(src);
            let after = after.span()?.get_content_string(src);

            let surrounded_by = |lead: &str, trail: &str| {
                before.eq_ignore_ascii_case(lead) && after.eq_ignore_ascii_case(trail)
            };

            let (message, replacement) = if surrounded_by("one ", " three") {
                ("ascending", '>')
            } else if surrounded_by("three ", " one") {
                ("descending", '<')
            } else {
                ("dunno", '?')
            };

            replacement_lint(toks, message, &[replacement])
        }

        fn description(&self) -> &str {
            "context linter"
        }
    }

    /// Sentence-based linter whose phrase contains a comma.
    pub struct TestSentenceLinter {
        expr: Box<dyn Expr>,
    }

    impl Default for TestSentenceLinter {
        fn default() -> Self {
            let expr = boxed_phrase("two, two");
            Self { expr }
        }
    }

    impl ExprLinter for TestSentenceLinter {
        type Unit = Sentence;

        fn expr(&self) -> &dyn Expr {
            self.expr.as_ref()
        }

        fn match_to_lint(&self, toks: &[Token], _src: &[char]) -> Option<Lint> {
            replacement_lint(toks, "sentence", &['2', '&', '2'])
        }

        fn description(&self) -> &str {
            "sentence linter"
        }
    }

    /// The context-free override is reached through the default context-aware method.
    #[test]
    fn simple_test_123() {
        let linter = TestSimpleLinter::default();
        assert_suggestion_result("one two three", linter, "one 2 three");
    }

    /// "one … three" around the match selects the ascending suggestion.
    #[test]
    fn context_test_123() {
        let linter = TestContextLinter::default();
        assert_suggestion_result("one two three", linter, "one > three");
    }

    /// "three … one" around the match selects the descending suggestion.
    #[test]
    fn context_test_321() {
        let linter = TestContextLinter::default();
        assert_suggestion_result("three two one", linter, "three < one");
    }

    /// A phrase containing a comma is matched when linting by sentence.
    #[test]
    fn sentence_test_123() {
        let linter = TestSentenceLinter::default();
        assert_suggestion_result("one, two, two, three", linter, "one, 2&2, three");
    }
}