Skip to main content

harper_core/weir/
mod.rs

//! Weir is a programming language for finding errors in natural language.
//! See our [main documentation](https://writewithharper.com/docs/weir) for more details.
3
4mod ast;
5mod error;
6mod optimize;
7mod parsing;
8
9use std::collections::VecDeque;
10use std::str::FromStr;
11use std::sync::Arc;
12
13pub use error::Error;
14use hashbrown::{HashMap, HashSet};
15use is_macro::Is;
16use parsing::{parse_expr_str, parse_str};
17use strum_macros::{AsRefStr, EnumString};
18
19use crate::expr::{Expr, ExprExt};
20use crate::linting::{Chunk, ExprLinter, Lint, LintKind, Linter, Sentence, Suggestion};
21use crate::parsers::Markdown;
22use crate::spell::FstDictionary;
23use crate::{Document, Lrc, Token, TokenStringExt};
24
25use self::ast::{Ast, AstVariable};
26
27pub(crate) fn weir_expr_to_expr(weir_code: &str) -> Result<Box<dyn Expr>, Error> {
28    let ast = parse_expr_str(weir_code, true)?;
29    ast.to_expr(&HashMap::new())
30}
31
32#[derive(Debug, Is, EnumString, AsRefStr)]
33enum ReplacementStrategy {
34    MatchCase,
35    Exact,
36}
37
38#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumString)]
39enum WeirScope {
40    Chunk,
41    Sentence,
42}
43
/// A failing test reported by `WeirLinter::run_tests`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TestResult {
    /// The text the test declared as the desired outcome.
    pub expected: String,
    /// The text the linter actually produced for the test input.
    pub got: String,
}
49
50pub struct WeirLinter {
51    expr: Lrc<Box<dyn Expr>>,
52    description: String,
53    message: String,
54    strategy: ReplacementStrategy,
55    replacements: Vec<String>,
56    lint_kind: LintKind,
57    scope: WeirScope,
58    ast: Arc<Ast>,
59}
60
61struct ChunkWeirLinter(WeirLinter);
62
63struct SentenceWeirLinter(WeirLinter);
64
65impl WeirLinter {
66    pub fn new(weir_code: &str) -> Result<WeirLinter, Error> {
67        let ast = parse_str(weir_code, true)?;
68
69        let main_expr_name = "main";
70        let description_name = "description";
71        let message_name = "message";
72        let lint_kind_name = "kind";
73        let replacement_name = "becomes";
74        let replacement_strat_name = "strategy";
75        let scope_name = "scope";
76
77        let resolved = resolve_exprs(&ast)?;
78
79        let expr = resolved
80            .get(main_expr_name)
81            .ok_or(Error::ExpectedVariableUndefined)?;
82
83        let description = ast
84            .get_variable_value(description_name)
85            .ok_or(Error::ExpectedVariableUndefined)?
86            .as_string()
87            .ok_or(Error::ExpectedDifferentVariableType)?
88            .to_owned();
89
90        let message = ast
91            .get_variable_value(message_name)
92            .ok_or(Error::ExpectedVariableUndefined)?
93            .as_string()
94            .ok_or(Error::ExpectedDifferentVariableType)?
95            .to_owned();
96
97        let replacement_val = ast
98            .get_variable_value(replacement_name)
99            .ok_or(Error::ExpectedVariableUndefined)?;
100
101        let replacements = match replacement_val {
102            AstVariable::String(s) => vec![s.to_owned()],
103            AstVariable::Array(arr) => {
104                let mut out = Vec::with_capacity(arr.len());
105                for item in arr.iter().map(|v| {
106                    v.as_string()
107                        .cloned()
108                        .ok_or(Error::ExpectedDifferentVariableType)
109                }) {
110                    let item = item?;
111                    out.push(item);
112                }
113                out
114            }
115        };
116
117        let replacement_strat_var = ast.get_variable_value(replacement_strat_name);
118        let replacement_strat = if let Some(replacement_strat) = replacement_strat_var {
119            let str = replacement_strat
120                .as_string()
121                .ok_or(Error::ExpectedDifferentVariableType)?;
122            ReplacementStrategy::from_str(str)
123                .ok()
124                .ok_or(Error::InvalidReplacementStrategy)?
125        } else {
126            ReplacementStrategy::MatchCase
127        };
128
129        let lint_kind_var = ast.get_variable_value(lint_kind_name);
130        let lint_kind = if let Some(lint_kind) = lint_kind_var {
131            let str = lint_kind
132                .as_string()
133                .ok_or(Error::ExpectedDifferentVariableType)?;
134            LintKind::from_string_key(str).ok_or(Error::InvalidLintKind)?
135        } else {
136            LintKind::Miscellaneous
137        };
138
139        let scope_var = ast.get_variable_value(scope_name);
140        let scope = if let Some(scope) = scope_var {
141            let str = scope
142                .as_string()
143                .ok_or(Error::ExpectedDifferentVariableType)?;
144            WeirScope::from_str(str).ok().ok_or(Error::InvalidScope)?
145        } else {
146            WeirScope::Chunk
147        };
148
149        let linter = WeirLinter {
150            strategy: replacement_strat,
151            ast,
152            expr: expr.clone(),
153            lint_kind,
154            scope,
155            description,
156            message,
157            replacements,
158        };
159
160        Ok(linter)
161    }
162
163    pub fn into_chunk_linter(self) -> Result<impl ExprLinter<Unit = Chunk>, Self> {
164        if self.scope == WeirScope::Chunk {
165            Ok(ChunkWeirLinter(self))
166        } else {
167            Err(self)
168        }
169    }
170
171    pub fn into_sentence_linter(self) -> Result<impl ExprLinter<Unit = Sentence>, Self> {
172        if self.scope == WeirScope::Sentence {
173            Ok(SentenceWeirLinter(self))
174        } else {
175            Err(self)
176        }
177    }
178
179    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
180        let span = matched_tokens.span()?;
181        let orig = span.get_content(source);
182
183        let suggestions = match self.strategy {
184            ReplacementStrategy::MatchCase => self
185                .replacements
186                .iter()
187                .map(|s| Suggestion::replace_with_match_case(s.chars().collect(), orig))
188                .collect(),
189            ReplacementStrategy::Exact => self
190                .replacements
191                .iter()
192                .map(|r| Suggestion::ReplaceWith(r.chars().collect()))
193                .collect(),
194        };
195
196        Some(Lint {
197            span,
198            lint_kind: self.lint_kind,
199            suggestions,
200            message: self.message.to_owned(),
201            priority: 31,
202        })
203    }
204
205    /// Counts the total number of tests defined.
206    pub fn count_tests(&self) -> usize {
207        self.ast.iter_tests().count()
208    }
209
210    /// Runs the tests defined in the source code, returning any failing results.
211    pub fn run_tests(&mut self) -> Vec<TestResult> {
212        fn apply_nth_suggestion(text: &str, lint: &Lint, n: usize) -> Option<String> {
213            let suggestion = lint.suggestions.get(n)?;
214            let mut text_chars: Vec<char> = text.chars().collect();
215            suggestion.apply(lint.span, &mut text_chars);
216            Some(text_chars.iter().collect())
217        }
218
219        fn transform_to_expected(
220            text: &str,
221            expected: &str,
222            linter: &mut impl Linter,
223        ) -> Option<String> {
224            let mut queue: VecDeque<(String, usize)> = VecDeque::new();
225            let mut seen: HashSet<String> = HashSet::new();
226
227            queue.push_back((text.to_string(), 0));
228            seen.insert(text.to_string());
229
230            while let Some((current, depth)) = queue.pop_front() {
231                if current == expected {
232                    return Some(current);
233                }
234
235                if depth >= 100 {
236                    continue;
237                }
238
239                let doc = Document::new_from_chars(
240                    current.chars().collect::<Vec<_>>().into(),
241                    &Markdown::default(),
242                    &FstDictionary::curated(),
243                );
244                let lints = linter.lint(&doc);
245
246                if let Some(lint) = lints.first() {
247                    for i in 0..lint.suggestions.len() {
248                        if let Some(next) = apply_nth_suggestion(&current, lint, i)
249                            && seen.insert(next.clone())
250                        {
251                            queue.push_back((next, depth + 1));
252                        }
253                    }
254                }
255            }
256
257            None
258        }
259
260        fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String {
261            let mut text_chars: Vec<char> = text.chars().collect();
262            let mut iter_count = 0;
263
264            loop {
265                let test = Document::new_from_chars(
266                    text_chars.clone().into(),
267                    &Markdown::default(),
268                    &FstDictionary::curated(),
269                );
270                let lints = linter.lint(&test);
271
272                if let Some(lint) = lints.first() {
273                    if let Some(suggestion) = lint.suggestions.get(n) {
274                        suggestion.apply(lint.span, &mut text_chars);
275                    } else {
276                        break;
277                    }
278                } else {
279                    break;
280                }
281
282                iter_count += 1;
283                if iter_count == 100 {
284                    break;
285                }
286            }
287
288            text_chars.iter().collect()
289        }
290
291        fn lint_count(text: &str, linter: &mut impl Linter) -> usize {
292            let document = Document::new_from_chars(
293                text.chars().collect::<Vec<_>>().into(),
294                &Markdown::default(),
295                &FstDictionary::curated(),
296            );
297
298            linter.lint(&document).len()
299        }
300
301        let mut results = Vec::new();
302        let tests: Vec<(String, String)> = self
303            .ast
304            .iter_tests()
305            .map(|(text, expected)| (text.to_string(), expected.to_string()))
306            .collect();
307
308        for (text, expected) in tests {
309            let matched = transform_to_expected(&text, &expected, self);
310
311            match matched {
312                Some(result) => {
313                    let remaining_lints = lint_count(&result, self);
314
315                    if remaining_lints != 0 {
316                        results.push(TestResult {
317                            expected: expected.to_string(),
318                            got: result,
319                        });
320                    }
321                }
322                None => results.push(TestResult {
323                    expected: expected.to_string(),
324                    got: transform_nth_str(&text, self, 0),
325                }),
326            }
327        }
328
329        results
330    }
331}
332
333impl Linter for WeirLinter {
334    fn lint(&mut self, document: &Document) -> Vec<Lint> {
335        let source = document.get_source();
336        let mut lints = Vec::new();
337        let units: Box<dyn Iterator<Item = &[Token]> + '_> = match self.scope {
338            WeirScope::Chunk => Box::new(document.iter_chunks()),
339            WeirScope::Sentence => Box::new(document.iter_sentences()),
340        };
341
342        for unit in units {
343            lints.extend(
344                self.expr
345                    .iter_matches(unit, source)
346                    .filter_map(|match_span| {
347                        self.match_to_lint(&unit[match_span.start..match_span.end], source)
348                    }),
349            );
350        }
351
352        lints
353    }
354
355    fn description(&self) -> &str {
356        &self.description
357    }
358}
359
360impl ExprLinter for ChunkWeirLinter {
361    type Unit = Chunk;
362
363    fn expr(&self) -> &dyn Expr {
364        &self.0.expr
365    }
366
367    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
368        self.0.match_to_lint(matched_tokens, source)
369    }
370
371    fn description(&self) -> &str {
372        &self.0.description
373    }
374}
375
376impl ExprLinter for SentenceWeirLinter {
377    type Unit = Sentence;
378
379    fn expr(&self) -> &dyn Expr {
380        &self.0.expr
381    }
382
383    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
384        self.0.match_to_lint(matched_tokens, source)
385    }
386
387    fn description(&self) -> &str {
388        &self.0.description
389    }
390}
391
392fn resolve_exprs(ast: &Ast) -> Result<HashMap<String, Lrc<Box<dyn Expr>>>, Error> {
393    let mut resolved_exprs = HashMap::new();
394
395    for (name, val) in ast.iter_exprs() {
396        let expr = val.to_expr(&resolved_exprs)?;
397        resolved_exprs.insert(name.to_owned(), Lrc::new(expr));
398    }
399
400    Ok(resolved_exprs)
401}
402
#[cfg(test)]
pub mod tests {
    use quickcheck_macros::quickcheck;

    use crate::weir::Error;

    use super::{TestResult, WeirLinter};

    /// Asserts that every test declared in the linter's rule passes.
    #[track_caller]
    pub fn assert_passes_all(linter: &mut WeirLinter) {
        assert_eq!(Vec::<TestResult>::new(), linter.run_tests());
    }

    /// A sequence with an empty-group replacement target hyphenates mouse commands.
    #[test]
    fn simple_right_click_linter() {
        let rule = r#"
            expr main <([right, middle, left] $click), ( )>
            let message "Hyphenate this mouse command"
            let description "Hyphenates right-click style mouse commands."
            let kind "Punctuation"
            let becomes "-"

            test "Right click the icon." "Right-click the icon."
            test "Please right click on the link." "Please right-click on the link."
            test "They right clicked the submit button." "They right-clicked the submit button."
            test "Right clicking the item highlights it." "Right-clicking the item highlights it."
            test "Right clicks are tracked in the log." "Right-clicks are tracked in the log."
            test "He RIGHT CLICKED the file." "He RIGHT-CLICKED the file."
            test "Left click the checkbox." "Left-click the checkbox."
            test "Middle click to open in a new tab." "Middle-click to open in a new tab."

            allows "This test contains the correct version of right-click and therefore shouldn't error."
            "#;

        let mut compiled = WeirLinter::new(rule).unwrap();
        assert_passes_all(&mut compiled);
        assert_eq!(9, compiled.count_tests());
    }

    /// An array of alternatives with an exact replacement.
    #[test]
    fn g_suite() {
        let rule = r#"
            expr main [(G [Suite, Suit]), (Google Apps for Work)]
            let message "Use the updated brand."
            let description "`G Suite` or `Google Apps for Work` is now called `Google Workspace`"
            let kind "Miscellaneous"
            let becomes "Google Workspace"
            let strategy "Exact"

            test "We migrated from G Suite last year." "We migrated from Google Workspace last year."
            test "This account is still labeled as Google Apps for Work." "This account is still labeled as Google Workspace."
            test "The pricing page mentions G Suit for legacy plans." "The pricing page mentions Google Workspace for legacy plans."
            test "New customers sign up for Google Workspace." "New customers sign up for Google Workspace."

            allows "This test contains the correct version of Google Workspace and therefore shouldn't error."
            "#;

        let mut compiled = WeirLinter::new(rule).unwrap();

        assert_passes_all(&mut compiled);
        assert_eq!(5, compiled.count_tests());
    }

    /// Regression test: the order of overlapping array options must not change the match.
    #[test]
    fn array_prefers_longest_match_over_first_match() {
        let orderings = [
            "[(capitalized off of), (capitalized off)]",
            "[(capitalized off), (capitalized off of)]",
        ];

        for main in orderings {
            let rule = format!(
                r#"
            expr main {main}
            let message "Use the replacement."
            let description "Regression test for overlapping Weir array options."
            let kind "Miscellaneous"
            let becomes "replacement"
            let strategy "Exact"

            test "capitalized off of" "replacement"
            "#
            );

            let mut compiled = WeirLinter::new(&rule).unwrap();
            assert_passes_all(&mut compiled);
        }
    }

    /// Same rule as `g_suite`, built from named expressions referenced with `@`.
    #[test]
    fn g_suite_with_refs() {
        let rule = r#"
            expr a (G [Suite, Suit])
            expr b (Google Apps For Work)
            expr incorrect [@a, @b]

            expr main @incorrect
            let message "Use the updated brand."
            let description "`G Suite` or `Google Apps for Work` is now called `Google Workspace`"
            let kind "Miscellaneous"
            let becomes "Google Workspace"
            let strategy "Exact"

            test "We migrated from G Suite last year." "We migrated from Google Workspace last year."
            test "This account is still labeled as Google Apps for Work." "This account is still labeled as Google Workspace."
            test "The pricing page mentions G Suit for legacy plans." "The pricing page mentions Google Workspace for legacy plans."
            test "New customers sign up for Google Workspace." "New customers sign up for Google Workspace."
            "#;

        let mut compiled = WeirLinter::new(rule).unwrap();

        assert_passes_all(&mut compiled);
        assert_eq!(4, compiled.count_tests());
    }

    /// A rule without a `scope` variable is chunk-scoped.
    #[test]
    fn scope_defaults_to_chunk() {
        let rule = r#"
            expr main one**two
            let message "Use three."
            let description "Test chunk-scoped Weir."
            let kind "Miscellaneous"
            let becomes "three"
            let strategy "Exact"

            allows "one, two."
        "#;

        let mut compiled = WeirLinter::new(rule).unwrap();

        assert_passes_all(&mut compiled);

        let Err(returned) = WeirLinter::new(rule).unwrap().into_sentence_linter() else {
            panic!("default-scoped Weir rule should not convert to sentence linter")
        };
        assert!(returned.into_chunk_linter().is_ok());
    }

    /// A sentence-scoped rule can match text that spans a comma.
    #[test]
    fn sentence_scope_can_match_across_chunks() {
        let rule = r#"
            expr main one**two
            let message "Use three."
            let description "Test sentence-scoped Weir."
            let kind "Miscellaneous"
            let becomes "three"
            let strategy "Exact"
            let scope "Sentence"

            test "one, two." "three."
        "#;

        let mut compiled = WeirLinter::new(rule).unwrap();

        assert_passes_all(&mut compiled);

        let as_sentence = WeirLinter::new(rule).unwrap().into_sentence_linter();
        assert!(as_sentence.is_ok());
    }

    /// An unknown `scope` value is rejected.
    #[test]
    fn invalid_scope_errors() {
        let rule = r#"
            expr main one
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes ""
            let scope "Paragraph"
        "#;

        let outcome = WeirLinter::new(rule);

        assert_eq!(outcome.err(), Some(Error::InvalidScope));
    }

    /// Referencing an undefined expression is reported with its name.
    #[test]
    fn fails_on_unresolved_expr() {
        let rule = r#"
            expr main @missing
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes ""
            let strategy "Exact"
        "#;

        let outcome = WeirLinter::new(rule);

        assert_eq!(
            outcome.err().unwrap(),
            Error::UnableToResolveExpr("missing".to_string())
        );
    }

    /// Wildcards inside a sequence with an empty replacement.
    #[test]
    fn wildcard() {
        let rule = r#"
            expr main <(NOUN * NOUN), (* NOUN), *>
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes ""
            let strategy "Exact"

            test "I like trees and plants of all kinds" "I like trees  plants of all kinds"
            test "homework tempts teachers" "homework  teachers"
            "#;

        let mut compiled = WeirLinter::new(rule).unwrap();

        assert_passes_all(&mut compiled);
        assert_eq!(2, compiled.count_tests());
    }

    /// Punctuation can be used directly as an expression.
    #[test]
    fn dashes() {
        let rule = r#"
            expr main --
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes "-"
            let strategy "Exact"

            test "This--and--that" "This-and-that"

            allows "this-and-that"
            "#;

        let mut compiled = WeirLinter::new(rule).unwrap();

        assert_passes_all(&mut compiled);
        assert_eq!(2, compiled.count_tests());
    }

    /// An `allows` text that the rule flags must be reported as a failure.
    #[test]
    fn fails_on_ignore_test() {
        let rule = r#"
            expr main test
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes "-"
            let strategy "Exact"

            allows "test"
            "#;

        let mut compiled = WeirLinter::new(rule).unwrap();

        assert_eq!(compiled.run_tests().len(), 1);
    }

    /// An `expr` declaration without a body must produce an error rather than a panic.
    #[test]
    fn errors_properly_with_missing_expr() {
        let rule = "expr main";
        let outcome = WeirLinter::new(rule);
        assert_eq!(outcome.err(), Some(Error::ExpectedVariableUndefined));
    }

    /// Every entry of an array-valued `becomes` must be reachable by the test runner.
    #[test]
    fn becomes_array_with_many_alternatives() {
        let rule = r#"
 expr main (the fact)
 let message "Consider alternative phrasing"
 let description "Test that all 'becomes' alternatives can be reached"
 let kind "Miscellaneous"
 let becomes ["the allegation", "the idea", "the claim", "the story", "the rumor"]
 let strategy "Exact"

 test "There is truth to the fact that people like images." "There is truth to the allegation that people like images."
 test "There is truth to the fact that people like images." "There is truth to the idea that people like images."
 test "There is truth to the fact that people like images." "There is truth to the claim that people like images."
 test "There is truth to the fact that people like images." "There is truth to the story that people like images."
 test "There is truth to the fact that people like images." "There is truth to the rumor that people like images."

 allows "There is truth to the story that people like images."
 "#;

        let mut compiled = WeirLinter::new(rule).unwrap();
        assert_passes_all(&mut compiled);
        assert_eq!(6, compiled.count_tests());
    }

    /// Arbitrary input may be rejected, but constructing a linter must never panic.
    #[quickcheck]
    fn does_not_panic(s: String) {
        let _ = WeirLinter::new(s.as_str());
    }
}
696}