Skip to main content

harper_core/weir/
mod.rs

1//! Weir is a programming language for finding errors in natural language.
2//! See our [main documentation](https://writewithharper.com/docs/weir) for more details.
3
4mod ast;
5mod error;
6mod optimize;
7mod parsing;
8
9use std::collections::VecDeque;
10use std::str::FromStr;
11use std::sync::Arc;
12
13pub use error::Error;
14use hashbrown::{HashMap, HashSet};
15use is_macro::Is;
16use parsing::{parse_expr_str, parse_str};
17use strum_macros::{AsRefStr, EnumString};
18
19use crate::expr::Expr;
20use crate::linting::{Chunk, ExprLinter, Lint, LintKind, Linter, Suggestion};
21use crate::parsers::Markdown;
22use crate::spell::FstDictionary;
23use crate::{Document, Lrc, Token, TokenStringExt};
24
25use self::ast::{Ast, AstVariable};
26
27pub(crate) fn weir_expr_to_expr(weir_code: &str) -> Result<Box<dyn Expr>, Error> {
28    let ast = parse_expr_str(weir_code, true)?;
29    ast.to_expr(&HashMap::new())
30}
31
32#[derive(Debug, Is, EnumString, AsRefStr)]
33enum ReplacementStrategy {
34    MatchCase,
35    Exact,
36}
37
/// A single test outcome reported by `WeirLinter::run_tests`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TestResult {
    /// The text the test declared as the desired result.
    pub expected: String,
    /// The text that was actually produced.
    pub got: String,
}
43
/// A linter built from Weir source code.
///
/// Construct one with `WeirLinter::new`; the fields below are filled from the
/// variables and expressions defined in that source.
pub struct WeirLinter {
    /// The resolved `main` expression that text is matched against.
    expr: Lrc<Box<dyn Expr>>,
    /// Value of the `description` variable.
    description: String,
    /// Value of the `message` variable; copied into every produced lint.
    message: String,
    /// How `replacements` are converted into suggestions.
    strategy: ReplacementStrategy,
    /// Replacement texts taken from the `becomes` variable (a string or an array of strings).
    replacements: Vec<String>,
    /// Lint category taken from the `kind` variable.
    lint_kind: LintKind,
    /// The parsed source, kept so the tests it defines can be counted and run.
    ast: Arc<Ast>,
}
53
54impl WeirLinter {
55    pub fn new(weir_code: &str) -> Result<WeirLinter, Error> {
56        let ast = parse_str(weir_code, true)?;
57
58        let main_expr_name = "main";
59        let description_name = "description";
60        let message_name = "message";
61        let lint_kind_name = "kind";
62        let replacement_name = "becomes";
63        let replacement_strat_name = "strategy";
64
65        let resolved = resolve_exprs(&ast)?;
66
67        let expr = resolved
68            .get(main_expr_name)
69            .ok_or(Error::ExpectedVariableUndefined)?;
70
71        let description = ast
72            .get_variable_value(description_name)
73            .ok_or(Error::ExpectedVariableUndefined)?
74            .as_string()
75            .ok_or(Error::ExpectedDifferentVariableType)?
76            .to_owned();
77
78        let message = ast
79            .get_variable_value(message_name)
80            .ok_or(Error::ExpectedVariableUndefined)?
81            .as_string()
82            .ok_or(Error::ExpectedDifferentVariableType)?
83            .to_owned();
84
85        let replacement_val = ast
86            .get_variable_value(replacement_name)
87            .ok_or(Error::ExpectedVariableUndefined)?;
88
89        let replacements = match replacement_val {
90            AstVariable::String(s) => vec![s.to_owned()],
91            AstVariable::Array(arr) => {
92                let mut out = Vec::with_capacity(arr.len());
93                for item in arr.iter().map(|v| {
94                    v.as_string()
95                        .cloned()
96                        .ok_or(Error::ExpectedDifferentVariableType)
97                }) {
98                    let item = item?;
99                    out.push(item);
100                }
101                out
102            }
103        };
104
105        let replacement_strat_var = ast.get_variable_value(replacement_strat_name);
106        let replacement_strat = if let Some(replacement_strat) = replacement_strat_var {
107            let str = replacement_strat
108                .as_string()
109                .ok_or(Error::ExpectedDifferentVariableType)?;
110            ReplacementStrategy::from_str(str)
111                .ok()
112                .ok_or(Error::InvalidReplacementStrategy)?
113        } else {
114            ReplacementStrategy::MatchCase
115        };
116
117        let lint_kind_var = ast.get_variable_value(lint_kind_name);
118        let lint_kind = if let Some(lint_kind) = lint_kind_var {
119            let str = lint_kind
120                .as_string()
121                .ok_or(Error::ExpectedDifferentVariableType)?;
122            LintKind::from_string_key(str).ok_or(Error::InvalidLintKind)?
123        } else {
124            LintKind::Miscellaneous
125        };
126
127        let linter = WeirLinter {
128            strategy: replacement_strat,
129            ast,
130            expr: expr.clone(),
131            lint_kind,
132            description,
133            message,
134            replacements,
135        };
136
137        Ok(linter)
138    }
139
140    /// Counts the total number of tests defined.
141    pub fn count_tests(&self) -> usize {
142        self.ast.iter_tests().count()
143    }
144
145    /// Runs the tests defined in the source code, returning any failing results.
146    pub fn run_tests(&mut self) -> Vec<TestResult> {
147        fn apply_nth_suggestion(text: &str, lint: &Lint, n: usize) -> Option<String> {
148            let suggestion = lint.suggestions.get(n)?;
149            let mut text_chars: Vec<char> = text.chars().collect();
150            suggestion.apply(lint.span, &mut text_chars);
151            Some(text_chars.iter().collect())
152        }
153
154        fn transform_top3_to_expected(
155            text: &str,
156            expected: &str,
157            linter: &mut impl Linter,
158        ) -> Option<String> {
159            let mut queue: VecDeque<(String, usize)> = VecDeque::new();
160            let mut seen: HashSet<String> = HashSet::new();
161
162            queue.push_back((text.to_string(), 0));
163            seen.insert(text.to_string());
164
165            while let Some((current, depth)) = queue.pop_front() {
166                if current == expected {
167                    return Some(current);
168                }
169
170                if depth >= 100 {
171                    continue;
172                }
173
174                let doc = Document::new_from_chars(
175                    current.chars().collect::<Vec<_>>().into(),
176                    &Markdown::default(),
177                    &FstDictionary::curated(),
178                );
179                let lints = linter.lint(&doc);
180
181                if let Some(lint) = lints.first() {
182                    for i in 0..3 {
183                        if let Some(next) = apply_nth_suggestion(&current, lint, i)
184                            && seen.insert(next.clone())
185                        {
186                            queue.push_back((next, depth + 1));
187                        }
188                    }
189                }
190            }
191
192            None
193        }
194
195        fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String {
196            let mut text_chars: Vec<char> = text.chars().collect();
197            let mut iter_count = 0;
198
199            loop {
200                let test = Document::new_from_chars(
201                    text_chars.clone().into(),
202                    &Markdown::default(),
203                    &FstDictionary::curated(),
204                );
205                let lints = linter.lint(&test);
206
207                if let Some(lint) = lints.first() {
208                    if let Some(suggestion) = lint.suggestions.get(n) {
209                        suggestion.apply(lint.span, &mut text_chars);
210                    } else {
211                        break;
212                    }
213                } else {
214                    break;
215                }
216
217                iter_count += 1;
218                if iter_count == 100 {
219                    break;
220                }
221            }
222
223            text_chars.iter().collect()
224        }
225
226        fn lint_count(text: &str, linter: &mut impl Linter) -> usize {
227            let document = Document::new_from_chars(
228                text.chars().collect::<Vec<_>>().into(),
229                &Markdown::default(),
230                &FstDictionary::curated(),
231            );
232
233            linter.lint(&document).len()
234        }
235
236        let mut results = Vec::new();
237        let tests: Vec<(String, String)> = self
238            .ast
239            .iter_tests()
240            .map(|(text, expected)| (text.to_string(), expected.to_string()))
241            .collect();
242
243        for (text, expected) in tests {
244            let matched = transform_top3_to_expected(&text, &expected, self);
245
246            match matched {
247                Some(result) => {
248                    let remaining_lints = lint_count(&result, self);
249
250                    if remaining_lints != 0 {
251                        results.push(TestResult {
252                            expected: expected.to_string(),
253                            got: result,
254                        });
255                    }
256                }
257                None => results.push(TestResult {
258                    expected: expected.to_string(),
259                    got: transform_nth_str(&text, self, 0),
260                }),
261            }
262        }
263
264        results
265    }
266}
267
268impl ExprLinter for WeirLinter {
269    type Unit = Chunk;
270
271    fn expr(&self) -> &dyn Expr {
272        &self.expr
273    }
274
275    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
276        let span = matched_tokens.span()?;
277        let orig = span.get_content(source);
278
279        let suggestions = match self.strategy {
280            ReplacementStrategy::MatchCase => self
281                .replacements
282                .iter()
283                .map(|s| Suggestion::replace_with_match_case(s.chars().collect(), orig))
284                .collect(),
285            ReplacementStrategy::Exact => self
286                .replacements
287                .iter()
288                .map(|r| Suggestion::ReplaceWith(r.chars().collect()))
289                .collect(),
290        };
291
292        Some(Lint {
293            span,
294            lint_kind: self.lint_kind,
295            suggestions,
296            message: self.message.to_owned(),
297            priority: 31,
298        })
299    }
300
301    fn description(&self) -> &str {
302        &self.description
303    }
304}
305
306fn resolve_exprs(ast: &Ast) -> Result<HashMap<String, Lrc<Box<dyn Expr>>>, Error> {
307    let mut resolved_exprs = HashMap::new();
308
309    for (name, val) in ast.iter_exprs() {
310        let expr = val.to_expr(&resolved_exprs)?;
311        resolved_exprs.insert(name.to_owned(), Lrc::new(expr));
312    }
313
314    Ok(resolved_exprs)
315}
316
#[cfg(test)]
pub mod tests {
    use quickcheck_macros::quickcheck;

    use super::{Error, TestResult, WeirLinter};

    /// Asserts that every test embedded in the linter's Weir source passes.
    #[track_caller]
    pub fn assert_passes_all(linter: &mut WeirLinter) {
        assert_eq!(Vec::<TestResult>::new(), linter.run_tests());
    }

    /// Builds a linter from `source`, panicking if the source is rejected.
    #[track_caller]
    fn compile(source: &str) -> WeirLinter {
        WeirLinter::new(source).unwrap()
    }

    #[test]
    fn simple_right_click_linter() {
        let mut linter = compile(
            r#"
            expr main <([right, middle, left] $click), ( )>
            let message "Hyphenate this mouse command"
            let description "Hyphenates right-click style mouse commands."
            let kind "Punctuation"
            let becomes "-"

            test "Right click the icon." "Right-click the icon."
            test "Please right click on the link." "Please right-click on the link."
            test "They right clicked the submit button." "They right-clicked the submit button."
            test "Right clicking the item highlights it." "Right-clicking the item highlights it."
            test "Right clicks are tracked in the log." "Right-clicks are tracked in the log."
            test "He RIGHT CLICKED the file." "He RIGHT-CLICKED the file."
            test "Left click the checkbox." "Left-click the checkbox."
            test "Middle click to open in a new tab." "Middle-click to open in a new tab."

            allows "This test contains the correct version of right-click and therefore shouldn't error."
            "#,
        );

        assert_passes_all(&mut linter);
        assert_eq!(9, linter.count_tests());
    }

    #[test]
    fn g_suite() {
        let mut linter = compile(
            r#"
            expr main [(G [Suite, Suit]), (Google Apps for Work)]
            let message "Use the updated brand."
            let description "`G Suite` or `Google Apps for Work` is now called `Google Workspace`"
            let kind "Miscellaneous"
            let becomes "Google Workspace"
            let strategy "Exact"

            test "We migrated from G Suite last year." "We migrated from Google Workspace last year."
            test "This account is still labeled as Google Apps for Work." "This account is still labeled as Google Workspace."
            test "The pricing page mentions G Suit for legacy plans." "The pricing page mentions Google Workspace for legacy plans."
            test "New customers sign up for Google Workspace." "New customers sign up for Google Workspace."

            allows "This test contains the correct version of Google Workspace and therefore shouldn't error."
            "#,
        );

        assert_passes_all(&mut linter);
        assert_eq!(5, linter.count_tests());
    }

    #[test]
    fn g_suite_with_refs() {
        let mut linter = compile(
            r#"
            expr a (G [Suite, Suit])
            expr b (Google Apps For Work)
            expr incorrect [@a, @b]

            expr main @incorrect
            let message "Use the updated brand."
            let description "`G Suite` or `Google Apps for Work` is now called `Google Workspace`"
            let kind "Miscellaneous"
            let becomes "Google Workspace"
            let strategy "Exact"

            test "We migrated from G Suite last year." "We migrated from Google Workspace last year."
            test "This account is still labeled as Google Apps for Work." "This account is still labeled as Google Workspace."
            test "The pricing page mentions G Suit for legacy plans." "The pricing page mentions Google Workspace for legacy plans."
            test "New customers sign up for Google Workspace." "New customers sign up for Google Workspace."
            "#,
        );

        assert_passes_all(&mut linter);
        assert_eq!(4, linter.count_tests());
    }

    #[test]
    fn fails_on_unresolved_expr() {
        // `main` refers to an expression that is never defined.
        let outcome = WeirLinter::new(
            r#"
            expr main @missing
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes ""
            let strategy "Exact"
        "#,
        );

        assert_eq!(
            outcome.err().unwrap(),
            Error::UnableToResolveExpr("missing".to_string())
        );
    }

    #[test]
    fn wildcard() {
        let mut linter = compile(
            r#"
            expr main <(NOUN * NOUN), (* NOUN), *>
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes ""
            let strategy "Exact"

            test "I like trees and plants of all kinds" "I like trees  plants of all kinds"
            test "homework tempts teachers" "homework  teachers"
            "#,
        );

        assert_passes_all(&mut linter);
        assert_eq!(2, linter.count_tests());
    }

    #[test]
    fn dashes() {
        let mut linter = compile(
            r#"
            expr main --
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes "-"
            let strategy "Exact"

            test "This--and--that" "This-and-that"

            allows "this-and-that"
            "#,
        );

        assert_passes_all(&mut linter);
        assert_eq!(2, linter.count_tests());
    }

    #[test]
    fn fails_on_ignore_test() {
        // The `allows` text is exactly what `main` matches, so one failure is expected.
        let mut linter = compile(
            r#"
            expr main test
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes "-"
            let strategy "Exact"

            allows "test"
            "#,
        );

        assert_eq!(linter.run_tests().len(), 1);
    }

    #[test]
    fn errors_properly_with_missing_expr() {
        let outcome = WeirLinter::new("expr main");
        assert_eq!(outcome.err(), Some(Error::ExpectedVariableUndefined));
    }

    #[quickcheck]
    fn does_not_panic(s: String) {
        let _ = WeirLinter::new(s.as_str());
    }
}