1mod ast;
5mod error;
6mod optimize;
7mod parsing;
8
9use std::collections::VecDeque;
10use std::str::FromStr;
11use std::sync::Arc;
12
13pub use error::Error;
14use hashbrown::{HashMap, HashSet};
15use is_macro::Is;
16use parsing::{parse_expr_str, parse_str};
17use strum_macros::{AsRefStr, EnumString};
18
19use crate::expr::{Expr, ExprExt};
20use crate::linting::{Chunk, ExprLinter, Lint, LintKind, Linter, Sentence, Suggestion};
21use crate::parsers::Markdown;
22use crate::spell::FstDictionary;
23use crate::{Document, Lrc, Token, TokenStringExt};
24
25use self::ast::{Ast, AstVariable};
26
27pub(crate) fn weir_expr_to_expr(weir_code: &str) -> Result<Box<dyn Expr>, Error> {
28 let ast = parse_expr_str(weir_code, true)?;
29 ast.to_expr(&HashMap::new())
30}
31
32#[derive(Debug, Is, EnumString, AsRefStr)]
33enum ReplacementStrategy {
34 MatchCase,
35 Exact,
36}
37
38#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumString)]
39enum WeirScope {
40 Chunk,
41 Sentence,
42}
43
/// A failed test case reported by [`WeirLinter::run_tests`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TestResult {
    /// The text the test case expected the rule to produce.
    pub expected: String,
    /// The text that was actually obtained. When the expected text was reached but
    /// still triggered lints, this is that same text.
    pub got: String,
}
49
50pub struct WeirLinter {
51 expr: Lrc<Box<dyn Expr>>,
52 description: String,
53 message: String,
54 strategy: ReplacementStrategy,
55 replacements: Vec<String>,
56 lint_kind: LintKind,
57 scope: WeirScope,
58 ast: Arc<Ast>,
59}
60
61struct ChunkWeirLinter(WeirLinter);
62
63struct SentenceWeirLinter(WeirLinter);
64
65impl WeirLinter {
66 pub fn new(weir_code: &str) -> Result<WeirLinter, Error> {
67 let ast = parse_str(weir_code, true)?;
68
69 let main_expr_name = "main";
70 let description_name = "description";
71 let message_name = "message";
72 let lint_kind_name = "kind";
73 let replacement_name = "becomes";
74 let replacement_strat_name = "strategy";
75 let scope_name = "scope";
76
77 let resolved = resolve_exprs(&ast)?;
78
79 let expr = resolved
80 .get(main_expr_name)
81 .ok_or(Error::ExpectedVariableUndefined)?;
82
83 let description = ast
84 .get_variable_value(description_name)
85 .ok_or(Error::ExpectedVariableUndefined)?
86 .as_string()
87 .ok_or(Error::ExpectedDifferentVariableType)?
88 .to_owned();
89
90 let message = ast
91 .get_variable_value(message_name)
92 .ok_or(Error::ExpectedVariableUndefined)?
93 .as_string()
94 .ok_or(Error::ExpectedDifferentVariableType)?
95 .to_owned();
96
97 let replacement_val = ast
98 .get_variable_value(replacement_name)
99 .ok_or(Error::ExpectedVariableUndefined)?;
100
101 let replacements = match replacement_val {
102 AstVariable::String(s) => vec![s.to_owned()],
103 AstVariable::Array(arr) => {
104 let mut out = Vec::with_capacity(arr.len());
105 for item in arr.iter().map(|v| {
106 v.as_string()
107 .cloned()
108 .ok_or(Error::ExpectedDifferentVariableType)
109 }) {
110 let item = item?;
111 out.push(item);
112 }
113 out
114 }
115 };
116
117 let replacement_strat_var = ast.get_variable_value(replacement_strat_name);
118 let replacement_strat = if let Some(replacement_strat) = replacement_strat_var {
119 let str = replacement_strat
120 .as_string()
121 .ok_or(Error::ExpectedDifferentVariableType)?;
122 ReplacementStrategy::from_str(str)
123 .ok()
124 .ok_or(Error::InvalidReplacementStrategy)?
125 } else {
126 ReplacementStrategy::MatchCase
127 };
128
129 let lint_kind_var = ast.get_variable_value(lint_kind_name);
130 let lint_kind = if let Some(lint_kind) = lint_kind_var {
131 let str = lint_kind
132 .as_string()
133 .ok_or(Error::ExpectedDifferentVariableType)?;
134 LintKind::from_string_key(str).ok_or(Error::InvalidLintKind)?
135 } else {
136 LintKind::Miscellaneous
137 };
138
139 let scope_var = ast.get_variable_value(scope_name);
140 let scope = if let Some(scope) = scope_var {
141 let str = scope
142 .as_string()
143 .ok_or(Error::ExpectedDifferentVariableType)?;
144 WeirScope::from_str(str).ok().ok_or(Error::InvalidScope)?
145 } else {
146 WeirScope::Chunk
147 };
148
149 let linter = WeirLinter {
150 strategy: replacement_strat,
151 ast,
152 expr: expr.clone(),
153 lint_kind,
154 scope,
155 description,
156 message,
157 replacements,
158 };
159
160 Ok(linter)
161 }
162
163 pub fn into_chunk_linter(self) -> Result<impl ExprLinter<Unit = Chunk>, Self> {
164 if self.scope == WeirScope::Chunk {
165 Ok(ChunkWeirLinter(self))
166 } else {
167 Err(self)
168 }
169 }
170
171 pub fn into_sentence_linter(self) -> Result<impl ExprLinter<Unit = Sentence>, Self> {
172 if self.scope == WeirScope::Sentence {
173 Ok(SentenceWeirLinter(self))
174 } else {
175 Err(self)
176 }
177 }
178
179 fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
180 let span = matched_tokens.span()?;
181 let orig = span.get_content(source);
182
183 let suggestions = match self.strategy {
184 ReplacementStrategy::MatchCase => self
185 .replacements
186 .iter()
187 .map(|s| Suggestion::replace_with_match_case(s.chars().collect(), orig))
188 .collect(),
189 ReplacementStrategy::Exact => self
190 .replacements
191 .iter()
192 .map(|r| Suggestion::ReplaceWith(r.chars().collect()))
193 .collect(),
194 };
195
196 Some(Lint {
197 span,
198 lint_kind: self.lint_kind,
199 suggestions,
200 message: self.message.to_owned(),
201 priority: 31,
202 })
203 }
204
205 pub fn count_tests(&self) -> usize {
207 self.ast.iter_tests().count()
208 }
209
210 pub fn run_tests(&mut self) -> Vec<TestResult> {
212 fn apply_nth_suggestion(text: &str, lint: &Lint, n: usize) -> Option<String> {
213 let suggestion = lint.suggestions.get(n)?;
214 let mut text_chars: Vec<char> = text.chars().collect();
215 suggestion.apply(lint.span, &mut text_chars);
216 Some(text_chars.iter().collect())
217 }
218
219 fn transform_to_expected(
220 text: &str,
221 expected: &str,
222 linter: &mut impl Linter,
223 ) -> Option<String> {
224 let mut queue: VecDeque<(String, usize)> = VecDeque::new();
225 let mut seen: HashSet<String> = HashSet::new();
226
227 queue.push_back((text.to_string(), 0));
228 seen.insert(text.to_string());
229
230 while let Some((current, depth)) = queue.pop_front() {
231 if current == expected {
232 return Some(current);
233 }
234
235 if depth >= 100 {
236 continue;
237 }
238
239 let doc = Document::new_from_chars(
240 current.chars().collect::<Vec<_>>().into(),
241 &Markdown::default(),
242 &FstDictionary::curated(),
243 );
244 let lints = linter.lint(&doc);
245
246 if let Some(lint) = lints.first() {
247 for i in 0..lint.suggestions.len() {
248 if let Some(next) = apply_nth_suggestion(¤t, lint, i)
249 && seen.insert(next.clone())
250 {
251 queue.push_back((next, depth + 1));
252 }
253 }
254 }
255 }
256
257 None
258 }
259
260 fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String {
261 let mut text_chars: Vec<char> = text.chars().collect();
262 let mut iter_count = 0;
263
264 loop {
265 let test = Document::new_from_chars(
266 text_chars.clone().into(),
267 &Markdown::default(),
268 &FstDictionary::curated(),
269 );
270 let lints = linter.lint(&test);
271
272 if let Some(lint) = lints.first() {
273 if let Some(suggestion) = lint.suggestions.get(n) {
274 suggestion.apply(lint.span, &mut text_chars);
275 } else {
276 break;
277 }
278 } else {
279 break;
280 }
281
282 iter_count += 1;
283 if iter_count == 100 {
284 break;
285 }
286 }
287
288 text_chars.iter().collect()
289 }
290
291 fn lint_count(text: &str, linter: &mut impl Linter) -> usize {
292 let document = Document::new_from_chars(
293 text.chars().collect::<Vec<_>>().into(),
294 &Markdown::default(),
295 &FstDictionary::curated(),
296 );
297
298 linter.lint(&document).len()
299 }
300
301 let mut results = Vec::new();
302 let tests: Vec<(String, String)> = self
303 .ast
304 .iter_tests()
305 .map(|(text, expected)| (text.to_string(), expected.to_string()))
306 .collect();
307
308 for (text, expected) in tests {
309 let matched = transform_to_expected(&text, &expected, self);
310
311 match matched {
312 Some(result) => {
313 let remaining_lints = lint_count(&result, self);
314
315 if remaining_lints != 0 {
316 results.push(TestResult {
317 expected: expected.to_string(),
318 got: result,
319 });
320 }
321 }
322 None => results.push(TestResult {
323 expected: expected.to_string(),
324 got: transform_nth_str(&text, self, 0),
325 }),
326 }
327 }
328
329 results
330 }
331}
332
333impl Linter for WeirLinter {
334 fn lint(&mut self, document: &Document) -> Vec<Lint> {
335 let source = document.get_source();
336 let mut lints = Vec::new();
337 let units: Box<dyn Iterator<Item = &[Token]> + '_> = match self.scope {
338 WeirScope::Chunk => Box::new(document.iter_chunks()),
339 WeirScope::Sentence => Box::new(document.iter_sentences()),
340 };
341
342 for unit in units {
343 lints.extend(
344 self.expr
345 .iter_matches(unit, source)
346 .filter_map(|match_span| {
347 self.match_to_lint(&unit[match_span.start..match_span.end], source)
348 }),
349 );
350 }
351
352 lints
353 }
354
355 fn description(&self) -> &str {
356 &self.description
357 }
358}
359
360impl ExprLinter for ChunkWeirLinter {
361 type Unit = Chunk;
362
363 fn expr(&self) -> &dyn Expr {
364 &self.0.expr
365 }
366
367 fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
368 self.0.match_to_lint(matched_tokens, source)
369 }
370
371 fn description(&self) -> &str {
372 &self.0.description
373 }
374}
375
376impl ExprLinter for SentenceWeirLinter {
377 type Unit = Sentence;
378
379 fn expr(&self) -> &dyn Expr {
380 &self.0.expr
381 }
382
383 fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
384 self.0.match_to_lint(matched_tokens, source)
385 }
386
387 fn description(&self) -> &str {
388 &self.0.description
389 }
390}
391
392fn resolve_exprs(ast: &Ast) -> Result<HashMap<String, Lrc<Box<dyn Expr>>>, Error> {
393 let mut resolved_exprs = HashMap::new();
394
395 for (name, val) in ast.iter_exprs() {
396 let expr = val.to_expr(&resolved_exprs)?;
397 resolved_exprs.insert(name.to_owned(), Lrc::new(expr));
398 }
399
400 Ok(resolved_exprs)
401}
402
#[cfg(test)]
pub mod tests {
    use quickcheck_macros::quickcheck;

    use crate::weir::Error;

    use super::{TestResult, WeirLinter};

    /// Asserts that every test case embedded in the linter's rule passes.
    #[track_caller]
    pub fn assert_passes_all(linter: &mut WeirLinter) {
        let failures = linter.run_tests();
        assert_eq!(Vec::<TestResult>::new(), failures);
    }

    /// Compiles a rule that is expected to be valid, panicking at the call site otherwise.
    #[track_caller]
    fn compile(source: &str) -> WeirLinter {
        WeirLinter::new(source).unwrap()
    }

    /// A sequence expression with a single-character replacement and the default strategy.
    #[test]
    fn simple_right_click_linter() {
        let source = r#"
            expr main <([right, middle, left] $click), ( )>
            let message "Hyphenate this mouse command"
            let description "Hyphenates right-click style mouse commands."
            let kind "Punctuation"
            let becomes "-"

            test "Right click the icon." "Right-click the icon."
            test "Please right click on the link." "Please right-click on the link."
            test "They right clicked the submit button." "They right-clicked the submit button."
            test "Right clicking the item highlights it." "Right-clicking the item highlights it."
            test "Right clicks are tracked in the log." "Right-clicks are tracked in the log."
            test "He RIGHT CLICKED the file." "He RIGHT-CLICKED the file."
            test "Left click the checkbox." "Left-click the checkbox."
            test "Middle click to open in a new tab." "Middle-click to open in a new tab."

            allows "This test contains the correct version of right-click and therefore shouldn't error."
            "#;

        let mut linter = compile(source);
        assert_passes_all(&mut linter);
        assert_eq!(9, linter.count_tests());
    }

    /// Alternative phrases replaced verbatim with the `Exact` strategy.
    #[test]
    fn g_suite() {
        let source = r#"
            expr main [(G [Suite, Suit]), (Google Apps for Work)]
            let message "Use the updated brand."
            let description "`G Suite` or `Google Apps for Work` is now called `Google Workspace`"
            let kind "Miscellaneous"
            let becomes "Google Workspace"
            let strategy "Exact"

            test "We migrated from G Suite last year." "We migrated from Google Workspace last year."
            test "This account is still labeled as Google Apps for Work." "This account is still labeled as Google Workspace."
            test "The pricing page mentions G Suit for legacy plans." "The pricing page mentions Google Workspace for legacy plans."
            test "New customers sign up for Google Workspace." "New customers sign up for Google Workspace."

            allows "This test contains the correct version of Google Workspace and therefore shouldn't error."
            "#;

        let mut linter = compile(source);

        assert_passes_all(&mut linter);
        assert_eq!(5, linter.count_tests());
    }

    /// Overlapping array options must match the longest option regardless of their order.
    #[test]
    fn array_prefers_longest_match_over_first_match() {
        let orderings = [
            "[(capitalized off of), (capitalized off)]",
            "[(capitalized off), (capitalized off of)]",
        ];

        for main in orderings {
            let source = format!(
                r#"
                expr main {main}
                let message "Use the replacement."
                let description "Regression test for overlapping Weir array options."
                let kind "Miscellaneous"
                let becomes "replacement"
                let strategy "Exact"

                test "capitalized off of" "replacement"
                "#
            );

            let mut linter = compile(&source);
            assert_passes_all(&mut linter);
        }
    }

    /// Same rule as `g_suite`, but built from named expressions referenced with `@`.
    #[test]
    fn g_suite_with_refs() {
        let source = r#"
            expr a (G [Suite, Suit])
            expr b (Google Apps For Work)
            expr incorrect [@a, @b]

            expr main @incorrect
            let message "Use the updated brand."
            let description "`G Suite` or `Google Apps for Work` is now called `Google Workspace`"
            let kind "Miscellaneous"
            let becomes "Google Workspace"
            let strategy "Exact"

            test "We migrated from G Suite last year." "We migrated from Google Workspace last year."
            test "This account is still labeled as Google Apps for Work." "This account is still labeled as Google Workspace."
            test "The pricing page mentions G Suit for legacy plans." "The pricing page mentions Google Workspace for legacy plans."
            test "New customers sign up for Google Workspace." "New customers sign up for Google Workspace."
            "#;

        let mut linter = compile(source);

        assert_passes_all(&mut linter);
        assert_eq!(4, linter.count_tests());
    }

    /// Without a `scope` variable the rule is chunk-scoped.
    #[test]
    fn scope_defaults_to_chunk() {
        let source = r#"
            expr main one**two
            let message "Use three."
            let description "Test chunk-scoped Weir."
            let kind "Miscellaneous"
            let becomes "three"
            let strategy "Exact"

            allows "one, two."
            "#;

        let mut linter = compile(source);

        assert_passes_all(&mut linter);

        let Err(linter) = compile(source).into_sentence_linter() else {
            panic!("default-scoped Weir rule should not convert to sentence linter");
        };
        assert!(linter.into_chunk_linter().is_ok());
    }

    /// A sentence-scoped rule matches text that spans more than one chunk.
    #[test]
    fn sentence_scope_can_match_across_chunks() {
        let source = r#"
            expr main one**two
            let message "Use three."
            let description "Test sentence-scoped Weir."
            let kind "Miscellaneous"
            let becomes "three"
            let strategy "Exact"
            let scope "Sentence"

            test "one, two." "three."
            "#;

        let mut linter = compile(source);

        assert_passes_all(&mut linter);

        let converted = compile(source).into_sentence_linter();
        assert!(converted.is_ok());
    }

    /// An unknown `scope` value is rejected.
    #[test]
    fn invalid_scope_errors() {
        let source = r#"
            expr main one
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes ""
            let scope "Paragraph"
            "#;

        let res = WeirLinter::new(source);

        assert_eq!(res.err(), Some(Error::InvalidScope));
    }

    /// Referencing an undefined expression is reported by name.
    #[test]
    fn fails_on_unresolved_expr() {
        let source = r#"
            expr main @missing
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes ""
            let strategy "Exact"
            "#;

        let res = WeirLinter::new(source);

        assert_eq!(
            res.err().unwrap(),
            Error::UnableToResolveExpr("missing".to_string())
        );
    }

    /// Wildcards combined with part-of-speech matchers.
    #[test]
    fn wildcard() {
        let source = r#"
            expr main <(NOUN * NOUN), (* NOUN), *>
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes ""
            let strategy "Exact"

            test "I like trees and plants of all kinds" "I like trees plants of all kinds"
            test "homework tempts teachers" "homework teachers"
            "#;

        let mut linter = compile(source);

        assert_passes_all(&mut linter);
        assert_eq!(2, linter.count_tests());
    }

    /// Punctuation can be used as the whole expression.
    #[test]
    fn dashes() {
        let source = r#"
            expr main --
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes "-"
            let strategy "Exact"

            test "This--and--that" "This-and-that"

            allows "this-and-that"
            "#;

        let mut linter = compile(source);

        assert_passes_all(&mut linter);
        assert_eq!(2, linter.count_tests());
    }

    /// An `allows` case that the rule flags must be reported as a failure.
    #[test]
    fn fails_on_ignore_test() {
        let source = r#"
            expr main test
            let message ""
            let description ""
            let kind "Miscellaneous"
            let becomes "-"
            let strategy "Exact"

            allows "test"
            "#;

        let mut linter = compile(source);

        assert_eq!(linter.run_tests().len(), 1);
    }

    /// An `expr` statement without a body yields an error instead of a panic.
    #[test]
    fn errors_properly_with_missing_expr() {
        let source = "expr main";
        let res = WeirLinter::new(source);
        assert_eq!(res.err(), Some(Error::ExpectedVariableUndefined));
    }

    /// Every entry of a `becomes` array must be reachable as a test outcome.
    #[test]
    fn becomes_array_with_many_alternatives() {
        let source = r#"
            expr main (the fact)
            let message "Consider alternative phrasing"
            let description "Test that all 'becomes' alternatives can be reached"
            let kind "Miscellaneous"
            let becomes ["the allegation", "the idea", "the claim", "the story", "the rumor"]
            let strategy "Exact"

            test "There is truth to the fact that people like images." "There is truth to the allegation that people like images."
            test "There is truth to the fact that people like images." "There is truth to the idea that people like images."
            test "There is truth to the fact that people like images." "There is truth to the claim that people like images."
            test "There is truth to the fact that people like images." "There is truth to the story that people like images."
            test "There is truth to the fact that people like images." "There is truth to the rumor that people like images."

            allows "There is truth to the story that people like images."
            "#;

        let mut linter = compile(source);
        assert_passes_all(&mut linter);
        assert_eq!(6, linter.count_tests());
    }

    /// Arbitrary input may be rejected but must never panic.
    #[quickcheck]
    fn does_not_panic(s: String) {
        let _ = WeirLinter::new(s.as_str());
    }
}