harper_core/expr/
spelled_number_expr.rs1use crate::expr::LongestMatchOf;
2use crate::patterns::{WhitespacePattern, WordSet};
3use crate::{Span, Token};
4
5use super::{Expr, SequenceExpr};
6
/// An expression that matches spelled-out numbers from "zero" to "ninety-nine".
///
/// Single words ("seven", "fifteen", "forty") match on their own; a multiple of
/// ten followed by a hyphen or whitespace and a unit ("forty-two", "forty two")
/// matches as one compound.
///
/// The type carries no state, so it is cheap to copy and can be created with
/// `SpelledNumberExpr` or `SpelledNumberExpr::default()`.
#[derive(Debug, Default, Clone, Copy)]
pub struct SpelledNumberExpr;
10
11impl Expr for SpelledNumberExpr {
12 fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
13 if tokens.is_empty() {
14 return None;
15 }
16
17 let units = &[
21 "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
22 ];
23
24 let teens = &[
26 "ten",
27 "eleven",
28 "twelve",
29 "thirteen",
30 "fourteen",
31 "fifteen",
32 "sixteen",
33 "seventeen",
34 "eighteen",
35 "nineteen",
36 ];
37
38 let tens = &[
41 "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
42 ];
43
44 let single_words = WordSet::new(
45 &units
46 .iter()
47 .chain(teens.iter())
48 .chain(tens.iter())
49 .copied()
50 .chain(std::iter::once("zero"))
51 .collect::<Vec<&str>>(),
52 );
53
54 let tens_units_compounds = SequenceExpr::word_set(tens)
55 .then_any_of(vec![
56 Box::new(|t: &Token, _s: &[char]| t.kind.is_hyphen()),
57 Box::new(WhitespacePattern),
58 ])
59 .then_word_set(units);
60
61 let expr =
62 LongestMatchOf::new(vec![Box::new(single_words), Box::new(tens_units_compounds)]);
63
64 expr.run(cursor, tokens, source)
65 }
66}
67
/// Tests for `SpelledNumberExpr`.
///
/// Every test collects the matches found in a small document and compares the
/// matched text, so a wrong span fails the test even when the number of
/// matches happens to be right.
#[cfg(test)]
mod tests {
    use super::SpelledNumberExpr;
    use crate::Document;
    use crate::expr::ExprExt;
    use crate::linting::tests::SpanVecExt;

    #[test]
    fn matches_single_digit() {
        let doc = Document::new_markdown_default_curated("one two three");
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["one", "two", "three"]);
    }

    #[test]
    fn matches_teens() {
        let doc = Document::new_markdown_default_curated("ten eleven twelve");
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["ten", "eleven", "twelve"]);
    }

    #[test]
    fn matches_tens() {
        // A multiple of ten followed by another multiple of ten is not a
        // compound, so each word is its own match.
        let doc = Document::new_markdown_default_curated("twenty thirty forty");
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["twenty", "thirty", "forty"]);
    }

    #[test]
    fn matches_compound_numbers() {
        let doc = Document::new_markdown_default_curated("twenty-one thirty-two");
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["twenty-one", "thirty-two"]);
    }

    #[test]
    fn deep_thought() {
        let doc = Document::new_markdown_default_curated(
            "the answer to the ultimate question of life, the universe, and everything is forty-two",
        );
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["forty-two"]);
    }

    #[test]
    fn jacksons() {
        let doc = Document::new_markdown_default_curated(
            "A, B, C It's easy as one, two, three. Or simple as Do-Re-Mi",
        );
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["one", "two", "three"]);
    }

    #[test]
    fn orwell() {
        // A teen followed by a compound: two separate matches.
        let doc = Document::new_markdown_default_curated("Nineteen Eighty-Four");
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["Nineteen", "Eighty-Four"]);
    }

    #[test]
    fn get_smart() {
        let doc = Document::new_markdown_default_curated(
            "Maxwell Smart was Agent Eighty-Six, but who was Agent Ninety-Nine?",
        );
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["Eighty-Six", "Ninety-Nine"]);
    }

    #[test]
    fn hyphens_or_spaces() {
        // Compounds may be joined by either a hyphen or whitespace.
        let doc = Document::new_markdown_default_curated(
            "twenty-one, thirty two, forty-three, fifty four, sixty-five, seventy six, eighty-seven, ninety eight",
        );
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(
            matches.to_strings(&doc),
            vec![
                "twenty-one",
                "thirty two",
                "forty-three",
                "fifty four",
                "sixty-five",
                "seventy six",
                "eighty-seven",
                "ninety eight",
            ]
        );
    }

    #[test]
    fn waiting_since() {
        let doc = Document::new_markdown_default_curated("I have been waiting since two hours.");
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["two"]);
    }
}