harper_core/expr/
spelled_number_expr.rs1use crate::expr::LongestMatchOf;
2use crate::patterns::{WhitespacePattern, WordSet};
3use crate::{Span, Token};
4
5use super::{Expr, SequenceExpr};
6
/// An expression that matches English number words from "zero" through "ninety-nine".
///
/// It recognises a single number word ("zero", "one" through "nineteen", or a multiple of
/// ten from "twenty" to "ninety") as well as a tens word joined to a units word by a hyphen
/// or whitespace, such as "forty-two" or "forty two".
#[derive(Debug, Clone, Copy, Default)]
pub struct SpelledNumberExpr;
10
11impl Expr for SpelledNumberExpr {
12 fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
13 if tokens.is_empty() {
14 return None;
15 }
16
17 let units = &[
21 "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
22 ];
23
24 let teens = &[
26 "ten",
27 "eleven",
28 "twelve",
29 "thirteen",
30 "fourteen",
31 "fifteen",
32 "sixteen",
33 "seventeen",
34 "eighteen",
35 "nineteen",
36 ];
37
38 let tens = &[
41 "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
42 ];
43
44 let single_words = WordSet::new(
45 &units
46 .iter()
47 .chain(teens.iter())
48 .chain(tens.iter())
49 .copied()
50 .chain(std::iter::once("zero"))
51 .collect::<Vec<&str>>(),
52 );
53
54 let tens_units_compounds = SequenceExpr::default()
55 .then(WordSet::new(tens))
56 .then_any_of(vec![
57 Box::new(|t: &Token, _s: &[char]| t.kind.is_hyphen()),
58 Box::new(WhitespacePattern),
59 ])
60 .then(WordSet::new(units));
61
62 let expr =
63 LongestMatchOf::new(vec![Box::new(single_words), Box::new(tens_units_compounds)]);
64
65 expr.run(cursor, tokens, source)
66 }
67}
68
#[cfg(test)]
mod tests {
    use super::SpelledNumberExpr;
    use crate::Document;
    use crate::expr::ExprExt;
    use crate::linting::tests::SpanVecExt;

    /// Parses `text` as a Markdown document and returns the text of every span matched
    /// by [`SpelledNumberExpr`], in the order the matches are produced.
    fn matched_text(text: &str) -> Vec<String> {
        let doc = Document::new_markdown_default_curated(text);
        SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>()
            .to_strings(&doc)
    }

    #[test]
    fn matches_single_digit() {
        assert_eq!(matched_text("one two three"), vec!["one", "two", "three"]);
    }

    #[test]
    fn matches_teens() {
        assert_eq!(
            matched_text("ten eleven twelve"),
            vec!["ten", "eleven", "twelve"]
        );
    }

    #[test]
    fn matches_tens() {
        assert_eq!(
            matched_text("twenty thirty forty"),
            vec!["twenty", "thirty", "forty"]
        );
    }

    #[test]
    fn matches_compound_numbers() {
        assert_eq!(
            matched_text("twenty-one thirty-two"),
            vec!["twenty-one", "thirty-two"]
        );
    }

    #[test]
    fn deep_thought() {
        assert_eq!(
            matched_text(
                "the answer to the ultimate question of life, the universe, and everything is forty-two",
            ),
            vec!["forty-two"]
        );
    }

    #[test]
    fn jacksons() {
        assert_eq!(
            matched_text("A, B, C It's easy as one, two, three. Or simple as Do-Re-Mi"),
            vec!["one", "two", "three"]
        );
    }

    #[test]
    fn orwell() {
        assert_eq!(
            matched_text("Nineteen Eighty-Four"),
            vec!["Nineteen", "Eighty-Four"]
        );
    }

    #[test]
    fn get_smart() {
        assert_eq!(
            matched_text("Maxwell Smart was Agent Eighty-Six, but who was Agent Ninety-Nine?"),
            vec!["Eighty-Six", "Ninety-Nine"]
        );
    }

    #[test]
    fn hyphens_or_spaces() {
        assert_eq!(
            matched_text(
                "twenty-one, thirty two, forty-three, fifty four, sixty-five, seventy six, eighty-seven, ninety eight",
            ),
            vec![
                "twenty-one",
                "thirty two",
                "forty-three",
                "fifty four",
                "sixty-five",
                "seventy six",
                "eighty-seven",
                "ninety eight",
            ]
        );
    }

    #[test]
    fn waiting_since() {
        assert_eq!(
            matched_text("I have been waiting since two hours."),
            vec!["two"]
        );
    }
}