harper_core/expr/
spelled_number_expr.rs1use crate::expr::LongestMatchOf;
2use crate::patterns::{WhitespacePattern, WordSet};
3use crate::{Span, Token};
4
5use super::{Expr, SequenceExpr};
6
/// An expression that matches English numbers written out as words.
///
/// The type carries no state; all matching logic lives in its `Expr`
/// implementation. It is a zero-sized marker, so it is cheap to copy and can
/// be created with `SpelledNumberExpr` or `SpelledNumberExpr::default()`.
#[derive(Debug, Default, Clone, Copy)]
pub struct SpelledNumberExpr;
10
11impl Expr for SpelledNumberExpr {
12 fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span> {
13 if tokens.is_empty() {
14 return None;
15 }
16
17 let units = &[
21 "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
22 ];
23
24 let teens = &[
26 "ten",
27 "eleven",
28 "twelve",
29 "thirteen",
30 "fourteen",
31 "fifteen",
32 "sixteen",
33 "seventeen",
34 "eighteen",
35 "nineteen",
36 ];
37
38 let tens = &[
41 "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
42 ];
43
44 let single_words = WordSet::new(
45 &units
46 .iter()
47 .chain(teens.iter())
48 .chain(tens.iter())
49 .copied()
50 .chain(std::iter::once("zero"))
51 .collect::<Vec<&str>>(),
52 );
53
54 let tens_units_compounds = SequenceExpr::default()
55 .then(WordSet::new(tens))
56 .then_any_of(vec![
57 Box::new(|t: &Token, _s: &[char]| t.kind.is_hyphen()),
58 Box::new(WhitespacePattern),
59 ])
60 .then(WordSet::new(units));
61
62 let expr =
63 LongestMatchOf::new(vec![Box::new(single_words), Box::new(tens_units_compounds)]);
64
65 expr.run(cursor, tokens, source)
66 }
67}
68
#[cfg(test)]
mod tests {
    use super::SpelledNumberExpr;
    use crate::expr::ExprExt;
    use crate::{Document, Span};

    /// Test helper: turns token-index spans back into the text they cover.
    trait SpanVecExt {
        /// Returns one string per span, built by concatenating the source text
        /// of every token in `doc` that the span covers.
        fn to_strings(&self, doc: &Document) -> Vec<String>;
    }

    impl SpanVecExt for Vec<Span> {
        fn to_strings(&self, doc: &Document) -> Vec<String> {
            self.iter()
                .map(|sp| {
                    doc.get_tokens()[sp.start..sp.end]
                        .iter()
                        .map(|tok| doc.get_span_content_str(&tok.span))
                        .collect::<String>()
                })
                .collect()
        }
    }

    #[test]
    fn matches_single_digit() {
        let doc = Document::new_markdown_default_curated("one two three");
        let matches = SpelledNumberExpr.iter_matches_in_doc(&doc);
        assert_eq!(matches.count(), 3);
    }

    #[test]
    fn matches_teens() {
        let doc = Document::new_markdown_default_curated("ten eleven twelve");
        let matches = SpelledNumberExpr.iter_matches_in_doc(&doc);
        assert_eq!(matches.count(), 3);
    }

    #[test]
    fn matches_tens() {
        let doc = Document::new_markdown_default_curated("twenty thirty forty");
        let matches = SpelledNumberExpr.iter_matches_in_doc(&doc);
        assert_eq!(matches.count(), 3);
    }

    #[test]
    fn matches_compound_numbers() {
        let doc = Document::new_markdown_default_curated("twenty-one thirty-two");
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        // Checking the matched text (not just the count) makes a failure
        // self-explanatory without any debug printing.
        assert_eq!(matches.to_strings(&doc), vec!["twenty-one", "thirty-two"]);
    }

    #[test]
    fn deep_thought() {
        let doc = Document::new_markdown_default_curated(
            "the answer to the ultimate question of life, the universe, and everything is forty-two",
        );
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["forty-two"]);
    }

    #[test]
    fn jacksons() {
        let doc = Document::new_markdown_default_curated(
            "A, B, C It's easy as one, two, three. Or simple as Do-Re-Mi",
        );
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["one", "two", "three"]);
    }

    #[test]
    fn orwell() {
        let doc = Document::new_markdown_default_curated("Nineteen Eighty-Four");
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["Nineteen", "Eighty-Four"]);
    }

    #[test]
    fn get_smart() {
        let doc = Document::new_markdown_default_curated(
            "Maxwell Smart was Agent Eighty-Six, but who was Agent Ninety-Nine?",
        );
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["Eighty-Six", "Ninety-Nine"]);
    }

    #[test]
    fn hyphens_or_spaces() {
        let doc = Document::new_markdown_default_curated(
            "twenty-one, thirty two, forty-three, fifty four, sixty-five, seventy six, eighty-seven, ninety eight",
        );
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(
            matches.to_strings(&doc),
            vec![
                "twenty-one",
                "thirty two",
                "forty-three",
                "fifty four",
                "sixty-five",
                "seventy six",
                "eighty-seven",
                "ninety eight",
            ]
        );
    }

    #[test]
    fn waiting_since() {
        let doc = Document::new_markdown_default_curated("I have been waiting since two hours.");
        let matches = SpelledNumberExpr
            .iter_matches_in_doc(&doc)
            .collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["two"]);
    }
}