harper_core/parsers/
mod.rs1mod collapse_identifiers;
2mod isolate_english;
3mod markdown;
4mod mask;
5mod plain_english;
6
7use blanket::blanket;
8pub use collapse_identifiers::CollapseIdentifiers;
9pub use isolate_english::IsolateEnglish;
10pub use markdown::{Markdown, MarkdownOptions};
11pub use mask::Mask;
12pub use plain_english::PlainEnglish;
13
14use crate::{LSend, Token, TokenStringExt};
15
16#[cfg_attr(feature = "concurrent", blanket(derive(Box, Arc)))]
17#[cfg_attr(not(feature = "concurrent"), blanket(derive(Box, Rc)))]
18pub trait Parser: LSend {
19 fn parse(&self, source: &[char]) -> Vec<Token>;
20}
21
22pub trait StrParser {
23 fn parse_str(&self, source: impl AsRef<str>) -> Vec<Token>;
24}
25
26impl<T> StrParser for T
27where
28 T: Parser,
29{
30 fn parse_str(&self, source: impl AsRef<str>) -> Vec<Token> {
31 let source: Vec<_> = source.as_ref().chars().collect();
32 self.parse(&source)
33 }
34}
35
#[cfg(test)]
mod tests {
    use super::{Markdown, Parser, PlainEnglish};
    use crate::Punctuation;
    // The glob brings the `TokenKind` variants into scope. In the expected
    // lists below, `Punctuation(..)` is the `TokenKind` variant constructor,
    // while the `Punctuation::Comma` path goes through the explicitly
    // imported `crate::Punctuation`, which takes precedence over the glob.
    use crate::TokenKind::{self, *};

    /// Parses `test_str` with `parser` and asserts that the kinds of the
    /// resulting tokens equal `expected`, in order.
    fn assert_tokens_eq(test_str: impl AsRef<str>, expected: &[TokenKind], parser: &impl Parser) {
        let chars: Vec<char> = test_str.as_ref().chars().collect();
        let kinds: Vec<TokenKind> = parser
            .parse(&chars)
            .into_iter()
            .map(|token| token.kind)
            .collect();

        assert_eq!(kinds.as_slice(), expected)
    }

    /// Runs [`assert_tokens_eq`] using the [`PlainEnglish`] parser.
    fn assert_tokens_eq_plain(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        assert_tokens_eq(test_str, expected, &PlainEnglish);
    }

    /// Runs [`assert_tokens_eq`] using a default-constructed [`Markdown`] parser.
    fn assert_tokens_eq_md(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        assert_tokens_eq(test_str, expected, &Markdown::default())
    }

    /// A lone letter is expected to yield a single word token.
    #[test]
    fn single_letter() {
        assert_tokens_eq_plain("a", &[TokenKind::blank_word()])
    }

    /// Plain text: words, single spaces, and a comma.
    #[test]
    fn sentence() {
        assert_tokens_eq_plain(
            "hello world, my friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                Space(1),
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        )
    }

    /// The Markdown input with emphasis and link markup is expected to yield
    /// the same token kinds as the plain sentence above.
    #[test]
    fn sentence_md() {
        assert_tokens_eq_md(
            "__hello__ world, [my]() friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                Space(1),
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        );
    }

    /// A blank line in the Markdown input is expected to appear as a
    /// `ParagraphBreak` token.
    #[test]
    fn inserts_newlines() {
        assert_tokens_eq_md(
            "__hello__ world,\n\n[my]() friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                ParagraphBreak,
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        );
    }

    /// Words containing non-ASCII letters are each expected to be a single
    /// word token.
    #[test]
    fn parses_non_english() {
        assert_tokens_eq_plain("Løvetann", &[TokenKind::blank_word()]);
        assert_tokens_eq_plain("Naïve", &[TokenKind::blank_word()]);
    }
}