harper_core/parsers/mod.rs

mod collapse_identifiers;
mod isolate_english;
mod markdown;
mod mask;
mod org_mode;
mod plain_english;

use blanket::blanket;
pub use collapse_identifiers::CollapseIdentifiers;
pub use isolate_english::IsolateEnglish;
pub use markdown::{Markdown, MarkdownOptions};
pub use mask::Mask;
pub use org_mode::OrgMode;
pub use plain_english::PlainEnglish;

use crate::{LSend, Token, TokenStringExt};

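/// A [`Parser`] consumes a slice of characters and produces Harper's [`Token`] stream.
///
/// The `blanket` attributes below also derive the trait for boxed parsers and, depending
/// on the `concurrent` feature, for `Arc`- or `Rc`-wrapped ones.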
#[cfg_attr(feature = "concurrent", blanket(derive(Box, Arc)))]
#[cfg_attr(not(feature = "concurrent"), blanket(derive(Box, Rc)))]
pub trait Parser: LSend {
    fn parse(&self, source: &[char]) -> Vec<Token>;
}

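/// Convenience extension that lets any [`Parser`] run directly on string-like input.
///
/// A minimal usage sketch (not compiled as a doctest; assumes the crate is consumed as
/// `harper_core` and that `PlainEnglish` is a unit-struct parser, as in the tests below):
///
/// ```ignore
/// use harper_core::parsers::{PlainEnglish, StrParser};
///
/// // "hello world" tokenizes to: word, space, word.
/// let tokens = PlainEnglish.parse_str("hello world");
/// assert_eq!(tokens.len(), 3);
/// ```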
pub trait StrParser {
    fn parse_str(&self, source: impl AsRef<str>) -> Vec<Token>;
}

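/// Blanket implementation: every [`Parser`] is also a [`StrParser`]. The string is
/// collected into a `Vec<char>` and handed to [`Parser::parse`].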
impl<T> StrParser for T
where
    T: Parser,
{
    fn parse_str(&self, source: impl AsRef<str>) -> Vec<Token> {
        let source: Vec<_> = source.as_ref().chars().collect();
        self.parse(&source)
    }
}

#[cfg(test)]
mod tests {
    use super::{Markdown, OrgMode, Parser, PlainEnglish};
    use crate::Punctuation;
    use crate::TokenKind::{self, *};

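    /// Parse `test_str` with the given parser and compare the resulting token kinds
    /// against `expected`.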
    fn assert_tokens_eq(test_str: impl AsRef<str>, expected: &[TokenKind], parser: &impl Parser) {
        let chars: Vec<_> = test_str.as_ref().chars().collect();
        let tokens = parser.parse(&chars);
        let kinds: Vec<_> = tokens.into_iter().map(|v| v.kind).collect();

        assert_eq!(&kinds, expected)
    }

    fn assert_tokens_eq_plain(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        assert_tokens_eq(test_str, expected, &PlainEnglish);
    }

    fn assert_tokens_eq_md(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        assert_tokens_eq(test_str, expected, &Markdown::default())
    }

    fn assert_tokens_eq_org(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        assert_tokens_eq(test_str, expected, &OrgMode)
    }

    #[test]
    fn single_letter() {
        assert_tokens_eq_plain("a", &[TokenKind::blank_word()])
    }

    #[test]
    fn sentence() {
        assert_tokens_eq_plain(
            "hello world, my friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                Space(1),
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        )
    }

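    // Markdown emphasis (`__hello__`) and link syntax (`[my]()`) contribute no tokens of
    // their own; only the enclosed words, spaces, and punctuation remain.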
    #[test]
    fn sentence_md() {
        assert_tokens_eq_md(
            "__hello__ world, [my]() friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                Space(1),
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        );
    }

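    // A blank line in Markdown collapses into a single `ParagraphBreak` token rather
    // than a `Space`.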
    #[test]
    fn inserts_newlines() {
        assert_tokens_eq_md(
            "__hello__ world,\n\n[my]() friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                ParagraphBreak,
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        );
    }

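    // Words containing non-ASCII letters still parse as single word tokens.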
    #[test]
    fn parses_non_english() {
        assert_tokens_eq_plain("Løvetann", &[TokenKind::blank_word()]);
        assert_tokens_eq_plain("Naïve", &[TokenKind::blank_word()]);
    }

    #[test]
    fn org_mode_basic() {
        assert_tokens_eq_org(
            "hello world",
            &[TokenKind::blank_word(), Space(1), TokenKind::blank_word()],
        );
    }
}