harper_core/parsers/mod.rs

mod collapse_identifiers;
mod isolate_english;
mod markdown;
mod mask;
mod org_mode;
mod plain_english;

use blanket::blanket;
pub use collapse_identifiers::CollapseIdentifiers;
pub use isolate_english::IsolateEnglish;
pub use markdown::{Markdown, MarkdownOptions};
pub use mask::Mask;
pub use org_mode::OrgMode;
pub use plain_english::PlainEnglish;

use crate::{LSend, Token, TokenStringExt};

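/// A [`Parser`] turns a slice of source characters into a stream of [`Token`]s,
/// with each implementation handling one input format (plain English, Markdown,
/// Org-mode, and so on). The `blanket` attributes below also generate `Parser`
/// implementations for `Box`, and for `Arc` or `Rc`, depending on whether the
/// `concurrent` feature is enabled.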
#[cfg_attr(feature = "concurrent", blanket(derive(Box, Arc)))]
#[cfg_attr(not(feature = "concurrent"), blanket(derive(Box, Rc)))]
pub trait Parser: LSend {
    fn parse(&self, source: &[char]) -> Vec<Token>;
}

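/// Convenience counterpart to [`Parser`] that accepts anything convertible to a
/// string slice instead of a pre-collected `&[char]`.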
pub trait StrParser {
    fn parse_str(&self, source: impl AsRef<str>) -> Vec<Token>;
}

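// Every `Parser` is automatically a `StrParser`: collect the input into a
// `Vec<char>` and delegate to `Parser::parse`.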
impl<T> StrParser for T
where
    T: Parser,
{
    fn parse_str(&self, source: impl AsRef<str>) -> Vec<Token> {
        let source: Vec<_> = source.as_ref().chars().collect();
        self.parse(&source)
    }
}

#[cfg(test)]
mod tests {
    use super::{Markdown, OrgMode, Parser, PlainEnglish};
    use crate::Punctuation;
    use crate::TokenKind::{self, *};

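    /// Parse `test_str` with the given parser and assert that the resulting
    /// token kinds match `expected` exactly.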
    fn assert_tokens_eq(test_str: impl AsRef<str>, expected: &[TokenKind], parser: &impl Parser) {
        let chars: Vec<_> = test_str.as_ref().chars().collect();
        let tokens = parser.parse(&chars);
        let kinds: Vec<_> = tokens.into_iter().map(|v| v.kind).collect();

        assert_eq!(&kinds, expected)
    }

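    // Shorthand assertions that fix the parser under test.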
    fn assert_tokens_eq_plain(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        assert_tokens_eq(test_str, expected, &PlainEnglish);
    }

    fn assert_tokens_eq_md(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        assert_tokens_eq(test_str, expected, &Markdown::default())
    }

    fn assert_tokens_eq_org(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        assert_tokens_eq(test_str, expected, &OrgMode)
    }

    #[test]
    fn single_letter() {
        assert_tokens_eq_plain("a", &[TokenKind::blank_word()])
    }

    #[test]
    fn sentence() {
        assert_tokens_eq_plain(
            "hello world, my friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                Space(1),
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        )
    }

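    // Markdown emphasis markers and link syntax should not produce tokens of
    // their own; the stream matches the plain-English tokenization above.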
    #[test]
    fn sentence_md() {
        assert_tokens_eq_md(
            "__hello__ world, [my]() friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                Space(1),
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        );
    }

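    // A blank line between Markdown paragraphs should yield a single
    // `ParagraphBreak` token rather than plain whitespace.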
    #[test]
    fn inserts_newlines() {
        assert_tokens_eq_md(
            "__hello__ world,\n\n[my]() friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                ParagraphBreak,
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        );
    }

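    // Words containing non-ASCII letters should still tokenize as single words.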
    #[test]
    fn parses_non_english() {
        assert_tokens_eq_plain("Løvetann", &[TokenKind::blank_word()]);
        assert_tokens_eq_plain("Naïve", &[TokenKind::blank_word()]);
    }

    #[test]
    fn org_mode_basic() {
        assert_tokens_eq_org(
            "hello world",
            &[TokenKind::blank_word(), Space(1), TokenKind::blank_word()],
        );
    }
}