harper_core/parsers/
mod.rs1mod collapse_identifiers;
4mod isolate_english;
5mod markdown;
6mod mask;
7mod oops_all_headings;
8mod org_mode;
9mod plain_english;
10
11use blanket::blanket;
12pub use collapse_identifiers::CollapseIdentifiers;
13pub use isolate_english::IsolateEnglish;
14pub use markdown::{Markdown, MarkdownOptions};
15pub use mask::Mask;
16pub use oops_all_headings::OopsAllHeadings;
17pub use org_mode::OrgMode;
18pub use plain_english::PlainEnglish;
19
20use crate::{LSend, Token, TokenStringExt};
21
22#[cfg_attr(feature = "concurrent", blanket(derive(Box, Arc)))]
23#[cfg_attr(not(feature = "concurrent"), blanket(derive(Box, Rc)))]
24pub trait Parser: LSend {
25 fn parse(&self, source: &[char]) -> Vec<Token>;
26}
27
28pub trait StrParser {
29 fn parse_str(&self, source: impl AsRef<str>) -> Vec<Token>;
30}
31
32impl<T> StrParser for T
33where
34 T: Parser,
35{
36 fn parse_str(&self, source: impl AsRef<str>) -> Vec<Token> {
37 let source: Vec<_> = source.as_ref().chars().collect();
38 self.parse(&source)
39 }
40}
41
#[cfg(test)]
mod tests {
    use super::{Markdown, OrgMode, Parser, PlainEnglish};
    use crate::Punctuation;
    use crate::TokenKind;

    /// Shorthand for the placeholder word kind used in every expectation.
    fn word() -> TokenKind {
        TokenKind::blank_word()
    }

    /// Runs `parser` over `test_str` and asserts that the kinds of the
    /// produced tokens match `expected`, in order.
    fn assert_tokens_eq(test_str: impl AsRef<str>, expected: &[TokenKind], parser: &impl Parser) {
        let source = test_str.as_ref().chars().collect::<Vec<char>>();
        let kinds: Vec<_> = parser
            .parse(&source)
            .into_iter()
            .map(|token| token.kind)
            .collect();

        assert_eq!(&kinds[..], expected);
    }

    /// [`assert_tokens_eq`] with the [`PlainEnglish`] parser.
    fn assert_tokens_eq_plain(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        let parser = PlainEnglish;
        assert_tokens_eq(test_str, expected, &parser);
    }

    /// [`assert_tokens_eq`] with a default-configured [`Markdown`] parser.
    fn assert_tokens_eq_md(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        let parser = Markdown::default();
        assert_tokens_eq(test_str, expected, &parser);
    }

    /// [`assert_tokens_eq`] with the [`OrgMode`] parser.
    fn assert_tokens_eq_org(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        let parser = OrgMode;
        assert_tokens_eq(test_str, expected, &parser);
    }

    #[test]
    fn single_letter() {
        assert_tokens_eq_plain("a", &[word()]);
    }

    #[test]
    fn sentence() {
        let expected = [
            word(),
            TokenKind::Space(1),
            word(),
            TokenKind::Punctuation(Punctuation::Comma),
            TokenKind::Space(1),
            word(),
            TokenKind::Space(1),
            word(),
        ];

        assert_tokens_eq_plain("hello world, my friend", &expected);
    }

    #[test]
    fn sentence_md() {
        // Same expectation as `sentence`: the emphasis and link markup in the
        // input contribute no tokens of their own.
        let expected = [
            word(),
            TokenKind::Space(1),
            word(),
            TokenKind::Punctuation(Punctuation::Comma),
            TokenKind::Space(1),
            word(),
            TokenKind::Space(1),
            word(),
        ];

        assert_tokens_eq_md("__hello__ world, [my]() friend", &expected);
    }

    #[test]
    fn inserts_newlines() {
        // The blank line between the two Markdown paragraphs is expected to
        // surface as a single `ParagraphBreak`.
        let expected = [
            word(),
            TokenKind::Space(1),
            word(),
            TokenKind::Punctuation(Punctuation::Comma),
            TokenKind::ParagraphBreak,
            word(),
            TokenKind::Space(1),
            word(),
        ];

        assert_tokens_eq_md("__hello__ world,\n\n[my]() friend", &expected);
    }

    #[test]
    fn parses_non_english() {
        // Words containing non-ASCII letters must still come out as one word.
        for input in ["Løvetann", "Naïve"] {
            assert_tokens_eq_plain(input, &[word()]);
        }
    }

    #[test]
    fn org_mode_basic() {
        let expected = [word(), TokenKind::Space(1), word()];

        assert_tokens_eq_org("hello world", &expected);
    }
}