harper_core/parsers/
mod.rs1mod collapse_identifiers;
2mod isolate_english;
3mod markdown;
4mod mask;
5mod plain_english;
6
7use blanket::blanket;
8pub use collapse_identifiers::CollapseIdentifiers;
9pub use isolate_english::IsolateEnglish;
10pub use markdown::{Markdown, MarkdownOptions};
11pub use mask::Mask;
12pub use plain_english::PlainEnglish;
13
14use crate::{Token, TokenStringExt};
15
16#[cfg(not(feature = "concurrent"))]
17#[blanket(derive(Box, Rc))]
18pub trait Parser {
19 fn parse(&self, source: &[char]) -> Vec<Token>;
20}
21
/// Turns a buffer of characters into a sequence of [`Token`]s.
///
/// This is the flavour of the trait compiled when the `concurrent` feature is
/// enabled: implementors must additionally be `Send + Sync`, and the `blanket`
/// attribute asks the `blanket` crate to derive implementations for `Box` and
/// `Arc` wrappers (rather than `Rc`).
#[cfg(feature = "concurrent")]
#[blanket(derive(Box, Arc))]
pub trait Parser: Send + Sync {
    /// Parses `source` and returns the tokens found in it.
    fn parse(&self, source: &[char]) -> Vec<Token>;
}
27
28pub trait StrParser {
29 fn parse_str(&self, source: impl AsRef<str>) -> Vec<Token>;
30}
31
32impl<T> StrParser for T
33where
34 T: Parser,
35{
36 fn parse_str(&self, source: impl AsRef<str>) -> Vec<Token> {
37 let source: Vec<_> = source.as_ref().chars().collect();
38 self.parse(&source)
39 }
40}
41
#[cfg(test)]
mod tests {
    use super::{Markdown, Parser, PlainEnglish};
    use crate::Punctuation;
    use crate::TokenKind::{self, *};

    /// Parses `test_str` with `parser` and asserts that the kinds of the
    /// produced tokens equal `expected`, in order.
    fn assert_tokens_eq(test_str: impl AsRef<str>, expected: &[TokenKind], parser: &impl Parser) {
        let chars: Vec<_> = test_str.as_ref().chars().collect();
        let tokens = parser.parse(&chars);
        let kinds: Vec<_> = tokens.into_iter().map(|v| v.kind).collect();

        assert_eq!(&kinds, expected)
    }

    /// Runs [`assert_tokens_eq`] with the [`PlainEnglish`] parser.
    fn assert_tokens_eq_plain(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        assert_tokens_eq(test_str, expected, &PlainEnglish);
    }

    /// Runs [`assert_tokens_eq`] with a default-configured [`Markdown`] parser.
    fn assert_tokens_eq_md(test_str: impl AsRef<str>, expected: &[TokenKind]) {
        assert_tokens_eq(test_str, expected, &Markdown::default())
    }

    /// A lone letter is a single word token.
    #[test]
    fn single_letter() {
        assert_tokens_eq_plain("a", &[TokenKind::blank_word()])
    }

    /// Plain text yields words, single spaces and the comma, in source order.
    #[test]
    fn sentence() {
        assert_tokens_eq_plain(
            "hello world, my friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                Space(1),
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        )
    }

    /// Emphasis and link markup produce no tokens of their own: the expected
    /// kinds match those of the plain-text `sentence` test.
    #[test]
    fn sentence_md() {
        assert_tokens_eq_md(
            "__hello__ world, [my]() friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                Space(1),
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        );
    }

    /// A blank line between Markdown paragraphs shows up as a
    /// `ParagraphBreak` token.
    #[test]
    fn inserts_newlines() {
        assert_tokens_eq_md(
            "__hello__ world,\n\n[my]() friend",
            &[
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
                Punctuation(Punctuation::Comma),
                ParagraphBreak,
                TokenKind::blank_word(),
                Space(1),
                TokenKind::blank_word(),
            ],
        );
    }

    /// Words containing non-ASCII letters are still parsed as one word token.
    #[test]
    fn parses_non_english() {
        assert_tokens_eq_plain("Løvetann", &[TokenKind::blank_word()]);
        assert_tokens_eq_plain("Naïve", &[TokenKind::blank_word()]);
    }
}