1use core::{marker::PhantomData, str::Chars};
20use embedded_graphics::{prelude::PixelColor, text::DecorationColor};
21
22#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
24pub enum ChangeTextStyle<C> {
25 Reset,
27
28 TextColor(Option<C>),
30
31 BackgroundColor(Option<C>),
33
34 Underline(DecorationColor<C>),
36
37 Strikethrough(DecorationColor<C>),
39}
40
41#[derive(Debug, PartialEq, Clone)]
43pub enum Token<'a, C> {
44 NewLine,
46
47 CarriageReturn,
49
50 Tab,
52
53 Whitespace(u32, &'a str),
55
56 Word(&'a str),
58
59 Break(&'a str),
61
62 ChangeTextStyle(ChangeTextStyle<C>),
64
65 MoveCursor {
67 chars: i32,
69 draw_background: bool,
71 },
72}
73
74#[derive(Clone, Debug)]
76pub(crate) struct Parser<'a, C>
77where
78 C: PixelColor,
79{
80 inner: Chars<'a>,
81 _marker: PhantomData<C>,
82}
83
/// U+00A0 NO-BREAK SPACE: treated as part of a word by the parser.
pub(crate) const SPEC_CHAR_NBSP: char = '\u{00A0}';

/// U+200B ZERO WIDTH SPACE: treated as whitespace that does not add to the width count.
pub(crate) const SPEC_CHAR_ZWSP: char = '\u{200B}';

/// U+00AD SOFT HYPHEN: turned into a `Break` token by the parser.
pub(crate) const SPEC_CHAR_SHY: char = '\u{00AD}';
87
88fn is_word_char(c: char) -> bool {
89 (!c.is_whitespace() || c == SPEC_CHAR_NBSP) && ![SPEC_CHAR_ZWSP, SPEC_CHAR_SHY].contains(&c)
92}
93
94fn is_space_char(c: char) -> bool {
95 c.is_whitespace() && !['\n', '\r', '\t', SPEC_CHAR_NBSP].contains(&c) || c == SPEC_CHAR_ZWSP
98}
99
100impl<'a, C> Parser<'a, C>
101where
102 C: PixelColor,
103{
104 #[inline]
106 #[must_use]
107 pub fn parse(text: &'a str) -> Self {
108 Self {
109 inner: text.chars(),
110 _marker: PhantomData,
111 }
112 }
113
114 pub fn as_str(&self) -> &str {
115 self.inner.as_str()
116 }
117
118 fn consume_string(&mut self, string: &'a str, c: char) -> &'a str {
119 let offset = {
121 let ptr_start = string.as_ptr() as usize;
122 let ptr_cur = self.inner.as_str().as_ptr() as usize;
123 ptr_cur - ptr_start - c.len_utf8()
124 };
125
126 debug_assert!(string.is_char_boundary(offset));
127
128 unsafe {
129 self.inner = string.get_unchecked(offset..).chars();
132
133 string.get_unchecked(0..offset)
134 }
135 }
136}
137
138impl<'a, C> Iterator for Parser<'a, C>
139where
140 C: PixelColor,
141{
142 type Item = Token<'a, C>;
143
144 #[inline]
145 fn next(&mut self) -> Option<Self::Item> {
146 let string = self.inner.as_str();
147
148 if let Some(c) = self.inner.next() {
149 if is_word_char(c) {
150 for c in &mut self.inner {
152 if !is_word_char(c) {
153 let consumed = self.consume_string(string, c);
154 return Some(Token::Word(consumed));
155 }
156 }
157
158 Some(Token::Word(string))
160 } else {
161 match c {
162 '\n' => Some(Token::NewLine),
164 '\r' => Some(Token::CarriageReturn),
165 '\t' => Some(Token::Tab),
166 SPEC_CHAR_ZWSP => Some(Token::Whitespace(0, unsafe {
167 string.get_unchecked(0..c.len_utf8())
170 })),
171 SPEC_CHAR_SHY => Some(Token::Break(
172 "-", )),
174
175 _ => {
177 let mut len = 1;
178 for c in &mut self.inner {
179 if is_space_char(c) {
180 if c != SPEC_CHAR_ZWSP {
181 len += 1;
182 }
183 } else {
184 let consumed = self.consume_string(string, c);
185 return Some(Token::Whitespace(len, consumed));
186 }
187 }
188
189 Some(Token::Whitespace(len, string))
191 }
192 }
193 }
194 } else {
195 None
196 }
197 }
198}
199
#[cfg(test)]
mod test {
    use embedded_graphics::pixelcolor::BinaryColor;

    use super::{Parser, Token};

    /// Parses `text` and asserts that the produced token sequence equals `tokens`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather than here.
    #[track_caller]
    pub fn assert_tokens(text: &str, tokens: std::vec::Vec<Token<BinaryColor>>) {
        assert_eq!(
            Parser::parse(text).collect::<std::vec::Vec<Token<BinaryColor>>>(),
            tokens
        )
    }

    #[test]
    fn test_parse() {
        assert_tokens(
            "Lorem ipsum \r dolor sit am\u{00AD}et,\tconse😅ctetur adipiscing\nelit",
            vec![
                Token::Word("Lorem"),
                Token::Whitespace(1, " "),
                Token::Word("ipsum"),
                Token::Whitespace(1, " "),
                Token::CarriageReturn,
                Token::Whitespace(1, " "),
                Token::Word("dolor"),
                Token::Whitespace(1, " "),
                Token::Word("sit"),
                Token::Whitespace(1, " "),
                Token::Word("am"),
                Token::Break("-"),
                Token::Word("et,"),
                Token::Tab,
                Token::Word("conse😅ctetur"),
                Token::Whitespace(1, " "),
                Token::Word("adipiscing"),
                Token::NewLine,
                Token::Word("elit"),
            ],
        );
    }

    #[test]
    fn parse_zwsp() {
        assert_eq!(9, "two\u{200B}words".chars().count());

        assert_tokens(
            "two\u{200B}words",
            vec![
                Token::Word("two"),
                Token::Whitespace(0, "\u{200B}"),
                Token::Word("words"),
            ],
        );

        // Two spaces, a zero-width space, one space: the run is a single token whose
        // width counts the three real spaces but not the zero-width one.
        assert_tokens("  \u{200B} ", vec![Token::Whitespace(3, "  \u{200B} ")]);
    }

    #[test]
    fn parse_multibyte_last() {
        assert_tokens("test😅", vec![Token::Word("test😅")]);
    }

    #[test]
    fn parse_nbsp_as_word_char() {
        assert_eq!(9, "test\u{A0}word".chars().count());
        assert_tokens("test\u{A0}word", vec![Token::Word("test\u{A0}word")]);
        assert_tokens(
            " \u{A0}word",
            vec![Token::Whitespace(1, " "), Token::Word("\u{A0}word")],
        );
    }

    #[test]
    fn parse_shy_issue_42() {
        assert_tokens(
            "foo\u{AD}bar",
            vec![Token::Word("foo"), Token::Break("-"), Token::Word("bar")],
        );
    }
}