use core::{marker::PhantomData, str::Chars};
use embedded_graphics::{prelude::PixelColor, text::DecorationColor};
/// A text style change, carried by [`Token::ChangeTextStyle`].
///
/// `C` is the color type used by the color-carrying variants.
// NOTE(review): the code that interprets these variants is not visible here; the
// per-variant descriptions below are inferred from the names and payload types - confirm
// against the renderer.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum ChangeTextStyle<C> {
/// Presumably resets the text style to its defaults (TODO confirm).
Reset,
/// Text color change; carries an optional color.
TextColor(Option<C>),
/// Background color change; carries an optional color.
BackgroundColor(Option<C>),
/// Underline change; carries the `DecorationColor` to apply.
Underline(DecorationColor<C>),
/// Strikethrough change; carries the `DecorationColor` to apply.
Strikethrough(DecorationColor<C>),
}
/// A single unit of parsed text, as yielded by the `Parser` iterator.
///
/// String payloads borrow from the parsed input (`'a`), except for [`Token::Break`],
/// whose text is supplied by the parser itself.
#[derive(Debug, PartialEq, Clone)]
pub enum Token<'a, C> {
/// A `'\n'` character.
NewLine,
/// A `'\r'` character.
CarriageReturn,
/// A `'\t'` character.
Tab,
/// A run of space characters.
///
/// The first field is the number of whitespace characters in the run, not counting
/// zero-width spaces; a zero-width space at the start of a run is emitted on its own
/// with a count of `0`. The second field is the slice of the input covered by the run,
/// including any zero-width spaces.
Whitespace(u32, &'a str),
/// A run of word characters: anything that is not whitespace, plus the non-breaking
/// space. Zero-width spaces and soft hyphens terminate a word.
Word(&'a str),
/// A break marker carrying replacement text. The parser emits this for a soft hyphen
/// with the text `"-"`.
// NOTE(review): presumably the text is only drawn when a line is actually broken at
// this point - confirm against the layout code.
Break(&'a str),
/// A text style change.
// NOTE(review): not emitted by the `Parser` code visible here; presumably produced by
// another token source - TODO confirm.
ChangeTextStyle(ChangeTextStyle<C>),
/// A cursor movement request.
// NOTE(review): not emitted by the `Parser` code visible here. `chars` is presumably a
// signed number of character widths and `draw_background` presumably controls whether
// the skipped area is filled - TODO confirm.
MoveCursor {
chars: i32,
draw_background: bool,
},
}
/// Splits a string slice into [`Token`]s.
///
/// The parser is an iterator: every call to `next` consumes one token's worth of
/// characters from the front of the remaining input.
#[derive(Clone, Debug)]
pub(crate) struct Parser<'a, C>
where
C: PixelColor,
{
// Character iterator over the part of the input that has not been tokenized yet.
inner: Chars<'a>,
// Ties the color type `C` of the produced tokens to the parser without storing a `C`.
_marker: PhantomData<C>,
}
/// Non-breaking space: whitespace that is nevertheless kept inside words.
pub(crate) const SPEC_CHAR_NBSP: char = '\u{a0}';
/// Zero-width space: ends a word and is treated as a (zero-width) space character.
pub(crate) const SPEC_CHAR_ZWSP: char = '\u{200b}';
/// Soft hyphen: ends a word without being a space character.
pub(crate) const SPEC_CHAR_SHY: char = '\u{ad}';

/// Returns `true` if `c` may be part of a word.
///
/// Every non-whitespace character is a word character, except for the zero-width space
/// and the soft hyphen. The non-breaking space is whitespace but still counts as a word
/// character.
fn is_word_char(c: char) -> bool {
    match c {
        SPEC_CHAR_NBSP => true,
        SPEC_CHAR_ZWSP | SPEC_CHAR_SHY => false,
        other => !other.is_whitespace(),
    }
}

/// Returns `true` if `c` may be part of a run of spaces.
///
/// Whitespace characters qualify unless they are a line feed, carriage return, tab or
/// non-breaking space. The zero-width space also qualifies even though it is not
/// whitespace.
fn is_space_char(c: char) -> bool {
    match c {
        SPEC_CHAR_ZWSP => true,
        '\n' | '\r' | '\t' | SPEC_CHAR_NBSP => false,
        other => other.is_whitespace(),
    }
}
impl<'a, C> Parser<'a, C>
where
    C: PixelColor,
{
    /// Creates a parser that tokenizes `text` starting from its first character.
    #[inline]
    #[must_use]
    pub fn parse(text: &'a str) -> Self {
        Self {
            inner: text.chars(),
            _marker: PhantomData,
        }
    }

    /// Returns the part of the input that has not been consumed yet.
    ///
    /// The returned slice borrows from the original input rather than from the parser,
    /// so it stays usable after the parser has been advanced, moved or dropped.
    pub fn as_str(&self) -> &'a str {
        self.inner.as_str()
    }

    /// Splits `string` right before the character `c` that terminated the current token.
    ///
    /// `string` must be the slice that `self.inner` pointed at when the current token
    /// started, and `c` must be the character most recently yielded by `self.inner`.
    /// Returns everything in `string` before `c` and rewinds `self.inner` so that `c` is
    /// the next character it yields.
    fn consume_string(&mut self, string: &'a str, c: char) -> &'a str {
        let offset = {
            // `self.inner` has already stepped past `c`, so the unread remainder starts
            // `c.len_utf8()` bytes after the position where the token ends.
            let ptr_start = string.as_ptr() as usize;
            let ptr_cur = self.inner.as_str().as_ptr() as usize;
            ptr_cur - ptr_start - c.len_utf8()
        };

        debug_assert!(string.is_char_boundary(offset));

        // SAFETY: `offset` is the byte position of `c` within `string`. `c` was decoded
        // from `string` by `Chars`, so `offset` lies on a character boundary and is
        // strictly less than `string.len()`.
        unsafe {
            self.inner = string.get_unchecked(offset..).chars();
            string.get_unchecked(0..offset)
        }
    }
}
impl<'a, C> Iterator for Parser<'a, C>
where
    C: PixelColor,
{
    type Item = Token<'a, C>;

    /// Consumes and returns the next token, or `None` once the input is exhausted.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // Remember where the token starts so it can be sliced out of the input later.
        let start = self.inner.as_str();
        let first = self.inner.next()?;

        if is_word_char(first) {
            // Extend the word up to the first character that cannot be part of it.
            let terminator = self.inner.by_ref().find(|&ch| !is_word_char(ch));
            let word = match terminator {
                Some(ch) => self.consume_string(start, ch),
                // The word runs to the end of the input.
                None => start,
            };
            return Some(Token::Word(word));
        }

        let token = match first {
            '\n' => Token::NewLine,
            '\r' => Token::CarriageReturn,
            '\t' => Token::Tab,
            SPEC_CHAR_ZWSP => {
                // SAFETY: `first` was just decoded from the front of `start`, so its
                // encoded length is within bounds and ends on a character boundary.
                let zwsp = unsafe { start.get_unchecked(0..first.len_utf8()) };
                Token::Whitespace(0, zwsp)
            }
            // The soft hyphen is translated to a printable character.
            SPEC_CHAR_SHY => Token::Break("-"),
            _ => {
                // Count consecutive spaces; zero-width spaces belong to the run but do
                // not add to its width.
                let mut count = 1;
                loop {
                    match self.inner.next() {
                        Some(SPEC_CHAR_ZWSP) => {}
                        Some(ch) if is_space_char(ch) => count += 1,
                        Some(ch) => {
                            break Token::Whitespace(count, self.consume_string(start, ch));
                        }
                        // The run of spaces reaches the end of the input.
                        None => break Token::Whitespace(count, start),
                    }
                }
            }
        };

        Some(token)
    }
}
#[cfg(test)]
mod test {
    use embedded_graphics::pixelcolor::BinaryColor;

    use super::{Parser, Token};

    /// Parses `text` and asserts that the produced token sequence equals `tokens`.
    #[track_caller]
    pub fn assert_tokens(text: &str, tokens: std::vec::Vec<Token<BinaryColor>>) {
        assert_eq!(
            Parser::parse(text).collect::<std::vec::Vec<Token<BinaryColor>>>(),
            tokens
        )
    }

    #[test]
    fn test_parse() {
        assert_tokens(
            "Lorem ipsum \r dolor sit am\u{00AD}et,\tconse😅ctetur adipiscing\nelit",
            vec![
                Token::Word("Lorem"),
                Token::Whitespace(1, " "),
                Token::Word("ipsum"),
                Token::Whitespace(1, " "),
                Token::CarriageReturn,
                Token::Whitespace(1, " "),
                Token::Word("dolor"),
                Token::Whitespace(1, " "),
                Token::Word("sit"),
                Token::Whitespace(1, " "),
                Token::Word("am"),
                Token::Break("-"),
                Token::Word("et,"),
                Token::Tab,
                Token::Word("conse😅ctetur"),
                Token::Whitespace(1, " "),
                Token::Word("adipiscing"),
                Token::NewLine,
                Token::Word("elit"),
            ],
        );
    }

    #[test]
    fn parse_zwsp() {
        assert_eq!(9, "two\u{200B}words".chars().count());

        // A zero-width space between words is reported as a zero-width whitespace token.
        assert_tokens(
            "two\u{200B}words",
            vec![
                Token::Word("two"),
                Token::Whitespace(0, "\u{200B}"),
                Token::Word("words"),
            ],
        );

        // Two spaces, a zero-width space and one more space: the zero-width space is part
        // of the returned slice but does not add to the count, so the count is 3.
        assert_tokens(
            "  \u{200B} ",
            vec![Token::Whitespace(3, "  \u{200B} ")],
        );
    }

    #[test]
    fn parse_multibyte_last() {
        assert_tokens("test😅", vec![Token::Word("test😅")]);
    }

    #[test]
    fn parse_nbsp_as_word_char() {
        assert_eq!(9, "test\u{A0}word".chars().count());

        // The non-breaking space does not split words.
        assert_tokens("test\u{A0}word", vec![Token::Word("test\u{A0}word")]);
        assert_tokens(
            " \u{A0}word",
            vec![Token::Whitespace(1, " "), Token::Word("\u{A0}word")],
        );
    }

    #[test]
    fn parse_shy_issue_42() {
        // The soft hyphen splits the word and is translated to a printable hyphen.
        assert_tokens(
            "foo\u{AD}bar",
            vec![Token::Word("foo"), Token::Break("-"), Token::Word("bar")],
        );
    }
}