/// One piece of scanned text.
///
/// Every string slice held by a token borrows from the text that was scanned,
/// so cloning a token copies slice references (and the property list), not
/// the underlying text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TextToken<'a> {
    /// A run of text that is neither an expression nor a recognised tag.
    Literal(&'a str),
    /// The text found between `{` and the next `}`, without the braces.
    Expr(&'a str),
    /// An opening tag of the form `[name key=value ...]`.
    MarkupOpen {
        /// Tag name.
        name: &'a str,
        /// `key=value` pairs in source order; each value is the raw text
        /// after the first `=` of its whitespace-separated entry.
        properties: Vec<(&'a str, &'a str)>,
    },
    /// A closing tag of the form `[/name]`.
    MarkupClose {
        /// Tag name.
        name: &'a str,
    },
    /// A self-closing tag of the form `[name key=value ... /]`.
    MarkupSelfClose {
        /// Tag name.
        name: &'a str,
        /// `key=value` pairs in source order; each value is the raw text
        /// after the first `=` of its whitespace-separated entry.
        properties: Vec<(&'a str, &'a str)>,
    },
}
/// Error produced by [`scan_text_segments`] when an opening delimiter is
/// never closed.
///
/// Each variant carries the byte offset, within the scanned text, of the
/// opening delimiter for which no closing delimiter was found.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MarkupScanError {
    /// A `{` at this byte offset has no `}` after it.
    UnclosedBrace(usize),
    /// A `[` at this byte offset has no `]` after it.
    UnclosedBracket(usize),
}

impl std::fmt::Display for MarkupScanError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::UnclosedBrace(offset) => write!(f, "unclosed `{{` at byte offset {offset}"),
            Self::UnclosedBracket(offset) => write!(f, "unclosed `[` at byte offset {offset}"),
        }
    }
}

// Lets callers propagate the error with `?` into `Box<dyn Error>` and similar.
impl std::error::Error for MarkupScanError {}
/// Splits `text` into literal runs, `{...}` expressions and `[...]` markup tags.
///
/// The input is walked byte by byte from left to right:
///
/// * `{` opens an expression that extends to the next `}`; the text between
///   the braces is emitted as [`TextToken::Expr`].
/// * `[` opens a candidate tag that extends to the next `]`. When the enclosed
///   text parses as markup the resulting tag token is emitted; otherwise the
///   `[` stays part of the surrounding literal and scanning resumes at the
///   byte right after it.
/// * All other text is collected into [`TextToken::Literal`] tokens. Empty
///   literals are never emitted.
///
/// # Errors
///
/// Returns [`MarkupScanError::UnclosedBrace`] or
/// [`MarkupScanError::UnclosedBracket`], carrying the byte offset of the
/// opener, when no matching closer follows it. The bracket check runs before
/// the tag contents are inspected, so any `[` without a later `]` is an error.
pub fn scan_text_segments(text: &str) -> Result<Vec<TextToken<'_>>, MarkupScanError> {
    let raw = text.as_bytes();
    let mut out = Vec::new();
    // Start of the literal run that has not been emitted yet.
    let mut pending = 0usize;
    let mut pos = 0usize;

    while let Some(&byte) = raw.get(pos) {
        // Both openers are single ASCII bytes, so `pos + 1` is a valid char
        // boundary whenever one of them matched.
        let recognised = match byte {
            b'{' => {
                let tail = &text[pos + 1..];
                let Some(end) = tail.find('}') else {
                    return Err(MarkupScanError::UnclosedBrace(pos));
                };
                Some((TextToken::Expr(&tail[..end]), end))
            }
            b'[' => {
                let tail = &text[pos + 1..];
                let Some(end) = tail.find(']') else {
                    return Err(MarkupScanError::UnclosedBracket(pos));
                };
                try_parse_markup(&tail[..end]).map(|tag| (tag, end))
            }
            _ => None,
        };

        let Some((token, body_len)) = recognised else {
            // Ordinary byte, or a `[` that is not markup: keep it in the literal.
            pos += 1;
            continue;
        };

        if pending < pos {
            out.push(TextToken::Literal(&text[pending..pos]));
        }
        out.push(token);
        // Skip the opener, the body and the closer.
        pos += body_len + 2;
        pending = pos;
    }

    // Whatever follows the last recognised token is one trailing literal.
    if pending < text.len() {
        out.push(TextToken::Literal(&text[pending..]));
    }
    Ok(out)
}
/// Reports whether `s` is a valid tag or property name.
///
/// A name is non-empty, starts with an ASCII letter or `_`, and continues
/// with ASCII letters, ASCII digits, `_` or `-`.
fn is_identifier(s: &str) -> bool {
    // Every accepted character is ASCII, and every byte of a multi-byte UTF-8
    // sequence is >= 0x80, so checking bytes is equivalent to checking chars.
    match s.as_bytes().split_first() {
        Some((&lead, rest)) => {
            let lead_ok = lead == b'_' || lead.is_ascii_alphabetic();
            lead_ok
                && rest
                    .iter()
                    .all(|&b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'-'))
        }
        None => false,
    }
}
/// Parses a whitespace-separated list of `key=value` entries.
///
/// Each entry is split at its first `=`; the key must satisfy
/// [`is_identifier`], while the value is taken verbatim and may be empty.
/// Returns `None` as soon as an entry has no `=` or has an invalid key.
/// Empty or whitespace-only input yields an empty list.
fn parse_properties(s: &str) -> Option<Vec<(&str, &str)>> {
    s.split_whitespace()
        .map(|entry| {
            let (key, value) = entry.split_once('=')?;
            is_identifier(key).then_some((key, value))
        })
        // Collecting into `Option<Vec<_>>` stops at the first `None`.
        .collect()
}
/// Interprets the text found between `[` and `]` as a markup tag.
///
/// Recognised shapes:
///
/// * `/name` is a closing tag; nothing may sit between the `/` and the name.
/// * `name key=value ... /` is a self-closing tag; the trailing `/` must be
///   preceded by a space.
/// * `name key=value ...` is an opening tag.
///
/// The name ends at the first space. Returns `None` when the name is not a
/// valid identifier or the property list does not parse.
fn try_parse_markup(inner: &str) -> Option<TextToken<'_>> {
    if let Some(tag) = inner.strip_prefix('/') {
        // An identifier must start with a letter or `_`, so this check also
        // rejects any whitespace between the `/` and the name.
        return is_identifier(tag).then_some(TextToken::MarkupClose { name: tag });
    }

    let (body, self_closing) = match inner.strip_suffix(" /") {
        Some(trimmed) => (trimmed, true),
        None => (inner, false),
    };

    // Everything after the first space is the raw property list.
    let (name, attrs) = body.split_once(' ').unwrap_or((body, ""));
    if !is_identifier(name) {
        return None;
    }

    let properties = parse_properties(attrs)?;
    Some(if self_closing {
        TextToken::MarkupSelfClose { name, properties }
    } else {
        TextToken::MarkupOpen { name, properties }
    })
}
// Unit tests are kept in the separate file named by the `path` attribute and
// are only compiled for test builds.
#[cfg(test)]
#[path = "markup_tests.rs"]
mod tests;