extern crate alloc;
use alloc::{borrow::Cow, string::String};
#[allow(unused_imports)]
#[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
use crate::println;
use crate::{
ast::Attributes,
text::{self},
util::{is_punct, is_space, resolve_entity_references, resolve_numeric_references, TinyVec},
};
/// Parses a brace-delimited attribute list, e.g. `{#id .class key="value"}`,
/// starting at the reader's current position.
///
/// Leading spaces before the opening `{` are skipped. Attributes are read one
/// at a time with `parse_attribute`; after each attribute, surrounding spaces
/// and at most one `,` separator are consumed. Parsing ends at the closing `}`.
///
/// When several `class` attributes appear, their values are merged into a
/// single space-separated string. For every other name, the attribute is
/// simply inserted into the map.
///
/// # Returns
///
/// * `Some(attributes)` with the reader positioned just after the closing `}`.
/// * `None` if the input does not start with `{` or any attribute fails to
///   parse; in that case the reader is restored to the position it had on
///   entry.
pub fn parse_attributes<'a>(reader: &mut impl text::Reader<'a>) -> Option<Attributes> {
    let (saved_line, saved_position) = reader.position();
    reader.skip_spaces();
    if reader.peek_byte() != b'{' {
        reader.set_position(saved_line, saved_position);
        return None;
    }
    reader.advance(1);

    let mut attrs = Attributes::new();
    loop {
        if reader.peek_byte() == b'}' {
            reader.advance(1);
            return Some(attrs);
        }

        let (name, value) = match parse_attribute(reader) {
            Some(attribute) => attribute,
            None => {
                // Malformed list: undo everything consumed so far.
                reader.set_position(saved_line, saved_position);
                return None;
            }
        };

        // Look the existing class value up once instead of pairing
        // `contains_key` with `get(..).unwrap()`.
        let previous_classes = if name == "class" {
            attrs
                .get("class")
                .map(|previous| String::from(previous.str(reader.source())))
        } else {
            None
        };
        match previous_classes {
            Some(classes) => {
                attrs.insert(
                    name,
                    (classes + " " + &value.str(reader.source())).into(),
                );
            }
            None => {
                attrs.insert(name, value);
            }
        }

        // Attributes may be separated by spaces and an optional comma.
        reader.skip_spaces();
        if reader.peek_byte() == b',' {
            reader.advance(1);
        }
        reader.skip_spaces();
    }
}
/// Parses a single attribute at the reader's position (after skipping spaces).
///
/// Two forms are recognised:
///
/// * Shorthand `#value` / `.value`, yielding the name `id` or `class`
///   respectively. The value runs up to the first space or punctuation byte
///   other than `_`, `-`, `:` and `.`.
/// * `name=value`, where `name` starts with an ASCII letter, `_` or `:` and
///   continues with ASCII alphanumerics, `_`, `-`, `:` or `.`. The value is
///   read by `parse_attribute_value`.
///
/// Returns `None` when neither form matches. This function does not rewind the
/// reader on failure.
fn parse_attribute<'a>(
    reader: &mut impl text::Reader<'a>,
) -> Option<(String, text::MultilineValue)> {
    reader.skip_spaces();
    let marker = reader.peek_byte();

    if matches!(marker, b'#' | b'.') {
        reader.advance(1);
        let (line, seg) = reader.peek_line_bytes()?;
        if line.is_empty() {
            return None;
        }
        // Length of the shorthand value: stop at the first space or at any
        // punctuation that is not one of the permitted `_ - : .` bytes.
        let len = line
            .iter()
            .position(|&b| {
                is_space(b) || (is_punct(b) && !matches!(b, b'_' | b'-' | b':' | b'.'))
            })
            .unwrap_or(line.len());
        reader.advance(len);
        let key = if marker == b'#' { "id" } else { "class" };
        return Some((key.into(), seg.with_stop(seg.start() + len).into()));
    }

    let (line, _) = reader.peek_line_bytes()?;
    let first = *line.first()?;
    if !(first.is_ascii_alphabetic() || matches!(first, b'_' | b':')) {
        return None;
    }
    let len = line
        .iter()
        .take_while(|&&b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'-' | b':' | b'.'))
        .count();
    let name = &line[..len];
    reader.advance(len);

    // A named attribute must be followed by `=` (spaces allowed before it).
    reader.skip_spaces();
    if reader.peek_byte() != b'=' {
        return None;
    }
    reader.advance(1);
    let value = parse_attribute_value(reader)?;
    Some((String::from_utf8_lossy(name).into_owned(), value))
}
/// Parses an attribute value after skipping leading spaces.
///
/// A value starting with `"` or `'` is handed to
/// `parse_quoted_attribute_value` with that quote byte; anything else is
/// handed to `parse_unquoted_attribute_value`.
fn parse_attribute_value<'a>(reader: &mut impl text::Reader<'a>) -> Option<text::MultilineValue> {
    reader.skip_spaces();
    let first = reader.peek_byte();
    if first == b'"' || first == b'\'' {
        parse_quoted_attribute_value(reader, first)
    } else {
        parse_unquoted_attribute_value(reader)
    }
}
/// Parses a quoted attribute value delimited by the quote byte `q`.
///
/// The reader must be positioned on the opening quote. The value may span
/// several lines: one index is collected per line until the closing quote is
/// found, and the reader ends up just after that quote.
///
/// Each collected piece is passed through `resolve_entity_references` and
/// `resolve_numeric_references`. If nothing was rewritten, the value is
/// returned as indices into the source; otherwise the rewritten text is
/// returned as an owned value.
///
/// Returns `None` if the input ends before a closing quote is seen.
fn parse_quoted_attribute_value<'a>(
    reader: &mut impl text::Reader<'a>,
    q: u8,
) -> Option<text::MultilineValue> {
    // Skip the opening quote.
    reader.advance(1);

    let mut value = TinyVec::<text::Index>::empty();
    loop {
        // `?` is the only failure exit: no more input before the closing quote.
        let (line, seg) = reader.peek_line_bytes()?;
        match memchr::memchr(q, &line) {
            Some(i) => {
                // Consume up to and including the closing quote, but keep the
                // quote itself out of the value.
                reader.advance(i + 1);
                value.push(seg.with_stop(seg.start() + i).into());
                break;
            }
            None => {
                // No closing quote on this line: take the whole line and go on.
                reader.advance_line();
                value.push(seg.into());
            }
        }
    }

    if value.len() == 1 {
        let resolved =
            resolve_numeric_references(resolve_entity_references(value[0].bytes(reader.source())));
        Some(match resolved {
            Cow::Borrowed(_) => value.into(),
            Cow::Owned(s) => s.into(),
        })
    } else {
        let mut result = String::new();
        let mut has_resolved = false;
        for idx in &value {
            let resolved =
                resolve_numeric_references(resolve_entity_references(idx.bytes(reader.source())));
            // Checked conversion instead of `from_utf8_unchecked`: the bytes
            // come back from the resolver helpers, so validity is not assumed.
            result.push_str(&String::from_utf8_lossy(&resolved));
            if matches!(resolved, Cow::Owned(_)) {
                has_resolved = true;
            }
        }
        if has_resolved {
            Some(result.into())
        } else {
            // Nothing was rewritten, so the source indices are sufficient.
            Some(value.into())
        }
    }
}
/// Parses an unquoted attribute value from the current line.
///
/// The value extends up to (not including) the first space or any of the
/// bytes `}`, `"`, `'`, `=`, `<`, `>`, `` ` `` or `,`. An empty value yields
/// `None` without advancing the reader.
///
/// The value is passed through `resolve_entity_references` and
/// `resolve_numeric_references`; if nothing was rewritten it is returned as a
/// segment of the source, otherwise as the rewritten owned value.
fn parse_unquoted_attribute_value<'a>(
    reader: &mut impl text::Reader<'a>,
) -> Option<text::MultilineValue> {
    let (line, seg) = reader.peek_line_bytes()?;
    let len = line
        .iter()
        .position(|&b| {
            is_space(b) || matches!(b, b'}' | b'"' | b'\'' | b'=' | b'<' | b'>' | b'`' | b',')
        })
        .unwrap_or(line.len());
    if len == 0 {
        return None;
    }
    reader.advance(len);

    let seg = seg.with_stop(seg.start() + len);
    let resolved =
        resolve_numeric_references(resolve_entity_references(seg.bytes(reader.source())));
    Some(match resolved {
        Cow::Borrowed(_) => seg.into(),
        Cow::Owned(rewritten) => rewritten.into(),
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    #[allow(unused_imports)]
    #[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
    use crate::println;
    use crate::text::Reader;

    /// Covers shorthand id/class, class merging, a quoted value containing an
    /// entity reference, an unquoted value, and the reader position afterwards.
    #[test]
    fn test_parse_attributes() {
        // The title uses `&amp;` so that entity resolution actually rewrites
        // the value; a bare `&` would leave nothing to resolve and the
        // `String` variant asserted below would not be produced.
        let source = "{#my-id .class1 .class2 title=\"My &amp;Title\" attr=aaa} rest of line";
        let mut reader = text::BasicReader::new(source);
        let attrs = parse_attributes(&mut reader).unwrap();

        assert_eq!(attrs.get("id").unwrap().str(source), "my-id");
        assert!(matches!(
            attrs.get("id").unwrap(),
            &text::MultilineValue::Indices(_)
        ));

        // Two `.class` shorthands are merged into one owned string.
        assert_eq!(attrs.get("class").unwrap().str(source), "class1 class2");
        assert!(matches!(
            attrs.get("class").unwrap(),
            &text::MultilineValue::String(_)
        ));

        // The resolved entity forces an owned value.
        assert_eq!(attrs.get("title").unwrap().str(source), "My &Title");
        assert!(matches!(
            attrs.get("title").unwrap(),
            &text::MultilineValue::String(_)
        ));

        assert_eq!(attrs.get("attr").unwrap().str(source), "aaa");
        assert!(matches!(
            attrs.get("attr").unwrap(),
            &text::MultilineValue::Indices(_)
        ));

        // The reader stops right after the closing brace.
        let (line, _) = reader.peek_line().unwrap();
        assert_eq!(line.as_ref(), " rest of line");
    }

    /// A quoted value may span lines; with nothing to resolve it stays as
    /// indices into the source.
    #[test]
    fn test_parse_attributes_multiline() {
        let source = "{title=\"This is a \nmultiline title\"} rest";
        let mut reader = text::BasicReader::new(source);
        let attrs = parse_attributes(&mut reader).unwrap();

        assert_eq!(
            attrs.get("title").unwrap().str(source),
            "This is a \nmultiline title"
        );
        assert!(matches!(
            attrs.get("title").unwrap(),
            &text::MultilineValue::Indices(_)
        ));

        let (line, _) = reader.peek_line().unwrap();
        assert_eq!(line.as_ref(), " rest");
    }
}