use crate::CowStr;
pub(crate) fn valid(src: &str) -> usize {
use State::*;
let mut n = 0;
let mut state = Start;
for c in src.bytes() {
n += 1;
state = state.step(c);
match state {
Done | Invalid => break,
_ => {}
}
}
if matches!(state, Done) {
n
} else {
0
}
}
/// The value of a single attribute.
///
/// The text is kept in its raw form; backslash escapes are only resolved when
/// the value is read through `parts` or formatted with `Display`.
#[derive(Clone, Debug, Eq, PartialEq, Default)]
pub struct AttributeValue<'s> {
/// Raw value text, borrowed or owned.
raw: CowStr<'s>,
}
impl<'s> AttributeValue<'s> {
    /// Create an empty attribute value.
    #[must_use]
    pub fn new() -> Self {
        Default::default()
    }

    /// Iterate over the pieces of the value with backslash escapes processed.
    ///
    /// Concatenating the yielded pieces gives the processed value.
    #[must_use]
    pub fn parts(&'s self) -> AttributeValueParts<'s> {
        let ahead: &'s str = &self.raw;
        AttributeValueParts { ahead }
    }

    /// Append `s` to the value, separated from any existing content by a
    /// single space. Appending an empty string does nothing.
    fn extend(&mut self, s: &'s str) {
        if !s.is_empty() {
            if !self.raw.is_empty() {
                self.extend_raw(" ");
            }
            self.extend_raw(s);
        }
    }

    /// Append `s` verbatim, without inserting a separator.
    ///
    /// While the current value is empty, `s` is simply borrowed; an
    /// allocation only happens once two non-empty pieces must be joined.
    fn extend_raw(&mut self, s: &'s str) {
        if self.raw.is_empty() {
            // Nothing to keep: replace whatever is stored with a borrow of `s`.
            self.raw = s.into();
            return;
        }
        match &mut self.raw {
            CowStr::Borrowed(prev) => self.raw = format!("{prev}{s}").into(),
            CowStr::Owned(prev) => prev.push_str(s),
        }
    }
}
impl<'s> From<&'s str> for AttributeValue<'s> {
fn from(value: &'s str) -> Self {
Self { raw: value.into() }
}
}
impl<'s> From<CowStr<'s>> for AttributeValue<'s> {
fn from(value: CowStr<'s>) -> Self {
Self { raw: value }
}
}
impl From<String> for AttributeValue<'_> {
fn from(value: String) -> Self {
Self { raw: value.into() }
}
}
impl std::fmt::Display for AttributeValue<'_> {
    /// Write the value with backslash escapes processed, piece by piece.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for piece in self.parts() {
            f.write_str(piece)?;
        }
        Ok(())
    }
}
/// Iterator over the pieces of a raw attribute value, with backslash escapes
/// removed. Concatenating all pieces yields the processed value.
pub struct AttributeValueParts<'s> {
    /// Portion of the raw value that has not been yielded yet.
    ahead: &'s str,
}

impl<'s> Iterator for AttributeValueParts<'s> {
    type Item = &'s str;

    /// Yield the text up to the next escape sequence, or all remaining text.
    ///
    /// A backslash followed by another backslash yields the text including
    /// one of the two backslashes. A backslash followed by any other ASCII
    /// punctuation yields the text before the backslash (possibly empty) and
    /// leaves the punctuation for the next piece. Any other backslash is kept
    /// as ordinary text.
    fn next(&mut self) -> Option<Self::Item> {
        let rest = self.ahead;
        let bytes = rest.as_bytes();

        // First escape in `rest`, as (end of the piece, where to resume).
        let escape = rest
            .match_indices('\\')
            .find_map(|(at, _)| match bytes.get(at + 1) {
                Some(b'\\') => Some((at + 1, at + 2)),
                Some(follow) if follow.is_ascii_punctuation() => Some((at, at + 1)),
                _ => None,
            });

        match escape {
            Some((end, resume)) => {
                self.ahead = &rest[resume..];
                Some(&rest[..end])
            }
            None if rest.is_empty() => None,
            None => Some(std::mem::take(&mut self.ahead)),
        }
    }
}
/// The kind of an element in an attribute set.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum AttributeKind<'s> {
/// A class attribute; its key is `"class"`.
Class,
/// An identifier attribute; its key is `"id"`.
Id,
/// A key-value pair with an explicit key.
Pair { key: CowStr<'s> },
/// A comment; it has no key.
Comment,
}
impl AttributeKind<'_> {
#[must_use]
pub fn key(&self) -> Option<&str> {
match self {
AttributeKind::Class => Some("class"),
AttributeKind::Id => Some("id"),
AttributeKind::Pair { key } => Some(key.as_ref()),
AttributeKind::Comment => None,
}
}
}
/// An ordered collection of attribute elements.
///
/// Elements are kept in insertion order and may contain repeated keys as well
/// as comments; see `get_value` and `unique_pairs` for the merged view.
#[derive(Clone, PartialEq, Eq, Default)]
pub struct Attributes<'s>(Vec<AttributeElem<'s>>);
/// A single element of an attribute set: its kind together with its value.
type AttributeElem<'s> = (AttributeKind<'s>, AttributeValue<'s>);
impl<'s> Attributes<'s> {
    /// Create an empty collection of attributes.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Move all elements out of `self`, leaving it empty.
    #[must_use]
    pub(crate) fn take(&mut self) -> Self {
        std::mem::take(self)
    }

    /// Parse `input` and append the resulting elements to `self`.
    ///
    /// Trailing ASCII whitespace in `input` is ignored. The rest of the input
    /// must consist entirely of one or more complete `{...}` attribute sets.
    ///
    /// # Errors
    ///
    /// Returns `Err(pos)` if the input is invalid at byte `pos`, or if the
    /// attributes end before the input does or are left unfinished; `pos` is
    /// then the number of bytes that were consumed. On error `self` is left
    /// exactly as it was before the call.
    pub(crate) fn parse(&mut self, input: &'s str) -> Result<(), usize> {
        let len_before = self.0.len();
        let mut parser = Parser::new(self.take());
        let input = input.trim_end_matches(|c: char| c.is_ascii_whitespace());
        let outcome = match parser.parse(input) {
            Ok(n) if n == input.len() && matches!(parser.state, State::Done) => Ok(()),
            Ok(n) | Err(n) => Err(n),
        };
        // Always take the elements back so a failed parse cannot drop the
        // attributes that were collected before this call.
        *self = parser.finish();
        if outcome.is_err() {
            // The parser only pushes new elements and extends the most
            // recently pushed one, so truncating undoes its partial work.
            self.0.truncate(len_before);
        }
        outcome
    }

    /// Return whether any element is stored under `key`.
    #[must_use]
    pub fn contains_key(&self, key: &str) -> bool {
        self.0.iter().any(|(kind, _)| kind.key() == Some(key))
    }

    /// Return a copy of the value stored under `key`, if any.
    ///
    /// For `"class"`, the values of all class elements are joined with single
    /// spaces, in order. For any other key, the value of the last element
    /// with that key is returned. `None` is returned only when no element has
    /// the key; an element with an empty value yields an empty value.
    #[must_use]
    pub fn get_value(&self, key: &str) -> Option<AttributeValue<'_>> {
        if key == "class" {
            // Track presence separately from content so that an empty class
            // value is still reported, consistently with `contains_key`.
            let mut found = false;
            let mut value = AttributeValue::new();
            for (k, v) in &self.0 {
                if k.key() == Some("class") {
                    found = true;
                    value.extend(&v.raw);
                }
            }
            found.then_some(value)
        } else {
            self.0
                .iter()
                .rfind(|(k, _)| k.key() == Some(key))
                .map(|(_, v)| v.clone())
        }
    }

    /// Iterate over the key-value pairs with repeated keys merged.
    ///
    /// Each key is yielded once, at the position of its first occurrence.
    /// Class values are concatenated, other keys take their last value, and
    /// comments are skipped.
    #[must_use]
    pub fn unique_pairs<'a>(&'a self) -> AttributePairsIter<'a, 's> {
        AttributePairsIter {
            attrs: &self.0,
            pos: 0,
        }
    }
}
/// Error returned when a string could not be parsed as attributes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ParseAttributesError {
/// Byte position in the input at which parsing stopped.
pub pos: usize,
}
impl<'s> TryFrom<&'s str> for Attributes<'s> {
    type Error = ParseAttributesError;

    /// Parse a string of one or more `{...}` attribute sets.
    ///
    /// # Errors
    ///
    /// Returns the byte position reported by the parser if `s` is not
    /// entirely made up of valid, complete attributes.
    fn try_from(s: &'s str) -> Result<Self, Self::Error> {
        let mut parsed = Self::new();
        parsed
            .parse(s)
            .map_err(|pos| ParseAttributesError { pos })?;
        Ok(parsed)
    }
}
impl<'s> From<Vec<AttributeElem<'s>>> for Attributes<'s> {
fn from(v: Vec<AttributeElem<'s>>) -> Self {
Self(v)
}
}
impl<'s> From<Attributes<'s>> for Vec<AttributeElem<'s>> {
fn from(a: Attributes<'s>) -> Self {
a.0
}
}
impl<'s> std::ops::Deref for Attributes<'s> {
type Target = Vec<AttributeElem<'s>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl std::ops::DerefMut for Attributes<'_> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl<'s> FromIterator<AttributeElem<'s>> for Attributes<'s> {
fn from_iter<I: IntoIterator<Item = AttributeElem<'s>>>(iter: I) -> Self {
Attributes(iter.into_iter().collect())
}
}
impl std::fmt::Debug for Attributes<'_> {
    /// Format the elements in attribute-like syntax, using their raw values.
    ///
    /// Elements are separated by single spaces and wrapped in braces: classes
    /// as `.value`, identifiers as `#value`, pairs as `key="value"` and
    /// comments as `%value%`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("{")?;
        for (idx, (kind, value)) in self.0.iter().enumerate() {
            if idx > 0 {
                f.write_str(" ")?;
            }
            let raw = &value.raw;
            match kind {
                AttributeKind::Class => write!(f, ".{raw}"),
                AttributeKind::Id => write!(f, "#{raw}"),
                AttributeKind::Pair { key } => write!(f, "{key}=\"{raw}\""),
                AttributeKind::Comment => write!(f, "%{raw}%"),
            }?;
        }
        f.write_str("}")
    }
}
impl<'s> IntoIterator for Attributes<'s> {
type Item = AttributeElem<'s>;
type IntoIter = std::vec::IntoIter<AttributeElem<'s>>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl<'i, 's> IntoIterator for &'i Attributes<'s> {
    type Item = &'i AttributeElem<'s>;
    type IntoIter = std::slice::Iter<'i, AttributeElem<'s>>;

    /// Iterate over shared references to the elements in order.
    fn into_iter(self) -> Self::IntoIter {
        self.0.as_slice().iter()
    }
}
impl<'i, 's> IntoIterator for &'i mut Attributes<'s> {
    type Item = &'i mut AttributeElem<'s>;
    type IntoIter = std::slice::IterMut<'i, AttributeElem<'s>>;

    /// Iterate over mutable references to the elements in order.
    fn into_iter(self) -> Self::IntoIter {
        self.0.as_mut_slice().iter_mut()
    }
}
/// Iterator over the key-value pairs of an attribute collection in which
/// every key is yielded only once.
pub struct AttributePairsIter<'a, 's> {
/// All elements of the collection being iterated.
attrs: &'a [AttributeElem<'s>],
/// Index of the next element to examine.
pos: usize,
}
impl<'a: 's, 's> Iterator for AttributePairsIter<'a, 's> {
    type Item = (&'s str, AttributeValue<'s>);

    /// Yield the next key that has not been seen before, with its merged value.
    ///
    /// Elements without a key (comments) are skipped. For `"class"` the value
    /// is the first class value extended with every later class value. For
    /// any other key it is the value of the last element with that key.
    fn next(&mut self) -> Option<Self::Item> {
        let attrs = self.attrs;
        while let Some((kind, first)) = attrs.get(self.pos) {
            let earlier = &attrs[..self.pos];
            let later = &attrs[self.pos + 1..];
            self.pos += 1;

            let Some(key) = kind.key() else { continue };

            // The key was already emitted when it was first encountered.
            if earlier.iter().any(|(k, _)| k.key() == Some(key)) {
                continue;
            }

            if key == "class" {
                let mut joined = first.clone();
                for (k, v) in later {
                    if k.key() == Some("class") {
                        joined.extend(&v.raw);
                    }
                }
                return Some((key, joined));
            }

            // A later element with the same key overrides this one.
            let last = later.iter().rfind(|(k, _)| k.key() == Some(key));
            let chosen = match last {
                Some((_, v)) => v,
                None => first,
            };
            return Some((key, chosen.clone()));
        }
        None
    }
}
/// Incremental checker that runs the attribute state machine over input
/// without building any attributes.
#[derive(Clone)]
pub(crate) struct Validator {
/// Current state of the state machine; kept between calls to `parse`.
state: State,
}
impl Validator {
pub(crate) fn new() -> Self {
Self {
state: State::Start,
}
}
pub(crate) fn restart(&mut self) {
self.state = State::Start;
}
pub(crate) fn parse(&mut self, input: &str) -> Option<usize> {
let mut bytes = input.bytes();
for c in &mut bytes {
self.state = self.state.step(c);
match self.state {
State::Done => return Some(input.len() - bytes.len()),
State::Invalid => return Some(0),
_ => {}
}
}
None
}
}
/// Attribute parser that collects parsed elements into an `Attributes`.
pub(crate) struct Parser<'s> {
/// Elements collected so far, including those supplied at construction.
attrs: Attributes<'s>,
/// Current state of the state machine; kept between calls to `parse`.
state: State,
}
impl<'s> Parser<'s> {
pub(crate) fn new(attrs: Attributes<'s>) -> Self {
Self {
attrs,
state: State::Start,
}
}
pub(crate) fn parse(&mut self, input: &'s str) -> Result<usize, usize> {
use State::*;
let mut pos_prev = 0;
for (pos, c) in input.bytes().enumerate() {
let state_next = self.state.step(c);
if matches!(state_next, Invalid) {
return Err(pos);
}
let st = std::mem::replace(&mut self.state, state_next);
if st != self.state && !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape))
{
let content = &input[pos_prev..pos];
pos_prev = pos;
match st {
Class => self.attrs.push((AttributeKind::Class, content.into())),
Identifier => self.attrs.push((AttributeKind::Id, content.into())),
Key => self.attrs.push((
AttributeKind::Pair {
key: content.into(),
},
"".into(),
)),
Value | ValueQuoted | ValueContinued => {
let last = self.attrs.len() - 1;
self.attrs.0[last]
.1
.extend(&content[usize::from(matches!(st, ValueQuoted))..]);
}
Comment | CommentNewline => {
let last = self.attrs.len() - 1;
self.attrs.0[last].1.extend_raw(if matches!(st, Comment) {
content
} else {
"\n"
});
}
CommentFirst => self.attrs.push((AttributeKind::Comment, "".into())),
_ => {}
}
}
debug_assert!(!matches!(self.state, Invalid));
if matches!(self.state, Done) {
if input[pos + 1..].starts_with('{') {
self.state = Start;
} else {
return Ok(pos + 1);
}
}
}
Ok(input.len())
}
pub(crate) fn finish(self) -> Attributes<'s> {
self.attrs
}
}
/// States of the attribute state machine; see `State::step` for transitions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
/// Before the opening `{`.
Start,
/// Inside the braces, between elements.
Whitespace,
/// Directly after the `%` that opens a comment.
CommentFirst,
/// Inside a comment.
Comment,
/// Directly after a newline inside a comment.
CommentNewline,
/// Directly after the `.` that introduces a class.
ClassFirst,
/// Inside a class name.
Class,
/// Directly after the `#` that introduces an identifier.
IdentifierFirst,
/// Inside an identifier name.
Identifier,
/// Inside the key of a key-value pair.
Key,
/// Directly after the `=` of a key-value pair.
ValueFirst,
/// Inside an unquoted value.
Value,
/// Inside a quoted value, on the line of its opening quote.
ValueQuoted,
/// Directly after a backslash inside a quoted value.
ValueEscape,
/// Directly after a newline inside a quoted value.
ValueNewline,
/// Inside a quoted value, on a line after a newline.
ValueContinued,
/// The closing `}` has been consumed.
Done,
/// The input is not valid.
Invalid,
}
impl State {
    /// Return the state reached from `self` after consuming the byte `c`.
    ///
    /// # Panics
    ///
    /// Panics if called on `Done` or `Invalid`; both are terminal.
    fn step(self, c: u8) -> State {
        use State::*;

        match self {
            Start => {
                if c == b'{' {
                    Whitespace
                } else {
                    Invalid
                }
            }
            Whitespace => match c {
                b'}' => Done,
                b'.' => ClassFirst,
                b'#' => IdentifierFirst,
                b'%' => CommentFirst,
                _ if is_name(c) => Key,
                _ if c.is_ascii_whitespace() => Whitespace,
                _ => Invalid,
            },
            CommentFirst | Comment | CommentNewline => match c {
                b'%' => Whitespace,
                b'}' => Done,
                b'\n' => CommentNewline,
                _ => Comment,
            },
            ClassFirst => {
                if is_name(c) {
                    Class
                } else {
                    Invalid
                }
            }
            IdentifierFirst => {
                if is_name(c) {
                    Identifier
                } else {
                    Invalid
                }
            }
            Class | Identifier | Value => {
                if is_name(c) {
                    // Still inside the same name or value.
                    self
                } else if c.is_ascii_whitespace() {
                    Whitespace
                } else if c == b'}' {
                    Done
                } else {
                    Invalid
                }
            }
            Key => {
                if is_name(c) {
                    Key
                } else if c == b'=' {
                    ValueFirst
                } else {
                    Invalid
                }
            }
            ValueFirst => {
                if is_name(c) {
                    Value
                } else if c == b'"' {
                    ValueQuoted
                } else {
                    Invalid
                }
            }
            ValueQuoted => match c {
                b'"' => Whitespace,
                b'\n' => ValueNewline,
                b'\\' => ValueEscape,
                _ => ValueQuoted,
            },
            // The escaped byte is ordinary content, unless it is a newline.
            ValueEscape => {
                if c == b'\n' {
                    ValueNewline
                } else {
                    ValueQuoted
                }
            }
            ValueNewline | ValueContinued => match c {
                b'"' => Whitespace,
                b'\n' => ValueNewline,
                _ => ValueContinued,
            },
            Invalid | Done => panic!("{self:?}"),
        }
    }
}
/// Return whether `c` may appear in a name: an ASCII letter or digit, or one
/// of `:`, `_` and `-`.
pub(crate) fn is_name(c: u8) -> bool {
    matches!(
        c,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b':' | b'_' | b'-'
    )
}