dom-cat 0.1.0

Persistent DOM model: arena-backed Node tree with mutation API and CSS-selector matching. Consumes html-cat trees; selectors via css-cat. No mut, no Rc/Arc, no interior mutability, no panics, exhaustive matches. Third sub-crate of a Servo-replacement webview runtime targeting Tauri.
//! In-crate selector parser.
//!
//! `css_cat::parse` strips whitespace before selector recognition, which
//! loses the descendant combinator.  This module re-parses just the
//! selector source from raw `css_cat::tokenizer` output, treating
//! whitespace as the descendant combinator and `>` / `+` / `~` as their
//! explicit forms.

use css_cat::token::CssToken;
use css_cat::{
    AttrOperator, Combinator, ComplexSelector, CompoundSelector, SelectorList, SimpleSelector,
    span::{Position, Span},
};

use crate::error::Error;

/// Parse `source` into a [`SelectorList`].
///
/// # Errors
///
/// Surfaces an [`Error::InvalidSelector`] when no usable selector
/// survives parsing.
pub fn parse_selectors(source: &str) -> Result<SelectorList, Error> {
    let tokens = css_cat::tokenizer::tokenize(source);
    let groups = split_by_comma(&tokens, 0, Vec::new(), Vec::new());
    let selectors: Vec<ComplexSelector> = groups
        .into_iter()
        .filter_map(|group| parse_complex(&group))
        .collect();
    if selectors.is_empty() {
        Err(Error::InvalidSelector {
            selector: source.to_owned(),
        })
    } else {
        Ok(SelectorList::new(selectors))
    }
}

/// Split `tokens[pos..]` into groups separated by `Comma` tokens.
///
/// `current` holds tokens already gathered for the group in progress and is
/// prepended to the first group found; `groups` holds groups completed
/// earlier and is prepended to the result.  Comma tokens themselves are not
/// copied into any group.  An empty group between two commas is kept, but an
/// empty *final* group (empty input or a trailing comma) is omitted.
///
/// Every `Comma` token is treated as a separator regardless of context, so a
/// comma inside a functional pseudo-class argument also splits the list.
///
/// The split is done with `slice::split` in a single pass, so the work is
/// linear in the number of tokens and uses no per-token recursion.
fn split_by_comma(
    tokens: &[CssToken],
    pos: usize,
    current: Vec<CssToken>,
    groups: Vec<Vec<CssToken>>,
) -> Vec<Vec<CssToken>> {
    // A start position past the end behaves like an exhausted token stream.
    let rest = tokens.get(pos..).unwrap_or_default();
    let pieces: Vec<&[CssToken]> = rest
        .split(|token| matches!(token, CssToken::Comma(_)))
        .collect();
    let fresh: Vec<Vec<CssToken>> = match pieces.split_first() {
        Some((first, later)) => {
            // The first piece continues the group the caller had in progress.
            let head: Vec<CssToken> = current
                .into_iter()
                .chain(first.iter().cloned())
                .collect();
            std::iter::once(head)
                .chain(later.iter().map(|piece| piece.to_vec()))
                .collect()
        }
        // `slice::split` always yields at least one piece; kept for totality.
        None => vec![current],
    };
    // Only the last group is dropped when empty; earlier empty groups stay.
    let kept = if matches!(fresh.last(), Some(group) if group.is_empty()) {
        fresh.len() - 1
    } else {
        fresh.len()
    };
    groups
        .into_iter()
        .chain(fresh.into_iter().take(kept))
        .collect()
}

/// Parse one comma-free token group into a [`ComplexSelector`].
///
/// Surrounding trivia is trimmed first.  The group must start with a
/// compound selector, otherwise `None` is returned.  After the head, as many
/// `(combinator, compound)` pairs as possible are consumed; tokens left over
/// after the last successfully parsed compound are ignored rather than
/// treated as an error.
///
/// The resulting span runs from the start of the first trimmed token to the
/// end of the last trimmed token, including any ignored trailing tokens.
fn parse_complex(tokens: &[CssToken]) -> Option<ComplexSelector> {
    let body = trim_whitespace(tokens);
    let (head, tail_start) = parse_compound(&body, skip_whitespace(&body, 0))?;
    // The position where tail parsing stopped is deliberately unused.
    let (tail, _stopped_at) = parse_tail(&body, tail_start, Vec::new());
    // Placeholder position used when there is no token to take a span from.
    let fallback = Position::new(1, 1, 0);
    let begin = body.first().map_or(fallback, |token| token.span().start());
    let finish = body.last().map_or(fallback, |token| token.span().end());
    Some(ComplexSelector::new(head, tail, Span::new(begin, finish)))
}

/// Return a copy of `tokens` without leading and trailing trivia tokens.
///
/// When the computed range is invalid (for example, every token is trivia so
/// the start lies beyond the end) an empty vector is returned.
fn trim_whitespace(tokens: &[CssToken]) -> Vec<CssToken> {
    let first_kept = skip_whitespace(tokens, 0);
    let past_last_kept = trim_trailing_whitespace(tokens, tokens.len());
    match tokens.get(first_kept..past_last_kept) {
        Some(kept) => kept.to_vec(),
        None => Vec::new(),
    }
}

/// Move `end` (an exclusive upper bound into `tokens`) backwards past any run
/// of trivia tokens that immediately precedes it.
///
/// Returns the new exclusive bound.  If `end` lies beyond the slice it is
/// returned unchanged.
fn trim_trailing_whitespace(tokens: &[CssToken], end: usize) -> usize {
    tokens.get(..end).map_or(end, |head| {
        let trailing = head
            .iter()
            .rev()
            .take_while(|token| token.is_trivia())
            .count();
        end - trailing
    })
}

fn parse_tail(
    tokens: &[CssToken],
    pos: usize,
    acc: Vec<(Combinator, CompoundSelector)>,
) -> (Vec<(Combinator, CompoundSelector)>, usize) {
    if pos >= tokens.len() {
        (acc, pos)
    } else {
        let (combinator, after_comb) = parse_combinator(tokens, pos);
        if let Some((compound, after_compound)) = parse_compound(tokens, after_comb) {
            parse_tail(
                tokens,
                after_compound,
                acc.into_iter()
                    .chain(std::iter::once((combinator, compound)))
                    .collect(),
            )
        } else {
            (acc, pos)
        }
    }
}

/// Read the combinator located at `pos`.
///
/// Leading trivia is skipped.  A `>`, `+` or `~` delimiter selects the child,
/// adjacent-sibling or general-sibling combinator respectively, and the
/// returned position is past the delimiter and any trivia after it.  Any
/// other token (or the end of input) yields the descendant combinator with
/// the position just past the leading trivia, which equals `pos` when there
/// was none.
fn parse_combinator(tokens: &[CssToken], pos: usize) -> (Combinator, usize) {
    let cursor = skip_whitespace(tokens, pos);
    let explicit = match tokens.get(cursor) {
        Some(CssToken::Delim('>', _)) => Some(Combinator::Child),
        Some(CssToken::Delim('+', _)) => Some(Combinator::AdjacentSibling),
        Some(CssToken::Delim('~', _)) => Some(Combinator::GeneralSibling),
        _other => None,
    };
    match explicit {
        Some(combinator) => (combinator, skip_whitespace(tokens, cursor + 1)),
        None => (Combinator::Descendant, cursor),
    }
}

/// Return the index of the first non-trivia token at or after `pos`.
///
/// Yields `tokens.len()` when only trivia remains, and `pos` unchanged when
/// `pos` is already outside the slice.
fn skip_whitespace(tokens: &[CssToken], pos: usize) -> usize {
    tokens.get(pos..).map_or(pos, |rest| {
        pos + rest.iter().take_while(|token| token.is_trivia()).count()
    })
}

/// Parse a compound selector (a run of adjacent simple selectors) at `pos`.
///
/// Returns the compound and the position after its last simple selector, or
/// `None` when no simple selector starts at `pos`.
fn parse_compound(tokens: &[CssToken], pos: usize) -> Option<(CompoundSelector, usize)> {
    let (simples, end) = collect_simple(tokens, pos, Vec::new());
    (!simples.is_empty()).then(|| (CompoundSelector::new(simples), end))
}

fn collect_simple(
    tokens: &[CssToken],
    pos: usize,
    acc: Vec<SimpleSelector>,
) -> (Vec<SimpleSelector>, usize) {
    if let Some((simple, next)) = parse_simple(tokens, pos) {
        let extended = acc.into_iter().chain(std::iter::once(simple)).collect();
        collect_simple(tokens, next, extended)
    } else {
        (acc, pos)
    }
}

/// Parse a single simple selector starting at `pos`.
///
/// Recognised forms: `*` (universal), an identifier (type), `.` followed by
/// an identifier (class), a hash token (id), `:` (delegated to
/// `parse_pseudo`) and `[` (delegated to `parse_attribute`).  Returns the
/// selector with the position after it, or `None` for anything else.
fn parse_simple(tokens: &[CssToken], pos: usize) -> Option<(SimpleSelector, usize)> {
    let token = tokens.get(pos)?;
    let after = pos + 1;
    match token {
        CssToken::Delim('*', _) => Some((SimpleSelector::Universal, after)),
        CssToken::Ident(tag, _) => Some((SimpleSelector::Type(tag.clone()), after)),
        CssToken::Hash(id, _) => Some((SimpleSelector::Id(id.clone()), after)),
        CssToken::Delim('.', _) => {
            // A class selector needs an identifier directly after the dot.
            if let Some(CssToken::Ident(class, _)) = tokens.get(after) {
                Some((SimpleSelector::Class(class.clone()), after + 1))
            } else {
                None
            }
        }
        CssToken::Colon(_) => parse_pseudo(tokens, after),
        CssToken::LBracket(_) => parse_attribute(tokens, after),
        _other => None,
    }
}

/// Parse the part of a pseudo-class that follows the colon; `pos` is the
/// index just after the `:` token.
///
/// An identifier gives a pseudo-class without argument.  A function token
/// gives a pseudo-class whose argument is the text gathered by
/// `collect_until_rparen`; a missing closing parenthesis is tolerated because
/// that helper also stops at the end of the tokens.  Any other token yields
/// `None`.
fn parse_pseudo(tokens: &[CssToken], pos: usize) -> Option<(SimpleSelector, usize)> {
    let (name, argument, next) = match tokens.get(pos)? {
        CssToken::Ident(name, _) => (name.clone(), None, pos + 1),
        CssToken::Function(name, _) => {
            let (end, text) = collect_until_rparen(tokens, pos + 1, String::new());
            (name.clone(), Some(text), end)
        }
        _other => return None,
    };
    Some((SimpleSelector::PseudoClass { name, argument }, next))
}

/// Gather the textual argument of a functional pseudo-class.
///
/// Starting at `pos`, tokens are consumed up to the first `RParen` token or
/// the end of the tokens, whichever comes first.  Identifier, number and
/// delimiter tokens contribute their text; every other token kind
/// contributes nothing.  The gathered text is appended to `acc`.
///
/// Returns the position after the closing parenthesis (or the position where
/// the tokens ran out) together with the accumulated text.
///
/// The text is built in one linear pass instead of re-formatting the whole
/// accumulator for every token.
fn collect_until_rparen(tokens: &[CssToken], pos: usize, acc: String) -> (usize, String) {
    // A start position past the end behaves like an exhausted token stream.
    let rest = tokens.get(pos..).unwrap_or_default();
    let body_len = rest
        .iter()
        .take_while(|token| !matches!(token, CssToken::RParen(_)))
        .count();
    let text: String = rest
        .iter()
        .take(body_len)
        .map(|token| match token {
            CssToken::Ident(n, _) => n.clone(),
            CssToken::Number(v, _, _) => format!("{v}"),
            CssToken::Delim(c, _) => c.to_string(),
            _other => String::new(),
        })
        .collect();
    // The scan stops either on the closing parenthesis, which is consumed as
    // well, or at the end of the tokens, where nothing is left to consume.
    let end = if rest.get(body_len).is_some() {
        pos + body_len + 1
    } else {
        pos + body_len
    };
    (end, acc + text.as_str())
}

/// Parse the inside of an attribute selector; `pos` is the index just after
/// the `[` token.
///
/// Accepted shapes are `name]`, `name=value]` and `name<op>value]`, where
/// `<op>` is a two-character operator token known to `op2_to_op`.  Returns
/// the attribute selector with the position after the closing `]`.
///
/// Returns `None` when the name is not an identifier, when the token after
/// the name is neither `]`, `=` nor a recognised two-character operator, or
/// when the value part fails to parse.  No trivia is skipped inside the
/// brackets.
fn parse_attribute(tokens: &[CssToken], pos: usize) -> Option<(SimpleSelector, usize)> {
    let (name, after_name) = match tokens.get(pos) {
        Some(CssToken::Ident(n, _)) => (n.clone(), pos + 1),
        _other => return None,
    };
    match tokens.get(after_name) {
        Some(CssToken::RBracket(_)) => Some((
            SimpleSelector::Attribute {
                name,
                op: None,
                value: None,
            },
            after_name + 1,
        )),
        // An unrecognised operator rejects the selector; it must not produce
        // an attribute selector that has a value but no operator.
        Some(CssToken::Op2(op_str, _)) => op2_to_op(op_str)
            .and_then(|op| parse_attr_value(tokens, after_name + 1, name, Some(op))),
        Some(CssToken::Delim('=', _)) => {
            parse_attr_value(tokens, after_name + 1, name, Some(AttrOperator::Equals))
        }
        _other => None,
    }
}

/// Map the text of a two-character operator token to its [`AttrOperator`].
///
/// Recognises `~=`, `|=`, `^=`, `$=` and `*=`; any other text yields `None`.
fn op2_to_op(s: &str) -> Option<AttrOperator> {
    let operator = match s {
        "~=" => AttrOperator::Includes,
        "|=" => AttrOperator::DashMatch,
        "^=" => AttrOperator::Prefix,
        "$=" => AttrOperator::Suffix,
        "*=" => AttrOperator::Substring,
        _other => return None,
    };
    Some(operator)
}

/// Parse the value and closing bracket of an attribute selector.
///
/// `pos` must point at the value token, which may be a string or an
/// identifier, and the token right after it must be `]`.  On success the
/// finished attribute selector (built from `name`, `op` and the value) is
/// returned with the position after the `]`; otherwise `None`.
fn parse_attr_value(
    tokens: &[CssToken],
    pos: usize,
    name: String,
    op: Option<AttrOperator>,
) -> Option<(SimpleSelector, usize)> {
    let value = match tokens.get(pos)? {
        CssToken::String(text, _) | CssToken::Ident(text, _) => text.clone(),
        _other => return None,
    };
    let closing = pos + 1;
    if matches!(tokens.get(closing), Some(CssToken::RBracket(_))) {
        Some((
            SimpleSelector::Attribute {
                name,
                op,
                value: Some(value),
            },
            closing + 1,
        ))
    } else {
        None
    }
}