use super::{
error::{CompilationError, CompilationErrorKind as ErrorKind, ErrorHandler},
scanner::{Attribute, AttributeValue, Tag, TextMode, Token, TokenSource},
util::{find_dir, is_core_component, no, non_whitespace, yes, VStr},
Name, Namespace, SourceLocation,
};
#[cfg(feature = "serde")]
use serde::Serialize;
use smallvec::{smallvec, SmallVec};
use std::ops::Deref;
/// A node of the parsed template tree.
#[cfg_attr(feature = "serde", derive(Serialize))]
pub enum AstNode<'a> {
/// An element together with its properties and child nodes.
Element(Element<'a>),
/// A run of text, stored as one or more `VStr` pieces.
Text(TextNode<'a>),
/// An interpolation; `source` is the text handed over by the `Interpolation` token
/// (e.g. `world` for `{{world}}`).
Interpolation(SourceNode<'a>),
/// A comment; the builder only inserts these when `ParseOption::preserve_comment` is set.
Comment(SourceNode<'a>),
}
impl<'a> AstNode<'a> {
    /// Borrows the inner element, or returns `None` for any other variant.
    pub fn get_element(&self) -> Option<&Element<'a>> {
        if let AstNode::Element(elem) = self {
            Some(elem)
        } else {
            None
        }
    }

    /// Mutably borrows the inner element, or returns `None` for any other variant.
    pub fn get_element_mut(&mut self) -> Option<&mut Element<'a>> {
        if let AstNode::Element(elem) = self {
            Some(elem)
        } else {
            None
        }
    }

    /// Consumes the node and returns the inner element.
    ///
    /// # Panics
    /// Panics when the node is not an `Element`.
    pub fn into_element(self) -> Element<'a> {
        if let AstNode::Element(elem) = self {
            return elem;
        }
        panic!("call into_element on non-element AstNode")
    }

    /// Returns the source location recorded on the node, whatever its variant.
    pub fn get_location(&self) -> &SourceLocation {
        match self {
            AstNode::Element(elem) => &elem.location,
            AstNode::Text(text) => &text.location,
            AstNode::Interpolation(node) | AstNode::Comment(node) => &node.location,
        }
    }
}
/// A borrowed slice of source text plus its location.
/// Used as the payload of `AstNode::Interpolation` and `AstNode::Comment`.
#[cfg_attr(feature = "serde", derive(Serialize))]
pub struct SourceNode<'a> {
// Text content of the node, borrowed from the input.
pub source: &'a str,
// Where the node was found in the source.
pub location: SourceLocation,
}
/// A text node: one or more `VStr` pieces and the location of the whole run.
pub struct TextNode<'a> {
// Consecutive text tokens are gathered into this vector by the builder's `parse_text`;
// one piece fits inline.
pub text: SmallVec<[VStr<'a>; 1]>,
// Span covering all pieces.
pub location: SourceLocation,
}
#[cfg(feature = "serde")]
impl<'a> Serialize for TextNode<'a> {
    /// Serializes the node as a two-field struct named `TextNode`:
    /// `text` is every piece converted with `into_string` and concatenated,
    /// `location` is serialized as-is.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeStruct;
        let mut out = serializer.serialize_struct("TextNode", 2)?;
        let joined = self
            .text
            .iter()
            .map(|&piece| piece.into_string())
            .collect::<String>();
        out.serialize_field("text", &joined)?;
        out.serialize_field("location", &self.location)?;
        out.end()
    }
}
impl<'a> Deref for TextNode<'a> {
    type Target = str;

    /// Exposes the first piece as a `str`.
    ///
    /// Debug builds assert that the node holds exactly one piece; later pieces
    /// are not visible through this view.
    fn deref(&self) -> &str {
        debug_assert!(self.text.len() == 1);
        let only = &self.text[0];
        only
    }
}
impl<'a> TextNode<'a> {
    /// Returns `true` when no piece contains a character accepted by `non_whitespace`.
    /// A node without pieces counts as all-whitespace.
    pub fn is_all_whitespace(&self) -> bool {
        !self
            .text
            .iter()
            .any(|piece| piece.chars().any(non_whitespace))
    }

    /// Drops one leading line break (`\n` or `\r\n`) from the first piece.
    ///
    /// When the first piece is nothing but that line break the piece itself is
    /// removed. Does nothing for an empty node or when the first piece does not
    /// start with a line break.
    pub fn trim_leading_newline(&mut self) {
        let head = match self.text.first() {
            Some(head) => head,
            None => return,
        };
        let skip = if head.starts_with('\n') {
            1
        } else if head.starts_with("\r\n") {
            2
        } else {
            return;
        };
        if head.len() <= skip {
            // The piece held only the line break.
            self.text.remove(0);
            return;
        }
        let trimmed = VStr {
            raw: &head.raw[skip..],
            ops: head.ops,
        };
        self.text[0] = trimmed;
    }
}
/// One property written on an element's start tag.
#[cfg_attr(feature = "serde", derive(Serialize))]
pub enum ElemProp<'a> {
/// An attribute kept as scanned (not recognised as a directive, or inside a `v-pre` subtree).
Attr(Attribute<'a>),
/// An attribute that was parsed into a directive.
Dir(Directive<'a>),
}
/// Classification assigned to an element by the builder's `parse_element`.
#[derive(PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize))]
pub enum ElementType {
/// Initial value for every element; also forced while a `v-pre` boundary is active.
Plain,
/// Element for which the builder's `is_component` check returned `true`.
Component,
/// `template` tag for which `find_dir` finds an `if`/`for`/`else`/`else-if`/`slot` directive.
Template,
/// Element whose tag name is `slot`.
SlotOutlet,
}
/// An element node: tag, classification, properties and children.
#[cfg_attr(feature = "serde", derive(Serialize))]
pub struct Element<'a> {
// Tag name as written in the source.
pub tag_name: Name<'a>,
// Starts as `Plain`; refined when the element is finished.
pub tag_type: ElementType,
// Result of `ParseOption::get_namespace` for this tag.
pub namespace: Namespace,
// Attributes and directives from the start tag.
pub properties: Vec<ElemProp<'a>>,
// Child nodes in document order.
pub children: Vec<AstNode<'a>>,
// Initially spans the start tag; replaced by the full span when the element is closed.
pub location: SourceLocation,
}
impl<'a> Element<'a> {
    /// Whether this element has been classified as a component.
    #[inline]
    pub fn is_component(&self) -> bool {
        matches!(self.tag_type, ElementType::Component)
    }
}
/// The argument part of a directive's attribute name.
#[cfg_attr(feature = "serde", derive(Serialize))]
pub enum DirectiveArg<'a> {
/// Argument that does not start with `[`; stored verbatim.
Static(Name<'a>),
/// Argument written as `[...]`; holds the text after the opening bracket,
/// up to the closing bracket when one is present.
Dynamic(Name<'a>), }
/// A directive parsed from an attribute.
#[derive(Default)]
#[cfg_attr(feature = "serde", derive(Serialize))]
pub struct Directive<'a> {
// Directive name without the `v-` mark (`bind`, `on`, `slot` for the shorthand prefixes).
pub name: &'a str,
// Static or dynamic argument, `None` when the name carries no argument.
pub argument: Option<DirectiveArg<'a>>,
// Dot-separated modifiers; `prop` is appended when the attribute name starts with `.`.
pub modifiers: Vec<&'a str>,
// The attribute's value, if any.
pub expression: Option<AttributeValue<'a>>,
// Location of the attribute name.
pub head_loc: SourceLocation,
// Location of the whole attribute.
pub location: SourceLocation,
}
impl<'a> Directive<'a> {
    /// Returns `true` when the directive has no expression, or when the
    /// expression content has no character accepted by `non_whitespace`.
    pub fn has_empty_expr(&self) -> bool {
        match &self.expression {
            Some(value) => !value.content.contains(non_whitespace),
            None => true,
        }
    }

    /// Builds an error of the given `kind` when the expression is empty.
    ///
    /// The error points at the expression when one was written (but blank),
    /// otherwise at the attribute name. Returns `None` for a non-empty expression.
    pub fn check_empty_expr(&self, kind: ErrorKind) -> Option<CompilationError> {
        if !self.has_empty_expr() {
            return None;
        }
        // Clone only the location that is actually reported; the previous
        // `map_or(self.head_loc.clone(), ..)` cloned `head_loc` unconditionally.
        let loc = match &self.expression {
            Some(value) => value.location.clone(),
            None => self.head_loc.clone(),
        };
        Some(CompilationError::new(kind).with_location(loc))
    }
}
/// Root of a parsed template.
#[cfg_attr(feature = "serde", derive(Serialize))]
pub struct AstRoot<'a> {
// Top-level nodes, after whitespace handling.
pub children: Vec<AstNode<'a>>,
// Span from the token source's position when parsing started to where it ended.
pub location: SourceLocation,
}
/// How whitespace text is treated when nodes are finalised.
#[derive(Clone)]
pub enum WhitespaceStrategy {
/// Text is not condensed; all-whitespace text is still dropped at the start and end of a child list.
Preserve,
/// Text nodes are condensed and more all-whitespace nodes are dropped (see `compress_whitespaces`).
Condense,
}
impl Default for WhitespaceStrategy {
fn default() -> Self {
WhitespaceStrategy::Condense
}
}
/// Configuration of the parser. All hooks are plain function pointers.
#[derive(Clone)]
pub struct ParseOption {
// Whitespace handling strategy.
pub whitespace: WhitespaceStrategy,
// When `false`, comment tokens produce no node.
pub preserve_comment: bool,
// Called with the tag name and the stack of currently open elements.
pub get_namespace: fn(&str, &Vec<Element<'_>>) -> Namespace,
// Children are whitespace-compressed on close only when this returns `TextMode::Data`.
pub get_text_mode: fn(&str) -> TextMode,
// Tags for which this returns `true` are finished at their start tag, like self-closing tags.
pub is_void_tag: fn(&str) -> bool,
// Tags that switch the builder into `pre` handling.
pub is_pre_tag: fn(&str) -> bool,
// Tags for which this returns `true` are never classified as components.
pub is_custom_element: fn(&str) -> bool,
// Tags for which this returns `true` are classified as components.
pub is_builtin_component: fn(&str) -> bool,
// Tags for which this returns `false` are classified as components.
pub is_native_element: fn(&str) -> bool,
}
impl Default for ParseOption {
    /// Defaults: condense whitespace, keep comments, everything is in the HTML
    /// namespace with `Data` text mode, only `pre` is a pre tag, no void tags,
    /// no custom elements, no builtin components, every tag is native.
    fn default() -> Self {
        Self {
            whitespace: WhitespaceStrategy::default(),
            preserve_comment: true,
            get_namespace: |_tag, _ancestors| Namespace::Html,
            get_text_mode: |_tag| TextMode::Data,
            is_void_tag: no,
            is_pre_tag: |tag| tag == "pre",
            is_custom_element: no,
            is_builtin_component: no,
            is_native_element: yes,
        }
    }
}
/// Turns a token source into an `AstRoot` according to a `ParseOption`.
pub struct Parser {
// Cloned into a fresh builder on every `parse` call.
option: ParseOption,
}
impl Parser {
    /// Creates a parser that uses `option` for every subsequent `parse` call.
    pub fn new(option: ParseOption) -> Self {
        Self { option }
    }

    /// Consumes `tokens` and builds the AST, reporting problems to `err_handle`.
    ///
    /// The token source is asked once, up front, whether it wants the namespace
    /// flag hint; the answer is stored in the builder.
    pub fn parse<'a, Ts, E>(&self, tokens: Ts, err_handle: E) -> AstRoot<'a>
    where
        Ts: TokenSource<'a>,
        E: ErrorHandler,
    {
        let need_flag_namespace = tokens.need_flag_hint();
        let option = self.option.clone();
        let builder = AstBuilder {
            tokens,
            err_handle,
            option,
            open_elems: Vec::new(),
            root_nodes: Vec::new(),
            pre_count: 0,
            v_pre_index: None,
            need_flag_namespace,
        };
        builder.build_ast()
    }
}
/// Working state for one parse run.
struct AstBuilder<'a, Ts, Eh>
where
Ts: TokenSource<'a>,
Eh: ErrorHandler,
{
// Token stream being consumed.
tokens: Ts,
// Receives every error emitted while building.
err_handle: Eh,
// Copy of the parser's options.
option: ParseOption,
// Stack of elements whose end tag has not been seen yet; the last one is the innermost.
open_elems: Vec<Element<'a>>,
// Finished top-level nodes.
root_nodes: Vec<AstNode<'a>>,
// Number of currently open tags accepted by `is_pre_tag`.
pre_count: usize,
// Stack depth at which the active `v-pre` boundary element was opened, if any.
v_pre_index: Option<usize>,
// Value of `TokenSource::need_flag_hint`, captured when parsing started.
need_flag_namespace: bool,
}
impl<'a, Ts, Eh> AstBuilder<'a, Ts, Eh>
where
    Ts: TokenSource<'a>,
    Eh: ErrorHandler,
{
    /// Appends `node` to the innermost open element, or to the root list when
    /// no element is open.
    fn insert_node(&mut self, node: AstNode<'a>) {
        match self.open_elems.last_mut() {
            Some(parent) => parent.children.push(node),
            None => self.root_nodes.push(node),
        }
    }

    /// Reports an error of `kind` located at `loc` to the error handler.
    fn emit_error(&self, kind: ErrorKind, loc: SourceLocation) {
        self.err_handle
            .on_error(CompilationError::new(kind).with_location(loc))
    }
}
impl<'a, Ts, Eh> AstBuilder<'a, Ts, Eh>
where
Ts: TokenSource<'a>,
Eh: ErrorHandler,
{
/// Drives the whole parse: consumes every token, force-closes whatever is
/// still open (each reported as a missing end tag), applies whitespace
/// handling to the root nodes and returns the finished root.
fn build_ast(mut self) -> AstRoot<'a> {
    let start = self.tokens.current_position();
    while let Some(token) = self.tokens.next() {
        self.parse_token(token);
    }
    self.report_unclosed_script_comment();
    // Every `close_element` call pops exactly one element off the stack.
    while !self.open_elems.is_empty() {
        self.close_element(false);
    }
    debug_assert_eq!(self.pre_count, 0);
    debug_assert!(self.v_pre_index.is_none());
    let condense = self.need_condense();
    compress_whitespaces(&mut self.root_nodes, condense);
    AstRoot {
        location: self.tokens.get_location_from(start),
        children: self.root_nodes,
    }
}
/// Dispatches one token to the handler for its kind.
fn parse_token(&mut self, token: Token<'a>) {
    match token {
        Token::StartTag(tag) => self.parse_open_tag(tag),
        Token::EndTag(name) => self.parse_end_tag(name),
        Token::Text(text) => self.parse_text(text),
        Token::Interpolation(source) => self.parse_interpolation(source),
        Token::Comment(source) => self.parse_comment(source),
    }
}
/// Handles a start tag.
///
/// Self-closing tags and tags accepted by `is_void_tag` are finished and
/// inserted immediately. Any other tag is pushed onto the open-element stack
/// after `pre` / `v-pre` bookkeeping.
fn parse_open_tag(&mut self, tag: Tag<'a>) {
    let name = tag.name;
    let self_closing = tag.self_closing;
    let properties = self.parse_attributes(tag.attributes);
    let namespace = (self.option.get_namespace)(name, &self.open_elems);
    let location = SourceLocation {
        start: self.tokens.last_position(),
        end: self.tokens.current_position(),
    };
    let elem = Element {
        tag_name: name,
        tag_type: ElementType::Plain,
        namespace,
        properties,
        children: Vec::new(),
        location,
    };
    let stays_open = !(self_closing || (self.option.is_void_tag)(name));
    if stays_open {
        self.handle_pre_like(&elem);
        self.open_elems.push(elem);
        self.set_scanner_flag();
        return;
    }
    let node = self.parse_element(elem);
    self.insert_node(node);
}
/// Converts scanned attributes into element properties.
///
/// * Inside an active `v-pre` boundary every attribute stays a plain attribute.
/// * When the tag carries `v-pre` itself, that attribute becomes the first
///   property (as a directive) and all others stay plain attributes.
/// * Otherwise each attribute is turned into a directive when the directive
///   parser detects one, and kept as an attribute when it does not.
fn parse_attributes(&mut self, mut attrs: Vec<Attribute<'a>>) -> Vec<ElemProp<'a>> {
    if self.v_pre_index.is_some() {
        return attrs.into_iter().map(ElemProp::Attr).collect();
    }
    let mut dir_parser = DirectiveParser::new(&self.err_handle);
    if let Some(pos) = attrs.iter().position(|attr| attr.name == "v-pre") {
        let v_pre = dir_parser.parse(attrs.remove(pos));
        return std::iter::once(ElemProp::Dir(v_pre))
            .chain(attrs.into_iter().map(ElemProp::Attr))
            .collect();
    }
    let mut props = Vec::new();
    for attr in attrs {
        let prop = if dir_parser.detect_directive(&attr) {
            ElemProp::Dir(dir_parser.parse(attr))
        } else {
            ElemProp::Attr(attr)
        };
        props.push(prop);
    }
    props
}
/// Bookkeeping for an element that is about to be pushed onto the stack:
/// counts `pre` tags and records where a `v-pre` boundary starts.
fn handle_pre_like(&mut self, elem: &Element) {
    debug_assert!(
        !matches!(self.open_elems.last(), Some(top) if top.location == elem.location),
        "element should not be pushed to stack yet.",
    );
    let is_pre = (self.option.is_pre_tag)(elem.tag_name);
    if is_pre {
        self.pre_count += 1;
    }
    if is_v_pre_boundary(elem) {
        debug_assert!(self.v_pre_index.is_none());
        // The element will sit at this index once it is pushed.
        let depth = self.open_elems.len();
        self.v_pre_index = Some(depth);
    }
}
/// Handles an end tag.
///
/// The innermost open element whose name matches (ASCII case-insensitively)
/// is closed together with everything opened after it; only the matching
/// element counts as properly closed. An end tag that matches nothing is
/// reported as `InvalidEndTag`.
fn parse_end_tag(&mut self, end_tag: &'a str) {
    let matched = self
        .open_elems
        .iter()
        .rposition(|open| element_matches_end_tag(open, end_tag));
    let i = match matched {
        Some(i) => i,
        None => {
            let start = self.tokens.last_position();
            let loc = self.tokens.get_location_from(start);
            self.emit_error(ErrorKind::InvalidEndTag, loc);
            return;
        }
    };
    let pending = self.open_elems.len() - i;
    // Close from the innermost element outwards; the last one is the match.
    for remaining in (0..pending).rev() {
        self.close_element(remaining == 0);
    }
    debug_assert_eq!(self.open_elems.len(), i);
}
/// Pops the innermost open element, finalises it and inserts it into its parent.
///
/// `has_matched_end` is `false` when the element is closed implicitly; a
/// `MissingEndTag` error is then reported as an empty span at the element's start.
fn close_element(&mut self, has_matched_end: bool) {
    let mut closing = self.open_elems.pop().unwrap();
    self.set_scanner_flag();
    let tag_start = closing.location.start;
    if !has_matched_end {
        let at_start = SourceLocation {
            start: tag_start.clone(),
            end: tag_start.clone(),
        };
        self.emit_error(ErrorKind::MissingEndTag, at_start);
    }
    // From here on the location covers the element up to the current position.
    closing.location = self.tokens.get_location_from(tag_start);
    let inside_pre = self.pre_count > 0;
    if inside_pre {
        self.decrement_pre(&mut closing)
    } else if (self.option.get_text_mode)(closing.tag_name) == TextMode::Data {
        let condense = self.need_condense();
        compress_whitespaces(&mut closing.children, condense);
    }
    let node = self.parse_element(closing);
    self.insert_node(node);
}
/// Finishes a closing element while at least one `pre` tag is open.
///
/// Elements that are not `pre` tags themselves (per `ParseOption::is_pre_tag`)
/// are left untouched. For a `pre` tag the line break directly after the start
/// tag is dropped and the nesting counter is decremented.
fn decrement_pre(&mut self, elem: &mut Element) {
    debug_assert!(self.pre_count > 0);
    if !(self.option.is_pre_tag)(elem.tag_name) {
        return;
    }
    // The ignorable line break is the one right after the start tag, so it can
    // only be in the FIRST child. Inspecting the last child (as before) left it
    // in place for multi-child elements and could strip a line break from the
    // middle of the content instead.
    if let Some(AstNode::Text(tn)) = elem.children.first_mut() {
        tn.trim_leading_newline();
    }
    self.pre_count -= 1;
}
/// Ends the active `v-pre` boundary when the element being finished is the
/// one that opened it, i.e. when the stack depth equals the recorded index.
///
/// Must only be called while a boundary is active.
fn close_v_pre(&mut self) {
    let boundary = self.v_pre_index.unwrap();
    let depth = self.open_elems.len();
    debug_assert!(boundary <= depth);
    if boundary == depth {
        self.v_pre_index = None;
    }
}
/// Classifies a finished element and wraps it in an `AstNode`.
///
/// While a `v-pre` boundary is active the element stays `Plain`. Otherwise the
/// first matching rule wins: `slot` tag, special `template`, component.
fn parse_element(&mut self, mut elem: Element<'a>) -> AstNode<'a> {
    debug_assert!(elem.tag_type == ElementType::Plain);
    if let Some(boundary) = self.v_pre_index {
        debug_assert!(boundary != self.open_elems.len() || is_v_pre_boundary(&elem));
        self.close_v_pre();
        elem.tag_type = ElementType::Plain;
        return AstNode::Element(elem);
    }
    let detected = if elem.tag_name == "slot" {
        Some(ElementType::SlotOutlet)
    } else if is_template_element(&elem) {
        Some(ElementType::Template)
    } else if self.is_component(&elem) {
        Some(ElementType::Component)
    } else {
        None
    };
    if let Some(tag_type) = detected {
        elem.tag_type = tag_type;
    }
    AstNode::Element(elem)
}
/// Handles a text token and every text token that directly follows it,
/// producing a single text node for the whole run.
///
/// The first non-text token that ends the run has already been pulled from
/// the stream, so it is dispatched here after the text node is inserted.
fn parse_text(&mut self, text: VStr<'a>) {
    let start = self.tokens.last_position();
    let mut pieces = smallvec![text];
    let pending = loop {
        match self.tokens.next() {
            Some(Token::Text(more)) => pieces.push(more),
            other => break other,
        }
    };
    let end = self.tokens.last_position();
    let node = TextNode {
        text: pieces,
        location: SourceLocation { start, end },
    };
    self.insert_node(AstNode::Text(node));
    if let Some(token) = pending {
        self.parse_token(token);
    }
}
/// Inserts a comment node, unless comments are not preserved.
fn parse_comment(&mut self, c: &'a str) {
    if self.option.preserve_comment {
        let start = self.tokens.last_position();
        let location = self.tokens.get_location_from(start);
        self.insert_node(AstNode::Comment(SourceNode {
            source: c,
            location,
        }));
    }
}
/// Inserts an interpolation node holding `src`.
fn parse_interpolation(&mut self, src: &'a str) {
    let start = self.tokens.last_position();
    let location = self.tokens.get_location_from(start);
    self.insert_node(AstNode::Interpolation(SourceNode {
        source: src,
        location,
    }));
}
/// After the token stream is exhausted, reports
/// `EofInScriptHtmlCommentLikeText` when the innermost open element is a
/// `script` whose first child is text containing `<!--` without `-->`.
fn report_unclosed_script_comment(&mut self) {
    debug_assert!(self.tokens.next().is_none());
    let script = match self.open_elems.last() {
        Some(open) if open.tag_name.eq_ignore_ascii_case("script") => open,
        _ => return,
    };
    let body = match script.children.first() {
        Some(AstNode::Text(body)) => body,
        _ => return,
    };
    if !body.contains("<!--") || body.contains("-->") {
        return;
    }
    // Empty span at the last position.
    let at_end = SourceLocation {
        start: self.tokens.last_position(),
        end: self.tokens.last_position(),
    };
    self.emit_error(ErrorKind::EofInScriptHtmlCommentLikeText, at_end);
}
/// Tells the token source whether the innermost open element is in the HTML
/// namespace (`true` when no element is open).
///
/// The call is skipped when the token source did not ask for the hint.
#[inline]
fn set_scanner_flag(&mut self) {
    // `need_flag_namespace` is `TokenSource::need_flag_hint()` captured in
    // `Parser::parse`. The guard used to be inverted: it returned early exactly
    // when the hint was requested, so the flag was only ever set for sources
    // that did not need it.
    // NOTE(review): assumes `need_flag_hint()` returns `true` when the source
    // wants `set_is_in_html` to be called — confirm against the scanner.
    if !self.need_flag_namespace {
        return;
    }
    let in_html = match self.open_elems.last() {
        Some(innermost) => innermost.namespace == Namespace::Html,
        None => true,
    };
    self.tokens.set_is_in_html(in_html)
}
/// Decides whether `e` is a component.
///
/// A custom element never is. Otherwise it is a component when its tag is
/// `component`, starts with an ASCII uppercase letter, is a core or builtin
/// component, or is not a native element. Failing that, it is a component when
/// it has an `is` directive, or an `is` attribute whose value starts with `vue:`.
fn is_component(&self, e: &Element) -> bool {
    let tag = e.tag_name;
    let option = &self.option;
    if (option.is_custom_element)(tag) {
        return false;
    }
    let by_name = tag == "component"
        || tag.starts_with(|c: char| c.is_ascii_uppercase())
        || is_core_component(tag)
        || (option.is_builtin_component)(tag)
        || !(option.is_native_element)(tag);
    if by_name {
        return true;
    }
    for prop in &e.properties {
        let opts_in = match prop {
            ElemProp::Dir(dir) => dir.name == "is",
            ElemProp::Attr(attr) => {
                attr.name == "is"
                    && attr
                        .value
                        .as_ref()
                        .map_or(false, |v| v.content.starts_with("vue:"))
            }
        };
        if opts_in {
            return true;
        }
    }
    false
}
/// Whether the configured whitespace strategy is `Condense`.
fn need_condense(&self) -> bool {
    match self.option.whitespace {
        WhitespaceStrategy::Condense => true,
        WhitespaceStrategy::Preserve => false,
    }
}
}
// Leading `:` on an attribute name maps to the `bind` directive; in a `v-` name it ends the directive name.
const BIND_CHAR: char = ':';
// Introduces a modifier; as the leading character of an attribute name it also maps to `bind`.
const MOD_CHAR: char = '.';
// Leading `@` maps to the `on` directive.
const ON_CHAR: char = '@';
// Leading `#` maps to the `slot` directive.
const SLOT_CHAR: char = '#';
// Bytes that terminate the directive name inside a `v-` prefixed attribute name.
const SEP_BYTES: &[u8] = &[BIND_CHAR as u8, MOD_CHAR as u8];
// Characters a non-empty "prefixed" remainder is expected to start with.
const SHORTHANDS: &[char] = &[BIND_CHAR, ON_CHAR, SLOT_CHAR, MOD_CHAR];
// Prefix of long-form directive attribute names.
const DIR_MARK: &str = "v-";
// (directive name, remainder of the attribute name starting at its prefix character).
type StrPair<'a> = (&'a str, &'a str);
/// Turns attributes into directives, reporting malformed names to an error handler.
struct DirectiveParser<'a, 'e, Eh: ErrorHandler> {
// Destination for errors.
eh: &'e Eh,
// Location of the name of the attribute most recently inspected; used for name errors.
name_loc: SourceLocation,
// Location of the attribute most recently inspected.
location: SourceLocation,
// Result of the last `detect_directive` call, consumed by the following `parse`.
cached: Option<StrPair<'a>>,
}
impl<'a, 'e, Eh: ErrorHandler> DirectiveParser<'a, 'e, Eh> {
/// Creates a parser that reports to `eh`, with default locations and no cached detection.
fn new(eh: &'e Eh) -> Self {
    let cached = None;
    let location = Default::default();
    let name_loc = Default::default();
    Self {
        eh,
        name_loc,
        location,
        cached,
    }
}
/// Reports an error of `kind` located at the current attribute's name.
fn attr_name_err(&self, kind: ErrorKind) {
    let loc = self.name_loc.clone();
    self.eh
        .on_error(CompilationError::new(kind).with_location(loc));
}
/// Checks whether `attr` is a directive and caches the outcome for the
/// `parse` call that is expected to follow.
fn detect_directive(&mut self, attr: &Attribute<'a>) -> bool {
    debug_assert!(self.cached.is_none());
    let detected = self.detect_dir_name(attr);
    self.cached = detected;
    detected.is_some()
}
/// Remembers the locations of `attr` and of its name for later error reports.
fn set_location(&mut self, attr: &Attribute<'a>) {
    self.name_loc = attr.name_loc.clone();
    self.location = attr.location.clone();
}
/// Converts `attr` into a directive.
///
/// Uses the result cached by `detect_directive` when there is one, otherwise
/// runs the detection itself. The cache is always empty afterwards.
///
/// # Panics
/// Panics when `attr` is not a directive.
fn parse(&mut self, attr: Attribute<'a>) -> Directive<'a> {
    let detected = match self.cached.take() {
        Some(pair) => Some(pair),
        None => self.detect_dir_name(&attr),
    };
    let (name, prefixed) =
        detected.expect("Parse without detection requires attribute be directive.");
    // A leading `.` adds the `prop` modifier.
    let is_prop = attr.name.starts_with('.');
    let (arg_str, mods_str) = self.split_arg_and_mods(prefixed, name == "slot", is_prop);
    Directive {
        name,
        argument: self.parse_directive_arg(arg_str),
        modifiers: self.parse_directive_mods(mods_str, is_prop),
        expression: attr.value,
        head_loc: attr.name_loc,
        location: attr.location,
    }
}
fn detect_dir_name(&mut self, attr: &Attribute<'a>) -> Option<StrPair<'a>> {
self.set_location(attr);
self.parse_dir_name(attr)
}
/// Splits an attribute name into (directive name, remainder).
///
/// * `v-` names: the directive name runs up to the first `:` or `.`; the
///   remainder starts at that character (empty when there is none). An empty
///   directive name is reported as `MissingDirectiveName` and yields `None`.
/// * Names starting with `:`/`.`, `@` or `#` map to `bind`, `on` and `slot`;
///   the remainder is the whole attribute name.
/// * Anything else is not a directive.
fn parse_dir_name(&self, attr: &Attribute<'a>) -> Option<StrPair<'a>> {
    let name = attr.name;
    if name.starts_with(DIR_MARK) {
        let rest = &name[DIR_MARK.len()..];
        let (dir, prefixed) = match rest.bytes().position(|b| SEP_BYTES.contains(&b)) {
            Some(i) => rest.split_at(i),
            None => (rest, ""),
        };
        if dir.is_empty() {
            self.attr_name_err(ErrorKind::MissingDirectiveName);
            return None;
        }
        return Some((dir, prefixed));
    }
    let dir = match name.chars().next()? {
        BIND_CHAR | MOD_CHAR => "bind",
        ON_CHAR => "on",
        SLOT_CHAR => "slot",
        _ => return None,
    };
    Some((dir, name))
}
/// Splits the remainder of a directive attribute name into (argument, modifiers).
///
/// The returned modifier string is either empty or starts with `.`.
/// * Empty remainder: no argument, no modifiers.
/// * A lone prefix character: `MissingDirectiveArg` is reported.
/// * `slot`: a leading `.` is reported as `InvalidVSlotModifier`; otherwise
///   everything after the prefix is the argument and there are no modifiers.
/// * A leading `.` that is not the `.prop` shorthand: only modifiers.
/// * `[`-arguments are handled by `split_dynamic_arg`.
/// * Otherwise the argument ends at the first `.`.
fn split_arg_and_mods(&self, prefixed: &'a str, is_v_slot: bool, is_prop: bool) -> StrPair<'a> {
    debug_assert!(prefixed.is_empty() || prefixed.starts_with(SHORTHANDS));
    match prefixed.len() {
        0 => return ("", ""),
        1 => {
            self.attr_name_err(ErrorKind::MissingDirectiveArg);
            return ("", "");
        }
        _ => {}
    }
    let leads_with_mod = prefixed.starts_with(MOD_CHAR);
    let remain = &prefixed[1..];
    if is_v_slot {
        if leads_with_mod {
            self.attr_name_err(ErrorKind::InvalidVSlotModifier);
            return ("", prefixed);
        }
        debug_assert!(prefixed.starts_with(&[SLOT_CHAR, BIND_CHAR][..]));
        return (remain, "");
    }
    if leads_with_mod && !is_prop {
        return ("", prefixed);
    }
    if remain.starts_with('[') {
        return self.split_dynamic_arg(remain);
    }
    debug_assert!(!prefixed.starts_with(SLOT_CHAR));
    match remain.find(MOD_CHAR) {
        Some(i) => remain.split_at(i),
        None => (remain, ""),
    }
}
/// Splits a remainder that starts with a dynamic argument (`[...]`).
///
/// The argument runs through the first `]` (or to the end when there is
/// none). Anything between the `]` and the next `.` is reported as
/// `UnexpectedContentAfterDynamicDirective` and skipped.
fn split_dynamic_arg(&self, remain: &'a str) -> (&'a str, &'a str) {
    let end = match remain.find(']') {
        Some(close) => close + 1,
        None => remain.len(),
    };
    let (arg, trailing) = remain.split_at(end);
    if !trailing.starts_with(|c| c != MOD_CHAR) {
        return (arg, trailing);
    }
    self.attr_name_err(ErrorKind::UnexpectedContentAfterDynamicDirective);
    (arg, trailing.trim_start_matches(|c| c != MOD_CHAR))
}
/// Converts an argument string into a `DirectiveArg`.
///
/// * Empty string: `None`.
/// * No leading `[`: a static argument, stored verbatim.
/// * `[name]`: a dynamic argument holding `name`.
/// * `[name` without a closing bracket: `MissingDynamicDirectiveArgumentEnd`
///   is reported and everything after `[` is used.
fn parse_directive_arg(&self, arg: &'a str) -> Option<DirectiveArg<'a>> {
    if arg.is_empty() {
        return None;
    }
    Some(if !arg.starts_with('[') {
        DirectiveArg::Static(arg)
    } else if let Some(i) = arg.find(']') {
        // `find` yields a BYTE offset, which is what slicing needs. The former
        // `chars().position(..)` yielded a character index, so an argument with
        // multi-byte characters was cut short or hit a non-boundary slice.
        debug_assert!(i == arg.len() - 1);
        DirectiveArg::Dynamic(&arg[1..i])
    } else {
        self.attr_name_err(ErrorKind::MissingDynamicDirectiveArgumentEnd);
        DirectiveArg::Dynamic(&arg[1..])
    })
}
/// Splits a modifier string (empty, or starting with `.`) into modifier names.
///
/// Empty names (from `..` or a trailing `.`) are kept but reported as
/// `MissingDirectiveMod`. `prop` is appended when `is_prop` is set.
fn parse_directive_mods(&self, mods: &'a str, is_prop: bool) -> Vec<&'a str> {
    debug_assert!(mods.is_empty() || mods.starts_with(MOD_CHAR));
    let mut names: Vec<&'a str> = Vec::new();
    if !mods.is_empty() {
        // Skip the leading `.` so it does not produce an empty first name.
        for name in mods[1..].split('.') {
            if name.is_empty() {
                self.attr_name_err(ErrorKind::MissingDirectiveMod);
            }
            names.push(name);
        }
    }
    if is_prop {
        names.push("prop");
    }
    names
}
}
/// Applies whitespace handling to a list of sibling nodes, in place.
///
/// * Text with non-whitespace content is kept; it is condensed when `need_condense` is set.
/// * All-whitespace text at the first or last index is always removed.
/// * Other all-whitespace text is kept unless `need_condense` is set, in which case it is
///   removed when it sits between two comments, or between two elements and contains a
///   line break.
fn compress_whitespaces(nodes: &mut Vec<AstNode>, need_condense: bool) {
// Debug-only precondition: no two text nodes are adjacent.
debug_assert!({
let no_consecutive_text = |last_is_text, is_text| {
if last_is_text && is_text {
None
} else {
Some(is_text)
}
};
nodes
.iter()
.map(|n| matches!(n, AstNode::Text(_)))
.try_fold(false, no_consecutive_text)
.is_some()
});
// `i` only advances when the current node is kept, because `remove` shifts the rest left.
let mut i = 0;
while i < nodes.len() {
let should_remove = if let AstNode::Text(child) = &nodes[i] {
use AstNode as A;
if !child.is_all_whitespace() {
if need_condense {
compress_text_node(&mut nodes[i]);
}
false
} else if i == nodes.len() - 1 || i == 0 {
// Leading/trailing whitespace is dropped under either strategy.
true
} else if !need_condense {
false
} else {
// Neither first nor last, so both neighbours exist.
let prev = &nodes[i - 1];
let next = &nodes[i + 1];
match (prev, next) {
(A::Comment(_), A::Comment(_)) => true,
_ => is_element(prev) && is_element(next) && child.contains(&['\r', '\n'][..]),
}
}
} else {
false
};
if should_remove {
nodes.remove(i);
} else {
i += 1;
}
}
}
#[inline]
fn is_element(n: &AstNode) -> bool {
n.get_element().is_some()
}
/// Calls `compress_whitespace` on every piece of a text node.
/// Passing any other node kind is a logic error (asserted in debug builds, ignored otherwise).
fn compress_text_node(n: &mut AstNode) {
    let text_node = match n {
        AstNode::Text(text_node) => text_node,
        _ => {
            debug_assert!(false, "impossible");
            return;
        }
    };
    for piece in text_node.text.iter_mut() {
        piece.compress_whitespace();
    }
}
/// Whether `n` names one of the directives that make a `<template>` special:
/// `if`, `for`, `else`, `slot` or `else-if`. The comparison is case-sensitive.
fn is_special_template_directive(n: &str) -> bool {
    matches!(n, "if" | "for" | "else" | "slot" | "else-if")
}
/// Whether `e` is a `template` tag for which `find_dir` finds one of the
/// special template directives.
fn is_template_element(e: &Element) -> bool {
    if e.tag_name != "template" {
        return false;
    }
    find_dir(e, is_special_template_directive).is_some()
}
/// Whether the end tag name `tag` closes `e`; names are compared ignoring ASCII case.
fn element_matches_end_tag(e: &Element, tag: &str) -> bool {
    tag.eq_ignore_ascii_case(e.tag_name)
}
/// Whether `find_dir` finds a `pre` directive on `elem`.
fn is_v_pre_boundary(elem: &Element) -> bool {
    let found = find_dir(elem, "pre");
    found.is_some()
}
#[cfg(test)]
pub mod test {
    use super::*;
    use crate::{cast, error::test::TestErrorHandler, scanner::test::base_scan};

    /// Text followed by an interpolation and two self-closing elements yields
    /// four root nodes, the first two being the text and the interpolation.
    #[test]
    fn test_parse_text() {
        let ast = base_parse("hello {{world}}<p/><p/>");
        assert_eq!(ast.children.len(), 4);
        let mut nodes = ast.children.into_iter();
        let hello = nodes.next().unwrap();
        let world = nodes.next().unwrap();
        let text = cast!(hello, AstNode::Text);
        assert_eq!(text.text[0].raw, "hello ");
        let interpolation = cast!(world, AstNode::Interpolation);
        assert_eq!(interpolation.source, "world");
    }

    /// Scans and parses `s` with default options, except that the tag `comp`
    /// is not considered a native element.
    pub fn base_parse(s: &str) -> AstRoot {
        let option = ParseOption {
            is_native_element: |tag| tag != "comp",
            ..Default::default()
        };
        let parser = Parser::new(option);
        parser.parse(base_scan(s), TestErrorHandler)
    }

    /// Parses `s` and returns its last root node as an element.
    /// Panics when there is no root node or the last one is not an element.
    pub fn mock_element(s: &str) -> Element {
        let mut roots = base_parse(s).children;
        let last = roots.pop().unwrap();
        last.into_element()
    }
}