use markdown::ParseOptions;
use markdown::mdast::Html;
use markdown::mdast::Node;
use markdown::to_mdast;
use serde::Deserialize;
use serde::Serialize;
use super::MdtError;
use super::MdtResult;
use crate::Position;
use crate::lexer::tokenize;
use crate::patterns::closing_pattern;
use crate::patterns::consumer_pattern;
use crate::patterns::inline_pattern;
use crate::patterns::provider_pattern;
use crate::tokens::Token;
use crate::tokens::TokenGroup;
/// A non-fatal problem noticed while turning token groups into blocks.
///
/// Diagnostics are collected by [`build_blocks_from_groups_with_diagnostics`]
/// (and therefore [`parse_with_diagnostics`]) instead of aborting the parse,
/// so a caller can report several problems from a single pass.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum ParseDiagnostic {
/// An opening tag that was still waiting for its closing tag when the
/// input ended.
///
/// `line` and `column` are copied from the start of the opening tag's
/// position.
UnclosedBlock {
name: String,
line: usize,
column: usize,
},
/// A transformer name that is not one of the recognised spellings.
///
/// `line` and `column` are the start of the tag group that contains the
/// transformer, not the position of the transformer name itself.
UnknownTransformer {
name: String,
line: usize,
column: usize,
},
/// A transformer that received the wrong number of arguments.
///
/// NOTE(review): nothing in the visible part of this file constructs this
/// variant; presumably it is emitted by a later validation step. Confirm
/// the meaning of `expected` and `got` against that code.
InvalidTransformerArgs {
name: String,
expected: String,
got: usize,
line: usize,
column: usize,
},
}
/// Parses markdown `content` and returns every template block found in it.
///
/// The input is reduced to its HTML nodes, those nodes are tokenized, and the
/// resulting token groups are paired up into blocks in strict mode.
///
/// # Errors
///
/// Returns an error when the markdown cannot be parsed, when tokenizing
/// fails, or when an opening tag is left without a closing tag
/// ([`MdtError::MissingClosingTag`]).
pub fn parse(content: impl AsRef<str>) -> MdtResult<Vec<Block>> {
    let groups = tokenize(get_html_nodes(content.as_ref())?)?;
    build_blocks_from_groups(&groups)
}
/// Parses markdown `content` like [`parse`], but reports recoverable problems
/// as [`ParseDiagnostic`] values instead of failing.
///
/// Returns the blocks that were successfully closed together with the
/// diagnostics gathered along the way.
///
/// # Errors
///
/// Returns an error when the markdown cannot be parsed or tokenizing fails.
pub fn parse_with_diagnostics(
    content: impl AsRef<str>,
) -> MdtResult<(Vec<Block>, Vec<ParseDiagnostic>)> {
    let groups = tokenize(get_html_nodes(content.as_ref())?)?;
    build_blocks_from_groups_with_diagnostics(&groups)
}
/// Builds blocks from already-tokenized groups in strict mode.
///
/// # Errors
///
/// Returns [`MdtError::MissingClosingTag`] when an opening tag is still
/// unclosed after all groups have been processed.
pub fn build_blocks_from_groups(token_groups: &[TokenGroup]) -> MdtResult<Vec<Block>> {
build_blocks_inner(token_groups, false)
}
/// Builds blocks from already-tokenized groups in lenient mode.
///
/// Opening tags that never receive a closing tag are dropped silently instead
/// of producing an error; only blocks that were closed are returned.
pub fn build_blocks_from_groups_lenient(token_groups: &[TokenGroup]) -> MdtResult<Vec<Block>> {
build_blocks_inner(token_groups, true)
}
/// Builds blocks from token groups, collecting diagnostics instead of failing.
///
/// Every provider, consumer, or inline opening tag is remembered until a
/// closing tag with the same name appears. A closing tag is matched against
/// the most recently opened block of that name; a closing tag with no
/// matching opener, and any group that is not recognised, is ignored.
///
/// Unknown transformer names are reported while classifying each group.
/// Openers still pending at the end are reported as
/// [`ParseDiagnostic::UnclosedBlock`], in the order they were opened.
///
/// Blocks are returned in the order their closing tags were encountered.
pub fn build_blocks_from_groups_with_diagnostics(
    token_groups: &[TokenGroup],
) -> MdtResult<(Vec<Block>, Vec<ParseDiagnostic>)> {
    let mut open_blocks: Vec<BlockCreator> = Vec::new();
    let mut finished: Vec<Block> = Vec::new();
    let mut diagnostics: Vec<ParseDiagnostic> = Vec::new();

    for group in token_groups {
        // Opening tags differ only in their block type, so reduce them to a
        // common tuple; closing and unknown groups are handled on the spot.
        let (block_type, name, transformers, arguments) =
            match classify_group_with_diagnostics(group, &mut diagnostics) {
                GroupKind::Provider {
                    name,
                    transformers,
                    arguments,
                } => (BlockType::Provider, name, transformers, arguments),
                GroupKind::Consumer {
                    name,
                    transformers,
                    arguments,
                } => (BlockType::Consumer, name, transformers, arguments),
                GroupKind::Inline {
                    name,
                    transformers,
                    arguments,
                } => (BlockType::Inline, name, transformers, arguments),
                GroupKind::Close { name } => {
                    // Search from the back so the innermost opener wins.
                    if let Some(index) = open_blocks.iter().rposition(|open| open.name == name) {
                        let mut creator = open_blocks.remove(index);
                        creator.closing = Some(group.position);
                        finished.push(creator.into_block()?);
                    }
                    continue;
                }
                GroupKind::Unknown => continue,
            };

        open_blocks.push(BlockCreator {
            name,
            r#type: block_type,
            opening: group.position,
            closing: None,
            transformers,
            arguments,
        });
    }

    // Whatever is still open never saw its closing tag.
    diagnostics.extend(
        open_blocks
            .into_iter()
            .map(|unclosed| ParseDiagnostic::UnclosedBlock {
                name: unclosed.name,
                line: unclosed.opening.start.line,
                column: unclosed.opening.start.column,
            }),
    );

    Ok((finished, diagnostics))
}
/// Shared implementation behind the strict and lenient block builders.
///
/// Opening tags are kept until a closing tag with the same name shows up; the
/// closing tag is paired with the most recently opened block of that name.
/// Closing tags without an opener and unrecognised groups are ignored.
///
/// When `lenient` is `false` and at least one opener is still pending at the
/// end, the earliest pending opener is reported through
/// [`MdtError::MissingClosingTag`]. When `lenient` is `true`, pending openers
/// are discarded.
fn build_blocks_inner(token_groups: &[TokenGroup], lenient: bool) -> MdtResult<Vec<Block>> {
    let mut open_blocks: Vec<BlockCreator> = Vec::new();
    let mut finished: Vec<Block> = Vec::new();

    for group in token_groups {
        // Opening tags differ only in their block type, so reduce them to a
        // common tuple; closing and unknown groups are handled on the spot.
        let (block_type, name, transformers, arguments) = match classify_group(group) {
            GroupKind::Provider {
                name,
                transformers,
                arguments,
            } => (BlockType::Provider, name, transformers, arguments),
            GroupKind::Consumer {
                name,
                transformers,
                arguments,
            } => (BlockType::Consumer, name, transformers, arguments),
            GroupKind::Inline {
                name,
                transformers,
                arguments,
            } => (BlockType::Inline, name, transformers, arguments),
            GroupKind::Close { name } => {
                // Search from the back so the innermost opener wins.
                if let Some(index) = open_blocks.iter().rposition(|open| open.name == name) {
                    let mut creator = open_blocks.remove(index);
                    creator.closing = Some(group.position);
                    finished.push(creator.into_block()?);
                }
                continue;
            }
            GroupKind::Unknown => continue,
        };

        open_blocks.push(BlockCreator {
            name,
            r#type: block_type,
            opening: group.position,
            closing: None,
            transformers,
            arguments,
        });
    }

    // Strict mode fails on the first opener that was never closed.
    match open_blocks.into_iter().next() {
        Some(unclosed) if !lenient => Err(MdtError::MissingClosingTag(unclosed.name)),
        _ => Ok(finished),
    }
}
pub fn get_html_nodes(content: impl AsRef<str>) -> MdtResult<Vec<Html>> {
let options = ParseOptions::gfm();
let mdast =
to_mdast(content.as_ref(), &options).map_err(|e| MdtError::Markdown(e.to_string()))?;
let mut html_nodes = vec![];
collect_html(&mdast, &mut html_nodes);
Ok(html_nodes)
}
/// Appends a clone of every HTML node reachable from `node` to `nodes`,
/// visiting the tree depth-first.
///
/// An HTML node is recorded as-is and not descended into; any other node
/// contributes only through its children, if it has any.
fn collect_html(node: &Node, nodes: &mut Vec<Html>) {
    if let Node::Html(html) = node {
        nodes.push(html.clone());
        return;
    }

    for child in node.children().into_iter().flatten() {
        collect_html(child, nodes);
    }
}
/// The result of classifying a single token group.
///
/// The three opening variants carry the same data: the block name, the
/// transformers parsed after each pipe token, and the string arguments that
/// appeared before the first pipe.
enum GroupKind {
/// The group matched the provider pattern.
Provider {
name: String,
transformers: Vec<Transformer>,
arguments: Vec<String>,
},
/// The group matched the consumer pattern.
Consumer {
name: String,
transformers: Vec<Transformer>,
arguments: Vec<String>,
},
/// The group matched the inline pattern.
Inline {
name: String,
transformers: Vec<Transformer>,
arguments: Vec<String>,
},
/// The group matched the closing pattern; `name` is the block being closed.
Close {
name: String,
},
/// The group matched none of the known patterns.
Unknown,
}
/// Determines what kind of tag `group` represents.
///
/// Patterns are tried in a fixed order (provider, consumer, inline, closing)
/// and the first one that matches wins. A pattern check that yields no result
/// is treated as "no match". Groups matching nothing are
/// [`GroupKind::Unknown`].
fn classify_group(group: &TokenGroup) -> GroupKind {
    if group.matches_pattern(&provider_pattern()).unwrap_or(false) {
        let (name, transformers, arguments) =
            extract_name_transformers_and_arguments(group, &Token::ProviderTag);
        GroupKind::Provider {
            name,
            transformers,
            arguments,
        }
    } else if group.matches_pattern(&consumer_pattern()).unwrap_or(false) {
        let (name, transformers, arguments) =
            extract_name_transformers_and_arguments(group, &Token::ConsumerTag);
        GroupKind::Consumer {
            name,
            transformers,
            arguments,
        }
    } else if group.matches_pattern(&inline_pattern()).unwrap_or(false) {
        let (name, transformers, arguments) =
            extract_name_transformers_and_arguments(group, &Token::InlineTag);
        GroupKind::Inline {
            name,
            transformers,
            arguments,
        }
    } else if group.matches_pattern(&closing_pattern()).unwrap_or(false) {
        GroupKind::Close {
            name: extract_close_name(group),
        }
    } else {
        GroupKind::Unknown
    }
}
/// Determines what kind of tag `group` represents, reporting unknown
/// transformer names along the way.
///
/// Patterns are tried in a fixed order (provider, consumer, inline, closing)
/// and the first one that matches wins. For opening tags, every transformer
/// name that is not recognised is appended to `diagnostics` as a
/// [`ParseDiagnostic::UnknownTransformer`] located at the start of the group.
fn classify_group_with_diagnostics(
    group: &TokenGroup,
    diagnostics: &mut Vec<ParseDiagnostic>,
) -> GroupKind {
    // All unknown transformers of a group are reported at the group's start.
    let mut report_unknown = |unknown: Vec<String>| {
        diagnostics.extend(unknown.into_iter().map(|unknown_name| {
            ParseDiagnostic::UnknownTransformer {
                name: unknown_name,
                line: group.position.start.line,
                column: group.position.start.column,
            }
        }));
    };

    if group.matches_pattern(&provider_pattern()).unwrap_or(false) {
        let (name, transformers, arguments, unknown) =
            extract_name_transformers_arguments_with_diagnostics(group, &Token::ProviderTag);
        report_unknown(unknown);
        GroupKind::Provider {
            name,
            transformers,
            arguments,
        }
    } else if group.matches_pattern(&consumer_pattern()).unwrap_or(false) {
        let (name, transformers, arguments, unknown) =
            extract_name_transformers_arguments_with_diagnostics(group, &Token::ConsumerTag);
        report_unknown(unknown);
        GroupKind::Consumer {
            name,
            transformers,
            arguments,
        }
    } else if group.matches_pattern(&inline_pattern()).unwrap_or(false) {
        let (name, transformers, arguments, unknown) =
            extract_name_transformers_arguments_with_diagnostics(group, &Token::InlineTag);
        report_unknown(unknown);
        GroupKind::Inline {
            name,
            transformers,
            arguments,
        }
    } else if group.matches_pattern(&closing_pattern()).unwrap_or(false) {
        GroupKind::Close {
            name: extract_close_name(group),
        }
    } else {
        GroupKind::Unknown
    }
}
/// Extracts the block name, transformers, and block arguments from an
/// opening tag group.
///
/// The tokens are read in three consecutive phases:
///
/// 1. everything up to and including the first token of the same type as
///    `tag_token` is skipped;
/// 2. the next identifier becomes the block name (tokens before it are
///    skipped);
/// 3. in the remainder, each argument delimiter seen before the first pipe
///    contributes the string that follows it, and each pipe starts a
///    transformer.
///
/// Transformers that cannot be parsed (unknown name or no name at all) are
/// dropped. The name is empty when no identifier follows the tag.
fn extract_name_transformers_and_arguments(
    group: &TokenGroup,
    tag_token: &Token,
) -> (String, Vec<Transformer>, Vec<String>) {
    let mut transformers = Vec::new();
    let mut arguments = Vec::new();
    let mut tokens = group.tokens.iter().peekable();

    // Phase 1: discard the prefix up to and including the tag token.
    let _ = tokens.by_ref().find(|token| token.same_type(tag_token));

    // Phase 2: the first identifier after the tag is the block name.
    let name = tokens
        .by_ref()
        .find_map(|token| match token {
            Token::Ident(ident) => Some(ident),
            _ => None,
        })
        .cloned()
        .unwrap_or_default();

    // Phase 3: block arguments, then the transformer pipeline.
    let mut seen_pipe = false;
    while let Some(token) = tokens.next() {
        match token {
            // Block arguments are only recognised before the first pipe.
            Token::ArgumentDelimiter if !seen_pipe => {
                while tokens
                    .next_if(|next| matches!(next, Token::Whitespace(_) | Token::Newline))
                    .is_some()
                {}
                // The token after the delimiter is consumed whether or not
                // it turns out to be a string.
                if let Some(Token::String(value, _)) = tokens.next() {
                    arguments.push(value.clone());
                }
            }
            Token::Pipe => {
                seen_pipe = true;
                transformers.extend(parse_transformer(&mut tokens));
            }
            _ => {}
        }
    }

    (name, transformers, arguments)
}
/// Extracts the block name, transformers, block arguments, and the names of
/// unknown transformers from an opening tag group.
///
/// The tokens are read in three consecutive phases:
///
/// 1. everything up to and including the first token of the same type as
///    `tag_token` is skipped;
/// 2. the next identifier becomes the block name (tokens before it are
///    skipped);
/// 3. in the remainder, each argument delimiter seen before the first pipe
///    contributes the string that follows it, and each pipe starts a
///    transformer.
///
/// The returned tuple is `(name, transformers, arguments, unknown_names)`.
/// A pipe that is not followed by an identifier contributes nothing.
fn extract_name_transformers_arguments_with_diagnostics(
    group: &TokenGroup,
    tag_token: &Token,
) -> (String, Vec<Transformer>, Vec<String>, Vec<String>) {
    let mut transformers = Vec::new();
    let mut arguments = Vec::new();
    let mut unknown_transformers = Vec::new();
    let mut tokens = group.tokens.iter().peekable();

    // Phase 1: discard the prefix up to and including the tag token.
    let _ = tokens.by_ref().find(|token| token.same_type(tag_token));

    // Phase 2: the first identifier after the tag is the block name.
    let name = tokens
        .by_ref()
        .find_map(|token| match token {
            Token::Ident(ident) => Some(ident),
            _ => None,
        })
        .cloned()
        .unwrap_or_default();

    // Phase 3: block arguments, then the transformer pipeline.
    let mut seen_pipe = false;
    while let Some(token) = tokens.next() {
        match token {
            // Block arguments are only recognised before the first pipe.
            Token::ArgumentDelimiter if !seen_pipe => {
                while tokens
                    .next_if(|next| matches!(next, Token::Whitespace(_) | Token::Newline))
                    .is_some()
                {}
                // The token after the delimiter is consumed whether or not
                // it turns out to be a string.
                if let Some(Token::String(value, _)) = tokens.next() {
                    arguments.push(value.clone());
                }
            }
            Token::Pipe => {
                seen_pipe = true;
                match parse_transformer_with_unknown(&mut tokens) {
                    TransformerParseResult::Ok(transformer) => transformers.push(transformer),
                    TransformerParseResult::Unknown(unknown_name) => {
                        unknown_transformers.push(unknown_name);
                    }
                    TransformerParseResult::NoIdent => {}
                }
            }
            _ => {}
        }
    }

    (name, transformers, arguments, unknown_transformers)
}
/// Outcome of trying to parse one transformer after a pipe token.
enum TransformerParseResult {
/// A recognised transformer together with its parsed arguments.
Ok(Transformer),
/// An identifier was present but is not a known transformer name; the
/// identifier is carried along for reporting.
Unknown(String),
/// The token after the pipe (ignoring whitespace and newlines) was not an
/// identifier, or there was no token at all.
NoIdent,
}
/// Parses one transformer from `iter`, which must be positioned just after a
/// pipe token.
///
/// Leading whitespace and newlines are skipped, then the next token is
/// consumed. If it is an identifier naming a known transformer (camelCase and
/// snake_case spellings are both accepted where listed below), its arguments
/// are parsed and the transformer is returned. An unrecognised identifier
/// yields [`TransformerParseResult::Unknown`] without touching the tokens
/// that follow; anything else yields [`TransformerParseResult::NoIdent`].
fn parse_transformer_with_unknown(
    iter: &mut std::iter::Peekable<std::slice::Iter<'_, Token>>,
) -> TransformerParseResult {
    while iter
        .next_if(|next| matches!(next, Token::Whitespace(_) | Token::Newline))
        .is_some()
    {}

    let Some(Token::Ident(ident)) = iter.next() else {
        return TransformerParseResult::NoIdent;
    };

    let kind = match ident.as_str() {
        "trim" => TransformerType::Trim,
        "trimStart" | "trim_start" => TransformerType::TrimStart,
        "trimEnd" | "trim_end" => TransformerType::TrimEnd,
        "wrap" => TransformerType::Wrap,
        "indent" => TransformerType::Indent,
        "codeblock" | "codeBlock" | "code_block" => TransformerType::CodeBlock,
        "code" => TransformerType::Code,
        "replace" => TransformerType::Replace,
        "prefix" => TransformerType::Prefix,
        "suffix" => TransformerType::Suffix,
        "linePrefix" | "line_prefix" => TransformerType::LinePrefix,
        "lineSuffix" | "line_suffix" => TransformerType::LineSuffix,
        "if" => TransformerType::If,
        _ => return TransformerParseResult::Unknown(ident.clone()),
    };

    TransformerParseResult::Ok(Transformer {
        r#type: kind,
        args: parse_transformer_args(iter),
    })
}
/// Parses one transformer from `iter`, discarding the reason for failure.
///
/// Returns `None` both when the transformer name is unknown and when no
/// identifier follows the pipe.
fn parse_transformer(
    iter: &mut std::iter::Peekable<std::slice::Iter<'_, Token>>,
) -> Option<Transformer> {
    if let TransformerParseResult::Ok(transformer) = parse_transformer_with_unknown(iter) {
        Some(transformer)
    } else {
        None
    }
}
/// Parses the argument list that follows a transformer name.
///
/// Each argument is introduced by an argument delimiter and may be a string,
/// an integer or float (both stored as [`Argument::Number`]), or the
/// identifiers `true` / `false`. Whitespace and newlines around delimiters
/// and values are skipped.
///
/// Parsing stops at the first token that is not a delimiter, or at the first
/// delimiter followed by an unsupported value. Whitespace skipped before
/// stopping stays consumed, and so does the unsupported value itself.
fn parse_transformer_args(
    iter: &mut std::iter::Peekable<std::slice::Iter<'_, Token>>,
) -> Vec<Argument> {
    let mut parsed = Vec::new();

    loop {
        while iter
            .next_if(|next| matches!(next, Token::Whitespace(_) | Token::Newline))
            .is_some()
        {}

        if !matches!(iter.peek(), Some(Token::ArgumentDelimiter)) {
            break;
        }
        iter.next();

        while iter
            .next_if(|next| matches!(next, Token::Whitespace(_) | Token::Newline))
            .is_some()
        {}

        let value = match iter.next() {
            Some(Token::String(text, _)) => Argument::String(text.clone()),
            Some(Token::Int(int)) => Argument::Number(OrderedFloat(*int as f64)),
            Some(Token::Float(float)) => Argument::Number(OrderedFloat(*float)),
            Some(Token::Ident(word)) if word == "true" => Argument::Boolean(true),
            Some(Token::Ident(word)) if word == "false" => Argument::Boolean(false),
            _ => break,
        };
        parsed.push(value);
    }

    parsed
}
/// Returns the name of the block a closing tag group refers to.
///
/// The name is the first identifier that appears after the first close-tag
/// token. An empty string is returned when the group contains no close-tag
/// token or no identifier follows it.
fn extract_close_name(group: &TokenGroup) -> String {
    group
        .tokens
        .iter()
        // Drop everything before the first close tag; identifiers there
        // do not count.
        .skip_while(|token| !matches!(token, Token::CloseTag))
        .find_map(|token| match token {
            Token::Ident(name) => Some(name.clone()),
            _ => None,
        })
        .unwrap_or_default()
}
/// A block under construction: an opening tag that may not have met its
/// closing tag yet.
struct BlockCreator {
// Name taken from the opening tag; closing tags are matched against it.
name: String,
r#type: BlockType,
// Position of the opening tag group.
opening: Position,
// Position of the closing tag group; `None` until a matching close is seen.
closing: Option<Position>,
transformers: Vec<Transformer>,
arguments: Vec<String>,
}
impl BlockCreator {
    /// Converts the creator into a finished [`Block`].
    ///
    /// # Errors
    ///
    /// Returns [`MdtError::MissingClosingTag`] with the block name when no
    /// closing position has been recorded.
    pub fn into_block(self) -> MdtResult<Block> {
        let Self {
            name,
            r#type,
            opening,
            closing,
            transformers,
            arguments,
        } = self;

        match closing {
            Some(closing) => Ok(Block {
                name,
                r#type,
                opening,
                closing,
                transformers,
                arguments,
            }),
            None => Err(MdtError::MissingClosingTag(name)),
        }
    }
}
/// A fully delimited template block: an opening tag paired with its closing
/// tag.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Block {
/// Name shared by the opening and closing tag.
pub name: String,
/// Whether the block is a provider, consumer, or inline block.
pub r#type: BlockType,
/// Position of the opening tag group.
pub opening: Position,
/// Position of the closing tag group.
pub closing: Position,
/// Transformers listed on the opening tag, in source order.
pub transformers: Vec<Transformer>,
/// String arguments given on the opening tag before the first pipe.
pub arguments: Vec<String>,
}
/// One step of a block's transformer pipeline: which transformer to apply
/// and the arguments it was given.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Transformer {
/// Which transformer this is.
pub r#type: TransformerType,
/// Arguments in the order they were written.
pub args: Vec<Argument>,
}
/// A single argument passed to a transformer.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum Argument {
/// A string literal.
String(String),
/// A numeric literal; integers are converted to `f64` when parsed.
Number(OrderedFloat),
/// The bare identifiers `true` or `false`.
Boolean(bool),
}
/// Thin wrapper around `f64` used for numeric transformer arguments.
///
/// It exists so that equality can be defined separately from `f64`'s own
/// `==` (see the `PartialEq` implementation).
///
/// NOTE(review): despite the name, no `Ord`/`PartialOrd`/`Eq` implementation
/// is visible in this part of the file.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct OrderedFloat(pub f64);
/// Approximate equality for the wrapped value.
impl PartialEq for OrderedFloat {
// Delegates to `float_cmp::approx_eq!` without an explicit margin.
// NOTE(review): presumably this uses the crate's default tolerance for
// `f64` — confirm. Approximate equality is not guaranteed to be transitive.
fn eq(&self, other: &Self) -> bool {
float_cmp::approx_eq!(f64, self.0, other.0)
}
}
/// Formats the wrapped value using `f64`'s default `Display` output.
impl std::fmt::Display for OrderedFloat {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A fresh `{}` is used on purpose: width/precision flags given by the
        // caller are not forwarded to the inner value.
        f.write_fmt(format_args!("{}", self.0))
    }
}
/// The set of transformers that can appear after a pipe in an opening tag.
///
/// The comments on each variant list the spellings accepted by the parser in
/// this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum TransformerType {
/// Written as `trim`.
Trim,
/// Written as `trimStart` or `trim_start`.
TrimStart,
/// Written as `trimEnd` or `trim_end`.
TrimEnd,
/// Written as `wrap`.
Wrap,
/// Written as `indent`.
Indent,
/// Written as `codeblock`, `codeBlock`, or `code_block`.
CodeBlock,
/// Written as `code`.
Code,
/// Written as `replace`.
Replace,
/// Written as `prefix`.
Prefix,
/// Written as `suffix`.
Suffix,
/// Written as `linePrefix` or `line_prefix`.
LinePrefix,
/// Written as `lineSuffix` or `line_suffix`.
LineSuffix,
/// Written as `if`.
If,
}
/// Writes the camelCase name of the transformer (for example `trimStart`).
impl std::fmt::Display for TransformerType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Trim => "trim",
            Self::TrimStart => "trimStart",
            Self::TrimEnd => "trimEnd",
            Self::Wrap => "wrap",
            Self::Indent => "indent",
            Self::CodeBlock => "codeBlock",
            Self::Code => "code",
            Self::Replace => "replace",
            Self::Prefix => "prefix",
            Self::Suffix => "suffix",
            Self::LinePrefix => "linePrefix",
            Self::LineSuffix => "lineSuffix",
            Self::If => "if",
        };
        f.write_str(label)
    }
}
/// The kind of tag that opened a block.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum BlockType {
/// Opened by a tag matching the provider pattern.
Provider,
/// Opened by a tag matching the consumer pattern.
Consumer,
/// Opened by a tag matching the inline pattern.
Inline,
}
/// Writes the lowercase name of the block type.
impl std::fmt::Display for BlockType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Provider => "provider",
            Self::Consumer => "consumer",
            Self::Inline => "inline",
        };
        f.write_str(label)
    }
}