use std::collections::{HashMap, HashSet};
use crate::error::ParseError;
use crate::tokenizer::{tokenize, Token, TokenStream};
use crate::types::{ElementData, SourcePosition, SourceSpan};
use crate::Markdown;
/// Maximum number of elements that may be open (nested) at the same time.
///
/// `assemble` returns `ParseError::MaxDepthExceeded` for an opening or
/// self-closing tag encountered while this many elements are already open.
pub const MAX_DEPTH: u32 = 1024;
/// Largest input size, in bytes, accepted by `parse_owned`; longer inputs are
/// rejected with `ParseError::InputTooLarge`.
///
/// NOTE(review): the `u32::MAX - 1` bound presumably keeps byte offsets
/// representable in a `u32` — confirm against the tokenizer's position type.
pub const MAX_INPUT_BYTES: usize = u32::MAX as usize - 1;
/// Parses a borrowed string into a [`Markdown`] document.
///
/// The text is copied into an owned `String` and handed to [`parse_owned`],
/// which performs the actual work.
///
/// # Errors
///
/// Returns whatever error [`parse_owned`] reports for the copied input.
pub fn parse(input: &str) -> Result<Markdown, ParseError> {
    let owned = String::from(input);
    parse_owned(owned)
}
/// Parses `input` as a fragment.
///
/// This currently forwards `input` unchanged to `parse` and returns its
/// result, so the two entry points behave identically.
pub fn parse_fragment(input: &str) -> Result<Markdown, ParseError> {
parse(input)
}
/// Parses an owned string into a [`Markdown`] document without copying it.
///
/// The input is first checked against [`MAX_INPUT_BYTES`], then tokenized, and
/// finally the token stream is assembled into an element tree. Ownership of
/// `input` is passed on to the assembly step.
///
/// # Errors
///
/// * `ParseError::InputTooLarge` when `input` is longer than
///   [`MAX_INPUT_BYTES`] bytes.
/// * Any error produced by `tokenize` (converted via `?`) or by `assemble`.
pub fn parse_owned(input: String) -> Result<Markdown, ParseError> {
    let byte_len = input.len();
    if byte_len <= MAX_INPUT_BYTES {
        // Tokenize from a borrow first; `input` itself is moved into `assemble`.
        let token_stream = tokenize(&input)?;
        return assemble(input, token_stream);
    }
    Err(ParseError::InputTooLarge {
        size: byte_len as u64,
        max: MAX_INPUT_BYTES as u64,
    })
}
/// Record of `id` attribute values already seen within one scope, keyed by
/// tag name: each tag name maps to the set of `id` values used with that tag.
type IdScope = HashMap<String, HashSet<String>>;
/// An element whose opening tag has been seen but whose closing tag has not;
/// `assemble` keeps one of these per open element on its stack.
struct Frame {
// Tag name taken from the opening token; compared against the closing tag.
name: String,
// Attribute pairs taken from the opening token.
attrs: Vec<(String, String)>,
// `body_start` from the opening token; becomes the start of the element's
// `content_range`.
body_start: usize,
// Start position of the opening token; used as the start of the finished
// element's span and as the line reported in errors about this element.
span_start: SourcePosition,
// Elements completed while this frame was on top of the stack.
children: Vec<ElementData>,
// Ids seen among this frame's direct children; stays `None` until the
// first child carrying an `id` attribute is checked.
seen_ids: Option<IdScope>,
}
/// Turns a flat token stream into a tree of elements and wraps it, together
/// with the original `input` and the stream's trivia, in a [`Markdown`].
///
/// Tokens are consumed in order while a stack of [`Frame`]s tracks the
/// elements that are currently open:
///
/// * an opening tag pushes a new frame;
/// * a self-closing tag becomes a childless element with an empty content
///   range located at the end of the tag;
/// * a closing tag pops the top frame, which must carry the same name, and
///   turns it into an element spanning from the opening tag to the closing tag.
///
/// Every finished element is attached to the frame that is on top of the stack
/// at that moment, or to the root list when no frame is open. Before it is
/// attached, its `id` attribute (if any) is checked against the ids already
/// recorded in that same parent scope; for open/close pairs this check runs
/// when the closing tag is processed.
///
/// # Errors
///
/// * `ParseError::MaxDepthExceeded` — an opening or self-closing tag appears
///   while [`MAX_DEPTH`] frames are already open.
/// * `ParseError::StrayClose` — a closing tag appears with no open frame.
/// * `ParseError::MismatchedClose` — a closing tag's name differs from the top
///   frame's name.
/// * `ParseError::UnclosedTag` — frames remain once all tokens are consumed;
///   the innermost one is reported.
/// * Any error returned by `check_duplicate_id`.
fn assemble(input: String, stream: TokenStream) -> Result<Markdown, ParseError> {
    /// Builds the error for a tag that would nest deeper than `MAX_DEPTH`.
    fn too_deep(tag: String, line: u32) -> ParseError {
        ParseError::MaxDepthExceeded {
            tag,
            max: MAX_DEPTH,
            line,
        }
    }

    let depth_limit = MAX_DEPTH as usize;
    let TokenStream { tokens, trivia } = stream;
    let mut open_frames: Vec<Frame> = Vec::new();
    let mut top_level: Vec<ElementData> = Vec::new();
    let mut top_level_ids: Option<IdScope> = None;

    for token in tokens {
        match token {
            Token::Open {
                name,
                attrs,
                span,
                body_start,
            } => {
                if open_frames.len() >= depth_limit {
                    return Err(too_deep(name, span.start.line));
                }
                open_frames.push(Frame {
                    name,
                    attrs,
                    body_start,
                    span_start: span.start,
                    children: Vec::new(),
                    seen_ids: None,
                });
            }
            Token::SelfClose { name, attrs, span } => {
                if open_frames.len() >= depth_limit {
                    return Err(too_deep(name, span.start.line));
                }
                // The id is registered in the scope of the enclosing element.
                let parent_ids = current_scope(&mut open_frames, &mut top_level_ids);
                check_duplicate_id(&name, &attrs, span.start.line, parent_ids)?;
                // No body: use an empty range positioned at the end of the tag.
                let at = span.end.offset_usize();
                let element = ElementData {
                    tag: name,
                    attrs,
                    content_range: at..at,
                    children: Vec::new(),
                    span,
                    self_closing: true,
                };
                push_element(element, &mut open_frames, &mut top_level);
            }
            Token::Close {
                name,
                span,
                body_end,
            } => {
                let frame = match open_frames.pop() {
                    Some(frame) => frame,
                    None => {
                        return Err(ParseError::StrayClose {
                            tag: name,
                            line: span.start.line,
                        });
                    }
                };
                if frame.name != name {
                    return Err(ParseError::MismatchedClose {
                        found: name,
                        expected: frame.name,
                        line: span.start.line,
                    });
                }
                let Frame {
                    name: tag,
                    attrs,
                    body_start,
                    span_start,
                    children,
                    ..
                } = frame;
                // The frame has been popped, so the current scope is its parent's.
                let parent_ids = current_scope(&mut open_frames, &mut top_level_ids);
                check_duplicate_id(&tag, &attrs, span_start.line, parent_ids)?;
                let element = ElementData {
                    tag,
                    attrs,
                    content_range: body_start..body_end,
                    children,
                    span: SourceSpan {
                        start: span_start,
                        end: span.end,
                    },
                    self_closing: false,
                };
                push_element(element, &mut open_frames, &mut top_level);
            }
        }
    }

    // Anything still on the stack never saw its closing tag.
    match open_frames.pop() {
        Some(dangling) => Err(ParseError::UnclosedTag {
            tag: dangling.name,
            line: dangling.span_start.line,
        }),
        None => Ok(Markdown::from_parts(input, top_level, trivia)),
    }
}
/// Attaches a finished element to its parent.
///
/// The parent is the last frame in `stack`; when `stack` is empty the element
/// is appended to `roots` instead.
fn push_element(elem: ElementData, stack: &mut [Frame], roots: &mut Vec<ElementData>) {
    match stack.last_mut() {
        Some(parent) => parent.children.push(elem),
        None => roots.push(elem),
    }
}
/// Selects the id scope that applies to the next finished element.
///
/// Returns the `seen_ids` slot of the last frame in `stack`, or `root` when
/// `stack` is empty.
fn current_scope<'a>(
    stack: &'a mut [Frame],
    root: &'a mut Option<IdScope>,
) -> &'a mut Option<IdScope> {
    match stack.last_mut() {
        Some(innermost) => &mut innermost.seen_ids,
        None => root,
    }
}
fn check_duplicate_id(
tag: &str,
attrs: &[(String, String)],
line: u32,
seen: &mut Option<IdScope>,
) -> Result<(), ParseError> {
let Some(id) = attrs
.iter()
.find(|(k, _)| k == "id")
.map(|(_, v)| v.as_str())
else {
return Ok(());
};
let scope = seen.get_or_insert_with(HashMap::new);
if !scope
.entry(tag.to_string())
.or_default()
.insert(id.to_string())
{
return Err(ParseError::DuplicateId {
tag: tag.to_string(),
id: id.to_string(),
line,
});
}
Ok(())
}