use super::shared::{opt_span, GrammarSpan};
use crate::parser::ast::{Node, NodeKind};
use nom::IResult;
use nom::Input;
pub fn parse_entity_reference(input: GrammarSpan) -> IResult<GrammarSpan, Node> {
let fragment = input.fragment();
if !fragment.starts_with('&') {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
const MAX_ENTITY_LEN: usize = 64;
let semi_pos = fragment
.find(';')
.filter(|&idx| idx > 0 && idx < MAX_ENTITY_LEN)
.ok_or_else(|| {
nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::TakeUntil,
))
})?;
let consumed_len = semi_pos + 1;
let entity_str = &fragment[..consumed_len];
let decoded = match htmlescape::decode_html(entity_str) {
Ok(s) => s,
Err(_) => {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
};
if decoded == entity_str {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
let entity_span = input.take(consumed_len);
let rest = input.take_from(consumed_len);
let node = Node {
kind: NodeKind::Text(decoded),
span: opt_span(entity_span),
children: Vec::new(),
};
Ok((rest, node))
}
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: the inputs below must be the literal entity text (ampersand, name or
    // number, semicolon), not the already-decoded character; the parser rejects
    // anything that does not start with `&`.

    /// A named entity is decoded and the rest of the input is left untouched.
    #[test]
    fn smoke_test_parse_entity_reference_named() {
        let input = GrammarSpan::new("&copy; and more");
        let (rest, node) = parse_entity_reference(input).expect("parse failed");
        assert_eq!(rest.fragment(), &" and more");
        match node.kind {
            NodeKind::Text(s) => assert_eq!(s, "©"),
            other => panic!("expected Text, got {other:?}"),
        }
    }

    /// A decimal numeric character reference is decoded to the same character.
    #[test]
    fn smoke_test_parse_entity_reference_numeric_decimal() {
        let input = GrammarSpan::new("&#169;");
        let (_, node) = parse_entity_reference(input).expect("parse failed");
        match node.kind {
            NodeKind::Text(s) => assert_eq!(s, "©"),
            other => panic!("expected Text, got {other:?}"),
        }
    }

    /// An entity name the decoder does not know is reported as an error.
    #[test]
    fn smoke_test_parse_entity_reference_invalid_entity_fails() {
        let input = GrammarSpan::new("&nosuchentity;");
        assert!(parse_entity_reference(input).is_err());
    }

    /// Input that does not begin with `&` is rejected before any decoding.
    #[test]
    fn smoke_test_parse_entity_reference_requires_leading_ampersand() {
        let input = GrammarSpan::new("© and more");
        assert!(parse_entity_reference(input).is_err());
    }
}