use markdown2pdf::markdown::*;
use super::common::parse;
/// Parses `input` as Markdown and returns the text gathered from the
/// resulting tokens via `Token::collect_all_text`.
fn collected(input: &str) -> String {
    let tokens = parse(input);
    Token::collect_all_text(&tokens)
}
/// The five XML-safe named entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`,
/// `&apos;`) must decode to their literal characters.
#[test]
fn xml_safe_entities() {
    assert_eq!(collected("a &amp; b"), "a & b");
    assert_eq!(collected("&lt;tag&gt;"), "<tag>");
    // The input uses `&quot;` so the Rust literal stays well-formed; only the
    // expected side carries (escaped) double quotes.
    assert_eq!(collected("she said &quot;hi&quot;"), "she said \"hi\"");
    assert_eq!(collected("it&apos;s"), "it's");
}
/// Frequently used HTML named entities must decode to the matching
/// typographic character.
#[test]
fn common_html_named_entities() {
    assert_eq!(collected("&copy; 2025"), "© 2025");
    assert_eq!(collected("&reg; mark"), "® mark");
    assert_eq!(collected("&trade;"), "™");
    assert_eq!(collected("&mdash;"), "—");
    assert_eq!(collected("&ndash;"), "–");
    assert_eq!(collected("&hellip;"), "…");
}
/// Decimal numeric character references (`&#NNN;`) must decode to the code
/// point they name.
#[test]
fn numeric_decimal_reference() {
    assert_eq!(collected("&#35;"), "#");
    assert_eq!(collected("&#65;"), "A");
    // 8212 == 0x2014, EM DASH: a code point outside ASCII.
    assert_eq!(collected("&#8212;"), "—");
}
/// Hexadecimal numeric character references must decode to the code point
/// they name, with either a lower-case or an upper-case `x` marker.
#[test]
fn numeric_hex_reference() {
    assert_eq!(collected("&#x23;"), "#");
    assert_eq!(collected("&#x41;"), "A");
    // Same code point, upper-case `X` prefix.
    assert_eq!(collected("&#X41;"), "A");
    assert_eq!(collected("&#x2014;"), "—");
}
/// An `&name;` sequence whose name is not a known entity must come back
/// unchanged.
#[test]
fn unknown_entity_passes_through() {
    let raw = "&zzznotreal;";
    assert_eq!(collected(raw), raw);
}
/// A known entity name that is not terminated by `;` must not be decoded;
/// the text is expected to come back exactly as written.
#[test]
fn missing_semicolon_passes_through() {
    let raw = "&amp foo";
    assert_eq!(collected(raw), raw);
}
/// A bare `&` surrounded by spaces is plain text and must be preserved.
#[test]
fn lone_ampersand_is_literal() {
    let raw = "a & b";
    assert_eq!(collected(raw), raw);
}
/// Entities nested inside emphasis must be decoded in the emphasis content.
///
/// Panics with the full token list if the first token is missing or is not
/// `Token::Emphasis`.
#[test]
fn entity_inside_emphasis() {
    let tokens = parse("*alpha &amp; beta*");
    // `first()` rather than `tokens[0]` so an empty parse result reports the
    // descriptive message below instead of an index-out-of-bounds panic.
    match tokens.first() {
        Some(Token::Emphasis { content, .. }) => {
            let inner = Token::collect_all_text(content);
            assert!(inner.contains("alpha & beta"), "got {:?}", inner);
        }
        _ => panic!("expected emphasis, got {:?}", tokens),
    }
}
/// Inside an inline code span an entity reference is literal text: the
/// resulting `Token::Code` content must still contain `&amp;` verbatim.
#[test]
fn entity_not_decoded_inside_code_span() {
    let tokens = parse("`&amp;`");
    let expected = vec![Token::Code {
        language: String::new(),
        content: "&amp;".to_string(),
        block: false,
    }];
    assert_eq!(tokens, expected);
}
/// Numeric references whose digits are not valid for their base must be
/// left untouched.
#[test]
fn invalid_numeric_passes_through() {
    // First case: non-hex digits after `&#x`; second: letters after `&#`.
    for raw in ["&#xZZZ;", "&#abc;"] {
        assert_eq!(collected(raw), raw);
    }
}
/// Named entities beyond the basic XML/Latin-1 set (Greek letters, math and
/// currency symbols, the soft hyphen) must decode to the expected code point.
#[test]
fn extended_named_entities_decode() {
    assert_eq!(collected("&alpha;"), "\u{03B1}");
    assert_eq!(collected("&beta;"), "\u{03B2}");
    assert_eq!(collected("&Pi;"), "\u{03A0}");
    assert_eq!(collected("&infin;"), "\u{221E}");
    assert_eq!(collected("&euro;"), "\u{20AC}");
    assert_eq!(collected("&para;"), "\u{00B6}");
    // Soft hyphen: invisible in most renderers, so it is spelled as an escape.
    assert_eq!(collected("&shy;"), "\u{00AD}");
}
/// The longest name in the HTML named-entity table must still be recognised
/// and decode to U+2233 (ANTICLOCKWISE CONTOUR INTEGRAL).
#[test]
fn longest_named_entity_decodes() {
    assert_eq!(
        collected("&CounterClockwiseContourIntegral;"),
        "\u{2233}"
    );
}
/// Named entities that expand to more than one code point must decode to
/// the full sequence.
#[test]
fn multi_codepoint_named_entities_decode() {
    // `&fjlig;` expands to the two ASCII letters "f" and "j".
    assert_eq!(collected("&fjlig;"), "fj");
    // `&ThickSpace;` expands to MEDIUM MATHEMATICAL SPACE + HAIR SPACE.
    assert_eq!(collected("&ThickSpace;"), "\u{205F}\u{200A}");
}
/// Entity names differing only in letter case are distinct entities and
/// must decode to different characters.
#[test]
fn entity_names_are_case_sensitive() {
    assert_eq!(collected("&Aacute;"), "\u{00C1}");
    assert_eq!(collected("&aacute;"), "\u{00E1}");
}
/// A numeric reference to code point 0, in decimal or hex form, must yield
/// U+FFFD REPLACEMENT CHARACTER rather than a NUL.
#[test]
fn numeric_null_becomes_replacement_char() {
    assert_eq!(collected("&#0;"), "\u{FFFD}");
    assert_eq!(collected("&#x0;"), "\u{FFFD}");
}
/// Numeric references into the UTF-16 surrogate range (U+D800..=U+DFFF) are
/// not valid scalar values and must yield U+FFFD REPLACEMENT CHARACTER.
#[test]
fn numeric_surrogate_becomes_replacement_char() {
    // Lower and upper bounds of the surrogate range, in hex.
    assert_eq!(collected("&#xD800;"), "\u{FFFD}");
    assert_eq!(collected("&#xDFFF;"), "\u{FFFD}");
    // 55296 == 0xD800: the same lower bound written in decimal.
    assert_eq!(collected("&#55296;"), "\u{FFFD}");
}
/// Numeric references just past the last Unicode code point (U+10FFFF) must
/// yield U+FFFD REPLACEMENT CHARACTER.
#[test]
fn numeric_out_of_range_becomes_replacement_char() {
    assert_eq!(collected("&#x110000;"), "\u{FFFD}");
    // 1114112 == 0x110000: the same value written in decimal.
    assert_eq!(collected("&#1114112;"), "\u{FFFD}");
}
/// A numeric reference with too many digits to be a valid reference must be
/// left in the output as literal text rather than decoded or replaced.
#[test]
fn numeric_overflow_passes_through_literal() {
    // Eleven decimal digits: larger than `u32::MAX` and longer than the
    // seven digits CommonMark allows in a decimal reference.
    // NOTE(review): the exact digit count is an assumption; confirm it
    // against the decoder's overflow handling.
    let raw = "&#99999999999;";
    assert_eq!(collected(raw), raw);
}
/// A numeric reference prefix with no digits at all (`&#;`, `&#x;`) must be
/// left untouched.
#[test]
fn empty_numeric_digits_passes_through() {
    for raw in ["&#;", "&#x;"] {
        assert_eq!(collected(raw), raw);
    }
}
/// Legacy HTML entity names that browsers accept without a trailing `;`
/// (such as `&AElig`) must not be decoded here; the text comes back as-is.
#[test]
fn legacy_non_semicolon_entity_passes_through() {
    let raw = "&AElig hello";
    assert_eq!(collected(raw), raw);
}
/// Several entities in a single paragraph must each be decoded, with the
/// separating spaces preserved.
#[test]
fn many_entities_in_one_paragraph() {
    let decoded = collected("&alpha; &beta; &gamma; &delta; &epsilon;");
    assert_eq!(decoded, "\u{03B1} \u{03B2} \u{03B3} \u{03B4} \u{03B5}");
}
/// A long `&name` with no terminating `;` must not swallow the rest of the
/// paragraph: both the leading `&…` text and the trailing words must still
/// be present in the output.
#[test]
fn unknown_long_entity_does_not_runaway() {
    let source = "a &thisnameisreallylongandnotrealatall but continues here.";
    let rendered = collected(source);
    // The ampersand and the start of the bogus name stay where they were.
    assert!(rendered.starts_with("a &thisname"), "got: {:?}", rendered);
    // The text after the bogus name was not consumed.
    assert!(rendered.contains("continues here"), "got: {:?}", rendered);
}