use dmc::engine::compile::Compiler;
use dmc_diagnostic::Code;
use duck_diagnostic::DiagnosticEngine;
/// Compiles `src` against a freshly created diagnostic engine and returns the
/// `html` field of the compiler output.
///
/// Anything recorded in the diagnostic engine is dropped when this returns.
fn compile_default(src: &str) -> String {
    let mut diagnostics: DiagnosticEngine<Code> = DiagnosticEngine::new();
    let compiled = Compiler::compile(src, &mut diagnostics);
    compiled.html
}
/// Steps a 64-bit linear congruential generator and returns a sample.
///
/// `seed` is overwritten with `seed * 6364136223846793005 + 1442695040888963407`
/// (both operations wrap on overflow). The return value is the new state
/// shifted right by 33 bits, so only its upper 31 bits are exposed.
fn lcg(seed: &mut u64) -> u32 {
    const MULTIPLIER: u64 = 6364136223846793005;
    const INCREMENT: u64 = 1442695040888963407;

    let next = seed.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
    *seed = next;
    (next >> 33) as u32
}
/// Feeds 400 pseudo-random strings (1 to 200 characters each, drawn from a
/// small alphabet that includes NUL and other control bytes) through the
/// compiler and asserts that the produced HTML never contains a NUL character.
#[test]
fn output_never_contains_null_byte() {
    // Letters, whitespace, markup punctuation, a backslash, and control bytes.
    const ALPHABET: &[u8] = b"abc \n\t<>[](){}\\\0\x01\x07\x1F";

    let mut seed = 0xA11C0u64;
    for _ in 0..400 {
        // The length is drawn first, then one draw per character.
        let n = (lcg(&mut seed) as usize % 200) + 1;
        let s: String = (0..n)
            .map(|_| ALPHABET[(lcg(&mut seed) as usize) % ALPHABET.len()] as char)
            .collect();

        let html = compile_default(&s);
        assert!(!html.contains('\0'), "null byte leaked in output for src={s:?}");
    }
}
/// Compiles pairs of alternative spellings of the same construct and checks
/// that they agree.
///
/// Two outputs agree when they are identical after removing every space and
/// newline, or, failing that, when both lowercased outputs contain the same
/// one of the markers `<h1>`, `<h2>`, `<hr`, `<em>`, `<strong>`.
#[test]
fn equivalent_grammar_forms_produce_equivalent_html() {
    // Markers that, when present on both sides, count as "same element".
    const SHARED_MARKERS: [&str; 5] = ["<h1>", "<h2>", "<hr", "<em>", "<strong>"];

    let pairs: &[(&str, &str)] = &[
        ("# Hello\n", "Hello\n=====\n"),
        ("## Hi\n", "Hi\n----\n"),
        ("`x`\n", "`x`\n"),
        ("---\n", "***\n"),
        ("---\n", "___\n"),
        ("*hi*\n", "_hi_\n"),
        ("**hi**\n", "__hi__\n"),
    ];

    // Compile and drop spaces/newlines so layout differences are ignored.
    let squash = |src: &str| compile_default(src).replace([' ', '\n'], "");

    for &(a, b) in pairs {
        let ha = squash(a);
        let hb = squash(b);
        if ha == hb {
            continue;
        }

        let lower_a = ha.to_lowercase();
        let lower_b = hb.to_lowercase();
        let shares_marker =
            SHARED_MARKERS.iter().any(|&marker| lower_a.contains(marker) && lower_b.contains(marker));
        if !shares_marker {
            panic!("equivalent forms diverged:\n a={a:?} -> {ha}\n b={b:?} -> {hb}");
        }
    }
}
/// Compiles three one-line paragraphs both individually and joined with blank
/// lines between them, then asserts that each individual output (trimmed, with
/// leading `<p>` and trailing `</p>` removed) occurs in the joined output.
#[test]
fn block_concatenation_is_additive_for_simple_paragraphs() {
    let parts = ["alpha\n", "beta\n", "gamma\n"];

    // Each part already ends in a newline; one more yields a blank separator line.
    let joined: String = parts.iter().map(|p| format!("{p}\n")).collect();
    let html_joined = compile_default(&joined);

    for p in parts {
        let h = compile_default(p);
        let lookfor = h.trim().trim_start_matches("<p>").trim_end_matches("</p>");
        assert!(html_joined.contains(lookfor), "joined html missing {lookfor:?}: {html_joined}");
    }
}
/// Compiles 5000 repetitions of a paragraph containing bold, inline code and a
/// link. The output is discarded; the test passes if the call returns without
/// panicking.
#[test]
fn very_large_input_terminates() {
    let paragraph = "a paragraph with **bold** and `code` and [link](url).\n\n";
    let document = paragraph.repeat(5000);
    let _ = compile_default(&document);
}
/// Compiles every combination of a block template and an inline snippet.
///
/// Each block template contains the placeholder `{INLINE}`, which is replaced
/// by each inline snippet in turn. Outputs are discarded; the test passes if
/// no compilation panics and at least 200 combinations were exercised.
#[test]
fn combinatorial_block_inline_corpus() {
    let blocks: &[&str] = &[
        "{INLINE}\n",
        "# {INLINE}\n",
        "## {INLINE}\n",
        "> {INLINE}\n",
        "- {INLINE}\n",
        "1. {INLINE}\n",
        "| {INLINE} |\n|-|\n",
        "```text\n{INLINE}\n```\n",
        " {INLINE}\n",
        "<Comp>{INLINE}</Comp>\n",
    ];
    let inlines: &[&str] = &[
        "plain",
        "*emph*",
        "**bold**",
        "***both***",
        "~~strike~~",
        "`code`",
        "[link](url)",
        // Image syntax: the corpus previously had no image inline at all.
        "![img](url)",
        // Empty inline: the template collapses to its bare block markers.
        "",
        "<http://x.com>",
        "https://x.com",
        "html: <span>x</span>",
        "entity: &",
        "mdx: {1+1}",
        "esc: \\*",
        "mix: **a `b` c**",
        "long: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        "unicode: 🦆 café",
        "footnote ref: [^x]",
        "math: $x_y^z$",
        // Ends with an image so every common inline kind appears on one line.
        "lots: *a* **b** `c` [d](e) ![f](g)",
    ];

    let mut count = 0;
    for b in blocks {
        for i in inlines {
            let src = b.replace("{INLINE}", i);
            let _ = compile_default(&src);
            count += 1;
        }
    }

    println!("combinatorial: {count} inputs");
    assert!(count >= 200, "expected ≥200 combinations, got {count}");
}
/// Inserts each ASCII character (0 through 127) at the start, the midpoint and
/// the end of a fixed sentence and compiles all three variants. Outputs are
/// discarded; the test passes if no compilation panics.
#[test]
fn ascii_byte_insertion_corpus() {
    let seed = "hello world test paragraph end.";
    // The sentence is pure ASCII, so splitting at the byte midpoint is safe.
    let (head, tail) = seed.split_at(seed.len() / 2);

    for c in (0u8..=127).map(char::from) {
        let variants = [
            format!("{c}{seed}\n"),
            format!("{}{c}{}\n", head, tail),
            format!("{seed}{c}\n"),
        ];
        for variant in &variants {
            let _ = compile_default(variant);
        }
    }
}
/// Prefixes every line of several bodies with each container marker and
/// compiles the result in five placements: alone, followed by a blank line,
/// preceded by a blank line, directly after a paragraph line, and after a
/// paragraph plus a blank line. Outputs are discarded.
#[test]
fn container_boundaries_compile() {
    let containers = ["> ", "- ", "1. ", " "];
    let bodies = ["text", "**bold** text", "# heading", "```\ncode\n```", "| t |\n|-|"];

    for c in containers {
        for b in bodies {
            // Every body line gets the container prefix and a trailing newline.
            let s: String = b.lines().map(|l| format!("{c}{l}\n")).collect();

            let _ = compile_default(&s);
            let placements = [
                format!("{s}\n"),
                format!("\n{s}"),
                format!("para\n{s}"),
                format!("para\n\n{s}"),
            ];
            for placed in &placements {
                let _ = compile_default(placed);
            }
        }
    }
}
/// Compiles a list of inputs built around `---` delimited front matter:
/// empty, quoted, block-scalar and list values, repeated delimiters, a
/// delimiter after body text, and unterminated forms. Outputs are discarded;
/// the test passes if no compilation panics.
#[test]
fn frontmatter_corner_cases() {
    const CASES: &[&str] = &[
        "---\n---\nbody\n",
        "---\ntitle: x\n---\n",
        "---\ntitle: x\n---\nbody\n",
        "---\ntitle: x\n---\n\nbody\n",
        "---\n title: x\n---\nbody\n",
        "---\ntitle: \"x\"\n---\nbody\n",
        "---\ntitle: 'x'\n---\nbody\n",
        "---\ntitle: |\n a\n b\n---\nbody\n",
        "---\ntags:\n - a\n - b\n---\nbody\n",
        "---\nempty:\n---\n",
        "---\n---\n---\n",
        "before\n---\ntitle: x\n---\nbody\n",
        "---\nUNTERMINATED\n",
        "---\ntitle: 'unclosed string\n---\n",
    ];

    for &case in CASES {
        let _ = compile_default(case);
    }
}
/// Repeats each sample input 1000 times, compiles it, and asserts that the
/// output byte length is less than 50 times the input byte length.
#[test]
fn output_size_is_roughly_linear() {
    const INPUTS: &[&str] = &[
        "plain text\n",
        "**bold** _italic_ `code`\n",
        "- a\n- b\n- c\n",
        "| h |\n|-|\n| 1 |\n| 2 |\n",
        "[a](url)\n",
        "<Comp prop=\"x\" />\n",
        "# h\n## h2\n### h3\n",
    ];

    for &input in INPUTS {
        let repeated = input.repeat(1000);
        let html = compile_default(&repeated);
        let (in_len, out_len) = (repeated.len(), html.len());
        let ratio = out_len as f64 / in_len as f64;
        assert!(
            ratio < 50.0,
            "output blowup x{ratio:.1} for repeated {input:?} (in={} out={})",
            in_len,
            out_len
        );
    }
}
/// Runs 400 trials, each concatenating 1 to 30 pseudo-randomly chosen
/// fragments plus a final newline, and compiles the result. Outputs are
/// discarded; a progress line is printed every 50 trials.
#[test]
fn grammar_walk_400_steps() {
    const PRODUCTIONS: &[&str] = &[
        "para",
        "\n# heading",
        "\n- list item",
        "\n> quote",
        "\n```\ncode\n```",
        "\n[link](url)",
        "\n",
        " **bold**",
        " _italic_",
        " `code`",
        " ~~strike~~",
        " {1+2}",
        " <Comp/>",
        " <http://x>",
        " &",
        " \\*esc*",
        " [^fn]",
        "\n| a | b |\n|-|-|\n| 1 | 2 |\n",
        " $math$",
        " 🦆",
    ];

    let mut seed = 0xCAB4u64;
    for trial in 0..400 {
        // The fragment count is drawn first, then one draw per fragment.
        let n = (lcg(&mut seed) as usize % 30) + 1;
        let mut s: String = (0..n)
            .map(|_| PRODUCTIONS[(lcg(&mut seed) as usize) % PRODUCTIONS.len()])
            .collect();
        s.push('\n');

        let _ = compile_default(&s);
        if trial % 50 == 0 {
            println!("walk #{trial:04}");
        }
    }
}
/// Compiles every ordered pair of block constructs with one extra newline
/// inserted between them. Outputs are discarded; the test passes if no
/// compilation panics.
#[test]
fn block_context_reset_after_blank_line() {
    let constructs = ["# h\n", "- l\n", "> q\n", "```\nc\n```\n", "| t |\n|-|\n"];

    // Each construct already ends in '\n'; the extra one makes a blank line.
    let documents = constructs
        .iter()
        .flat_map(|a| constructs.iter().map(move |b| format!("{a}\n{b}")));

    for document in documents {
        let _ = compile_default(&document);
    }
}
/// Compiles inputs where `$...$` or `$$...$$` spans sit inside `*`, `**` or
/// `_` delimiters. Outputs are discarded; the test passes if no compilation
/// panics.
#[test]
fn math_inside_emphasis_does_not_break_delimiters() {
    const CASES: &[&str] = &[
        "*$x_y$*",
        "**$a_b^c$**",
        "*$$\nx_y\n$$*",
        "_$x$_",
        "**$\\frac{a}{b}$**",
        "*a $x_y$ b*",
        "*$x$ and $y$*",
    ];

    for &case in CASES {
        let _ = compile_default(case);
    }
}
/// Compiles inputs that mix `\n`, `\r` and `\r\n` line endings. Outputs are
/// discarded, so this only checks that compilation does not panic.
#[test]
fn mixed_line_endings_normalize() {
    const CASES: &[&str] = &[
        "a\nb\n",
        "a\rb\r",
        "a\r\nb\r\n",
        "a\nb\rc\r\nd",
        "a\n\n\rb",
        "a\r\n\nb",
        "\r\r\r",
        "\n\r\n\r",
        "para1\r\nlazy\r\ncontinuation",
    ];

    CASES.iter().for_each(|&case| {
        let _ = compile_default(case);
    });
}
/// Compiles HTML fragments with crossed, orphaned, surplus or unclosed tags,
/// alongside a few well-formed nested ones. Outputs are discarded; the test
/// passes if no compilation panics.
#[test]
fn html_mismatch_does_not_crash() {
    const CASES: &[&str] = &[
        "<a><b></a></b>",
        "<a><b></c>",
        "</orphan>",
        "<a></a></a></a>",
        "<a><a><a></a>",
        "<svg><x/></svg>",
        "<details><summary>s</summary>body</details>",
        "<dl><dt>k</dt><dd>v</dd></dl>",
        "<a href=\"a><b\">x</a>",
        "<p><div>nested-block-in-inline</div></p>",
    ];

    for fragment in CASES.iter().copied() {
        let _ = compile_default(fragment);
    }
}
/// Compiles documents whose footnote definitions reference themselves or each
/// other in a cycle. Outputs are discarded; the test passes if every call
/// returns without panicking.
#[test]
fn footnote_cycles_terminate() {
    const CASES: &[&str] = &[
        "[^a]\n\n[^a]: refers to [^a]\n",
        "[^a]\n\n[^a]: see [^b]\n\n[^b]: see [^a]\n",
        "[^a] [^b] [^c]\n\n[^a]: 1 [^b]\n[^b]: 2 [^c]\n[^c]: 3 [^a]\n",
        "[^x]: also [^y]\n[^y]: cycles to [^x] and [^y]\n[^x]\n",
    ];

    for &document in CASES {
        let _ = compile_default(document);
    }
}
/// Asserts that dangerous URL schemes never reach `href=` or `src=` attributes.
///
/// For each scheme, a link and an image whose destination starts with that
/// scheme are compiled. The lowercased HTML must contain neither
/// `href="<scheme>` nor `src="<scheme>`, where `<scheme>` is the lowercased
/// scheme with whitespace and control characters (<= 0x1f, 0x7f) removed, so
/// obfuscated spellings such as `java\tscript:` are matched in their
/// normalized form.
#[test]
fn dangerous_schemes_blocked_in_links_and_images() {
    let schemes = [
        "javascript:",
        "JaVaScRiPt:",
        "java\tscript:",
        "java\nscript:",
        "vbscript:",
        "data:text/html,",
        "data:image/svg+xml,",
        "file:///etc/passwd",
    ];

    for sch in schemes {
        let link = format!("[x]({sch}alert(1))");
        // The image must carry the scheme too; otherwise the `src=` assertion
        // below would never exercise an image destination.
        let img = format!("![x]({sch}alert(1))");

        let sch_lower = sch.to_ascii_lowercase();
        let sch_stripped: String = sch_lower
            .chars()
            .filter(|c| !c.is_whitespace() && (*c as u32) > 0x1f && *c != '\u{7f}')
            .collect();

        // The needles depend only on the scheme, so build them once per scheme.
        let href_needle = format!("href=\"{sch_stripped}");
        let src_needle = format!("src=\"{sch_stripped}");

        for src in [&link, &img] {
            let html = compile_default(src);
            let low = html.to_ascii_lowercase();
            assert!(!low.contains(&href_needle), "scheme {sch:?} leaked into href= for src={src:?}\n html={html}");
            assert!(!low.contains(&src_needle), "scheme {sch:?} leaked into src= for src={src:?}\n html={html}");
        }
    }
}