use markdown2pdf::markdown::{Lexer, LexerError};
use std::time::{Duration, Instant};
/// Wall-clock limit (2 seconds) applied to every stress input: it is used both
/// as the channel receive timeout and as the upper bound on measured elapsed time.
const PER_INPUT_BUDGET: Duration = Duration::from_secs(2);
/// Lexes `input` on a dedicated worker thread and fails the calling test unless
/// parsing succeeds within [`PER_INPUT_BUDGET`].
///
/// `name` labels the input in every failure message.
///
/// # Panics
///
/// Panics if the worker thread cannot be spawned, if the lexer returns an
/// error or panics, if no result arrives before the budget expires, or if the
/// elapsed time (measured from just before the spawn) reaches the budget.
fn run_within_budget(name: &str, input: String) {
    use std::sync::mpsc;

    let (tx, rx) = mpsc::channel();
    let start = Instant::now();
    std::thread::Builder::new()
        // 16 MiB stack for the worker thread.
        .stack_size(16 * 1024 * 1024)
        .spawn(move || {
            // `input` is owned, so it is moved into the worker directly; no copy is needed.
            let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
                let mut lexer = Lexer::new(input);
                lexer.parse()
            }));
            // The receiver may already have timed out and been dropped; a failed
            // send is therefore deliberately ignored.
            let _ = tx.send(result);
        })
        .expect("spawn stress thread");

    match rx.recv_timeout(PER_INPUT_BUDGET) {
        Ok(Ok(Ok(_))) => {
            let elapsed = start.elapsed();
            assert!(
                elapsed < PER_INPUT_BUDGET,
                "{}: took {:?} (over budget)",
                name,
                elapsed
            );
        }
        Ok(Ok(Err(e))) => panic!("{}: lexer error {:?}", name, e),
        Ok(Err(payload)) => {
            // Surface the panic message when the payload is one of the two
            // string types produced by `panic!`; otherwise fall back to the bare text.
            let detail = payload
                .downcast_ref::<&str>()
                .copied()
                .or_else(|| payload.downcast_ref::<String>().map(String::as_str));
            match detail {
                Some(msg) => panic!("{}: panicked: {}", name, msg),
                None => panic!("{}: panicked", name),
            }
        }
        Err(_) => panic!("{}: timed out (>{:?})", name, PER_INPUT_BUDGET),
    }
}
/// Lexes `input` on a worker thread and accepts either a successful parse or a
/// lexer error, as long as the outcome arrives within [`PER_INPUT_BUDGET`].
///
/// `name` labels the input in failure messages.
///
/// # Panics
///
/// Panics if the worker cannot be spawned, if the lexer panics, if no result
/// arrives before the budget expires, or if the measured elapsed time reaches
/// the budget.
fn run_resilient(name: &str, input: String) {
    let (sender, receiver) = std::sync::mpsc::channel();
    let started = Instant::now();

    let worker = move || {
        let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            let mut lexer = Lexer::new(input);
            lexer.parse()
        }));
        // A send failure only means the receiver gave up already; ignore it.
        let _ = sender.send(outcome);
    };
    std::thread::Builder::new()
        .stack_size(16 * 1024 * 1024)
        .spawn(worker)
        .expect("spawn stress thread");

    match receiver.recv_timeout(PER_INPUT_BUDGET) {
        Err(_) => panic!("{}: timed out (>{:?})", name, PER_INPUT_BUDGET),
        Ok(Err(_)) => panic!("{}: panicked / stack-overflowed", name),
        // Both `Ok` and `Err` from the lexer count as a graceful outcome here.
        Ok(Ok(_)) => {
            let elapsed = started.elapsed();
            assert!(
                elapsed < PER_INPUT_BUDGET,
                "{}: took {:?} (over budget)",
                name,
                elapsed
            );
        }
    }
}
/// Twenty-four consecutive `>` markers followed by ` foo` must lex
/// successfully within the budget.
#[test]
fn deep_nested_blockquotes() {
    const DEPTH: usize = 24;
    let markers = ">".repeat(DEPTH);
    run_within_budget("deep_nested_blockquotes", format!("{markers} foo\n"));
}
/// An `x` wrapped in 24 asterisks on each side must lex successfully within
/// the budget.
#[test]
fn deep_nested_emphasis() {
    const DEPTH: usize = 24;
    let stars = "*".repeat(DEPTH);
    run_within_budget("deep_nested_emphasis", format!("{stars}x{stars}\n"));
}
/// Twenty-four `- item` lines, each indented two spaces further than the
/// previous one, must lex successfully within the budget.
#[test]
fn deep_nested_lists() {
    const DEPTH: usize = 24;
    let input: String = (0..DEPTH)
        .map(|level| format!("{}- item\n", " ".repeat(level * 2)))
        .collect();
    run_within_budget("deep_nested_lists", input);
}
/// A single line of 100,000 backticks must lex successfully within the budget.
#[test]
fn mass_backticks() {
    const COUNT: usize = 100_000;
    let mut input = "`".repeat(COUNT);
    input.push('\n');
    run_within_budget("mass_backticks", input);
}
/// A line consisting of 50,000 asterisks must lex successfully within the budget.
#[test]
fn mass_asterisks_line_start() {
    const COUNT: usize = 50_000;
    let mut input = "*".repeat(COUNT);
    input.push('\n');
    run_within_budget("mass_asterisks_line_start", input);
}
/// 20,000 unmatched `[` characters must be handled gracefully: either outcome
/// from the lexer is accepted, but it must not panic or exceed the budget.
#[test]
fn mass_open_brackets_no_close() {
    const COUNT: usize = 20_000;
    let mut input = "[".repeat(COUNT);
    input.push('\n');
    run_resilient("mass_open_brackets_no_close", input);
}
/// 5,000 numbered one-line paragraphs separated by blank lines must lex
/// successfully within the budget.
#[test]
fn many_paragraphs() {
    let input: String = (0..5_000)
        .map(|index| format!("paragraph {index}\n\n"))
        .collect();
    run_within_budget("many_paragraphs", input);
}
/// A document mixing CRLF, LF and bare CR line endings must lex successfully
/// within the budget.
#[test]
fn mixed_line_endings() {
    let document = String::from("line1\r\nline2\nline3\rline4\r\n\r\nline5");
    run_within_budget("mixed_line_endings", document);
}
/// Text containing U+0000, U+0001, U+0007 and U+001F must lex successfully
/// within the budget.
#[test]
fn null_bytes_and_control_chars() {
    let document = String::from("foo\u{0000}bar\u{0001}\u{0007}\u{001F}baz\n");
    run_within_budget("null_bytes_and_control_chars", document);
}
/// A heading preceded by a U+FEFF byte-order mark must lex successfully within
/// the budget.
#[test]
fn leading_bom() {
    let document = String::from("\u{FEFF}# Heading\n");
    run_within_budget("leading_bom", document);
}
/// Non-ASCII text in a heading, in a link (text, URL and title) and in code
/// spans must lex successfully within the budget.
#[test]
fn unicode_in_headings_links_codespans() {
    let document = String::from(
        "# Iñtërnâtiônàlizætiøn 🦀\n\n[ภาษาไทย](https://example.com/ทดสอบ \"標題\")\n\n`日本語コード` and `emoji 🚀`\n",
    );
    run_within_budget("unicode_in_headings_links_codespans", document);
}
/// Numeric character references that name surrogate code points, values above
/// U+10FFFF, or NUL must lex successfully within the budget.
///
/// NOTE(review): the previous literal contained six already-decoded U+FFFD
/// characters, so no numeric reference ever reached the lexer. The references
/// below are a reconstruction matching the test name — confirm against the
/// intended originals.
#[test]
fn surrogate_and_oob_numeric_refs() {
    // Low/high surrogate bounds (hex and decimal), first value past U+10FFFF
    // (hex and decimal), and NUL.
    let input = "&#xD800; &#xDFFF; &#55296; &#x110000; &#1114112; &#0;\n";
    run_within_budget("surrogate_and_oob_numeric_refs", input.to_string());
}
/// `*x*` placed next to various non-ASCII punctuation characters must lex
/// successfully within the budget; each case is run separately and is labelled
/// with its own text.
#[test]
fn unicode_punctuation_flanking_boundaries() {
    let cases = ["¡*x*!", "—*x*—", "«*x*»", "*x*。", "‘*x*’", "·*x*·"];
    for &case in &cases {
        run_within_budget(case, format!("{case}\n"));
    }
}
/// A full reference link whose text and label are both `a` must lex
/// successfully within the budget.
#[test]
fn reference_self_cycle_does_not_loop() {
    let document = String::from("[a][a]\n\n[a]: /u\n");
    run_within_budget("reference_self_cycle", document);
}
/// Two reference links that use each other's label as link text must lex
/// successfully within the budget.
#[test]
fn reference_mutual_cycle_does_not_loop() {
    let document = String::from("[a][b] [b][a]\n\n[a]: /a\n[b]: /b\n");
    run_within_budget("reference_mutual_cycle", document);
}
/// A fenced code block that is opened but never closed must lex successfully
/// within the budget.
#[test]
fn unclosed_code_fence() {
    let document = String::from("```rust\nfn main() {}\nno closer here\n");
    run_within_budget("unclosed_code_fence", document);
}
/// An opening `**` with no closer and no trailing newline must lex
/// successfully within the budget.
#[test]
fn unterminated_emphasis_at_eof() {
    let document = String::from("**unterminated bold at end of file");
    run_within_budget("unterminated_emphasis_at_eof", document);
}
/// A single 100,000-character line of `a` (no trailing newline) must lex
/// successfully within the budget.
#[test]
fn extremely_long_single_line() {
    const LENGTH: usize = 100_000;
    run_within_budget("extremely_long_single_line", "a".repeat(LENGTH));
}
/// 1,000 link reference definitions followed by one shortcut use of each must
/// lex successfully within the budget.
#[test]
fn many_link_definitions() {
    const COUNT: usize = 1_000;
    // Definitions first, one per line.
    let mut input: String = (0..COUNT)
        .map(|i| format!("[ref{i}]: /u{i}\n"))
        .collect();
    // Blank line, then every reference used once on a single line.
    input.push('\n');
    input.extend((0..COUNT).map(|i| format!("[ref{i}] ")));
    run_within_budget("many_link_definitions", input);
}
/// 50,000 link reference definitions followed by a single use of the first
/// one must lex successfully within the budget.
#[test]
fn mass_reference_definitions() {
    let mut input: String = (0..50_000)
        .map(|i| format!("[l{i}]: http://example/{i}\n"))
        .collect();
    input += "\nsee [l0].\n";
    run_within_budget("mass_reference_definitions", input);
}
/// A reference definition and a matching use whose label is one million `a`
/// characters must lex successfully within the budget.
#[test]
fn single_megabyte_reference_label() {
    const LABEL_LEN: usize = 1_000_000;
    let label = "a".repeat(LABEL_LEN);
    let document = format!("[{label}]: http://example\n\n[{label}]\n");
    run_within_budget("single_megabyte_reference_label", document);
}
/// One reference definition followed by 50,000 shortcut uses on a single line
/// must lex successfully within the budget.
#[test]
fn mass_shortcut_reference_uses() {
    const USES: usize = 50_000;
    let input = format!("[x]: http://example\n\n{}\n", "[x] ".repeat(USES));
    run_within_budget("mass_shortcut_reference_uses", input);
}
/// 500 repetitions of a one-line blockquote followed by a one-line paragraph
/// must lex successfully within the budget.
#[test]
fn alternating_blockquote_paragraph() {
    let input: String = (0..500)
        .map(|i| format!("> quote {i}\n\nparagraph {i}\n\n"))
        .collect();
    run_within_budget("alternating_blockquote_paragraph", input);
}
/// 200 `*_` openers followed by 200 `_*` closers on one line must lex
/// successfully within the budget.
#[test]
fn pathological_emphasis_pairs() {
    const PAIRS: usize = 200;
    let input = format!("{}{}\n", "*_".repeat(PAIRS), "_*".repeat(PAIRS));
    run_within_budget("pathological_emphasis_pairs", input);
}
/// Five inline links nested inside one another must lex successfully within
/// the budget.
#[test]
fn nested_links_do_not_infinite_recurse() {
    let document = String::from("[a [b [c [d [e](u5)](u4)](u3)](u2)](u1)\n");
    run_within_budget("nested_links", document);
}
/// A line made of 64,000 tab characters must lex successfully within the budget.
#[test]
fn tab_only_long_line() {
    const TABS: usize = 64_000;
    let mut input = "\t".repeat(TABS);
    input.push('\n');
    run_within_budget("tab_only_long_line", input);
}
/// 10,000 repetitions of the four-character sequence `*`, backtick, `*`,
/// backtick on one line must lex successfully within the budget.
#[test]
fn alternating_emphasis_and_code() {
    const REPEATS: usize = 10_000;
    let mut input = "*`*`".repeat(REPEATS);
    input.push('\n');
    run_within_budget("alternating_emphasis_and_code", input);
}
/// 10,000 link reference definitions with no uses at all must lex
/// successfully within the budget.
#[test]
fn mass_reference_definitions_unused() {
    let input: String = (0..10_000)
        .map(|i| format!("[d{i}]: /u{i}\n"))
        .collect();
    run_within_budget("mass_reference_definitions_unused", input);
}
/// 5,000 shortcut image references with no matching definitions must lex
/// successfully within the budget.
#[test]
fn mass_image_references_unresolved() {
    let mut input: String = (0..5_000).map(|i| format!("![alt{i}] ")).collect();
    input.push('\n');
    run_within_budget("mass_image_references_unresolved", input);
}
/// 2,000 inline links, each with a quoted title, on a single line must lex
/// successfully within the budget.
#[test]
fn mass_links_with_titles() {
    let mut input: String = (0..2_000)
        .map(|i| format!(r#"[t{i}](/u{i} "title{i}") "#))
        .collect();
    input.push('\n');
    run_within_budget("mass_links_with_titles", input);
}
/// 500 identical three-line tables, each followed by a blank line, must lex
/// successfully within the budget.
#[test]
fn mass_tables_back_to_back() {
    const TABLES: usize = 500;
    let input = "| a | b |\n| --- | --- |\n| 1 | 2 |\n\n".repeat(TABLES);
    run_within_budget("mass_tables_back_to_back", input);
}
/// A fenced code block whose fences end in CRLF and whose body uses bare CR
/// separators must lex successfully within the budget.
#[test]
fn mixed_cr_in_code_block() {
    let document = String::from("```\r\nbody one\rbody two\rend\r\n```\r\n");
    run_within_budget("mixed_cr_in_code_block", document);
}
/// 10,000 HTML comment openers with no closer must lex successfully within
/// the budget.
#[test]
fn mass_html_comment_open_no_close() {
    const OPENERS: usize = 10_000;
    let mut input = "<!--".repeat(OPENERS);
    input.push('\n');
    run_within_budget("mass_html_comment_open_no_close", input);
}
/// Thirty bracketed inline images followed by a single `(outer)` destination
/// must lex successfully within the budget.
///
/// NOTE(review): the previous format string was `"[]"` with two unused
/// arguments, which `format!` rejects at compile time. The image markup inside
/// the brackets was evidently lost; the `alt{}`/`img{}` text below is a
/// reconstruction — confirm against the intended original.
#[test]
fn deeply_nested_image_in_link() {
    let mut s = String::new();
    let depth = 30;
    for i in 0..depth {
        // One image wrapped in link-text brackets per iteration.
        s.push_str(&format!("[![alt{}](img{})]", i, i));
    }
    s.push_str("(outer)\n");
    run_within_budget("deeply_nested_image_in_link", s);
}
/// Emphasis around text containing combining marks, a zero-width joiner or a
/// variation selector must lex successfully within the budget; each case is
/// run separately and is labelled with its own text.
#[test]
fn unicode_combining_marks_in_emphasis() {
    let cases = [
        "*á*",
        "*a\u{0301}*",
        "*a\u{200D}b*",
        "*\u{FE0F}*",
        "*test\u{0301}\u{0302}*",
    ];
    for &case in &cases {
        run_within_budget(case, format!("{case}\n"));
    }
}
/// 10,000 copies of the entity-shaped text `&xyzzy;` on one line must lex
/// successfully within the budget.
#[test]
fn mass_entity_references_unknown() {
    const COUNT: usize = 10_000;
    let mut input = "&xyzzy;".repeat(COUNT);
    input.push('\n');
    run_within_budget("mass_entity_references_unknown", input);
}
/// 5,000 text lines each followed by a single `=` line and a blank line must
/// lex successfully within the budget.
#[test]
fn mass_setext_underlines() {
    let input: String = (0..5_000)
        .map(|i| format!("para {i}\n=\n\n"))
        .collect();
    run_within_budget("mass_setext_underlines", input);
}
/// 10,000 consecutive `---` lines must lex successfully within the budget.
#[test]
fn mass_thematic_breaks() {
    const BREAKS: usize = 10_000;
    run_within_budget("mass_thematic_breaks", "---\n".repeat(BREAKS));
}
/// A three-column table header followed by 1,000 rows whose cell count cycles
/// from 1 to 7 must lex successfully within the budget.
#[test]
fn pathological_table_unbalanced_pipes() {
    const ROWS: usize = 1_000;
    let mut table = String::from("| a | b | c |\n| --- | --- | --- |\n");
    for row in 0..ROWS {
        // Cell count deliberately disagrees with the three-column header.
        let cell_count = row % 7 + 1;
        let cells = (0..cell_count)
            .map(|col| format!("{row}.{col}"))
            .collect::<Vec<_>>()
            .join(" | ");
        table.push_str("| ");
        table.push_str(&cells);
        table.push_str(" |\n");
    }
    run_within_budget("pathological_table_unbalanced_pipes", table);
}
/// 5,000 small `<div>` blocks, each followed by a blank line, must lex
/// successfully within the budget.
#[test]
fn mass_html_block_div_openers() {
    const BLOCKS: usize = 5_000;
    let input = "<div>\nx\n</div>\n\n".repeat(BLOCKS);
    run_within_budget("mass_html_block_div_openers", input);
}
/// 1,000 `<script>` blocks with a numbered body line must lex successfully
/// within the budget.
///
/// NOTE(review): despite the test name, every block emitted here includes its
/// `</script>` closer — confirm whether unclosed blocks were intended.
#[test]
fn mass_unclosed_raw_html_blocks() {
    let input: String = (0..1_000)
        .map(|i| format!("<script>\nbody {i}\n</script>\n\n"))
        .collect();
    run_within_budget("mass_unclosed_raw_html_blocks", input);
}
/// A `<div>` opener prefixed by 24 `> ` markers, followed by unprefixed body
/// and closer lines, must lex successfully within the budget.
#[test]
fn deeply_nested_html_inside_blockquote() {
    const DEPTH: usize = 24;
    let mut input = "> ".repeat(DEPTH);
    input += "<div>\nbody\n</div>\n";
    run_within_budget("deeply_nested_html_inside_blockquote", input);
}
/// An `<a>` tag carrying 1,000 quoted attributes must lex successfully within
/// the budget.
#[test]
fn pathological_html_attribute_storm() {
    let attributes: String = (0..1_000)
        .map(|i| format!(" attr{i}=\"value{i}\""))
        .collect();
    let input = format!("<a{attributes}>\nbody\n</a>\n");
    run_within_budget("pathological_html_attribute_storm", input);
}
/// Four tag-shaped pathological inputs, run one after another, must each lex
/// successfully within the budget.
#[test]
fn html_tag_scanner_no_redos() {
    // 50,000 `=` characters inside a tag.
    let equals_run = "=".repeat(50_000);
    run_within_budget("html_equals_storm", format!("<a {equals_run}>\n"));

    // 20,000 unquoted attribute assignments.
    let unquoted_attrs: String = (0..20_000).map(|i| format!(" x=y{i}")).collect();
    run_within_budget(
        "html_unquoted_value_boundaries",
        format!("<a{unquoted_attrs}>\n"),
    );

    // 100,000 bare `<` characters.
    let mut bare_lt = "<".repeat(100_000);
    bare_lt.push('\n');
    run_within_budget("html_bare_lt_storm", bare_lt);

    // 60,000 `<a` openers with no closing `>`.
    let mut openers = "<a<a<a".repeat(20_000);
    openers.push('\n');
    run_within_budget("html_nested_tag_openers", openers);
}
/// 1,000 inline `<?php … ?>` processing instructions interleaved with text on
/// one line must lex successfully within the budget.
#[test]
fn mass_inline_processing_instructions() {
    let mut input: String = (0..1_000)
        .map(|i| format!("text <?php echo {i}; ?> "))
        .collect();
    input.push('\n');
    run_within_budget("mass_inline_processing_instructions", input);
}
/// 5,000 repetitions of text containing the short comment forms `<!-->` and
/// `<!--->` must lex successfully within the budget.
#[test]
fn mass_html_comment_short_forms() {
    const REPEATS: usize = 5_000;
    let mut input = "foo <!--> bar <!---> baz ".repeat(REPEATS);
    input.push('\n');
    run_within_budget("mass_html_comment_short_forms", input);
}
/// 2,000 repetitions of a paragraph line immediately followed by a `<div>`
/// block must lex successfully within the budget.
#[test]
fn alternating_html_block_and_paragraph() {
    let input: String = (0..2_000)
        .map(|i| format!("paragraph {i}\n<div>\nbody {i}\n</div>\n\n"))
        .collect();
    run_within_budget("alternating_html_block_and_paragraph", input);
}
/// 30,000 pipe-delimited lines, each followed by a blank line and with no
/// delimiter row, must lex successfully within the budget.
#[test]
fn pipe_heavy_non_table_is_not_quadratic() {
    const LINES: usize = 30_000;
    let input = "| col a | col b | col c |\n\n".repeat(LINES);
    run_within_budget("pipe_heavy_non_table", input);
}
/// 15,000 consecutive pipe-delimited lines with no blank lines between them
/// must lex successfully within the budget.
#[test]
fn huge_single_paragraph_is_linear() {
    const LINES: usize = 15_000;
    let input = "| col a | col b | col c |\n".repeat(LINES);
    run_within_budget("huge_single_paragraph", input);
}
/// 5,000 consecutive `>` markers must be handled gracefully: a lexer error is
/// acceptable, a panic or a budget overrun is not.
#[test]
fn deep_blockquote_far_beyond_cap_is_graceful() {
    const DEPTH: usize = 5_000;
    let markers = ">".repeat(DEPTH);
    run_resilient("deep_blockquote_far_beyond_cap", format!("{markers} foo\n"));
}
/// 2,000 list items, each indented two spaces further than the previous one,
/// must be handled gracefully: a lexer error is acceptable, a panic or a
/// budget overrun is not.
#[test]
fn deep_nested_list_far_beyond_cap_is_graceful() {
    const DEPTH: usize = 2_000;
    let input: String = (0..DEPTH)
        .map(|level| format!("{}- item\n", " ".repeat(level * 2)))
        .collect();
    run_resilient("deep_nested_list_far_beyond_cap", input);
}
/// An `x` wrapped in 5,000 asterisks on each side must be handled gracefully:
/// a lexer error is acceptable, a panic or a budget overrun is not.
#[test]
fn deep_emphasis_far_beyond_cap_is_graceful() {
    const DEPTH: usize = 5_000;
    let stars = "*".repeat(DEPTH);
    run_resilient("deep_emphasis_far_beyond_cap", format!("{stars}x{stars}\n"));
}
/// 3,000 `[` openers, the word `text`, then 3,000 `](u)` closers must be
/// handled gracefully: a lexer error is acceptable, a panic or a budget
/// overrun is not.
#[test]
fn deep_link_nesting_far_beyond_cap_is_graceful() {
    const DEPTH: usize = 3_000;
    let input = format!("{}text{}\n", "[".repeat(DEPTH), "](u)".repeat(DEPTH));
    run_resilient("deep_link_nesting_far_beyond_cap", input);
}
/// Lexing 2,000 consecutive `>` markers on a 16 MiB-stack thread must return
/// `LexerError::UnknownToken` whose message mentions "maximum nesting depth";
/// any other outcome fails the test.
#[test]
fn nesting_cap_returns_typed_error_not_crash() {
    let worker = std::thread::Builder::new()
        .stack_size(16 * 1024 * 1024)
        .spawn(|| {
            let source = ">".repeat(2_000) + " foo\n";
            let mut lexer = Lexer::new(source);
            lexer.parse()
        })
        .expect("spawn thread");

    let outcome = worker.join().expect("thread panicked");
    match &outcome {
        Err(LexerError::UnknownToken { message, .. }) => assert!(
            message.contains("maximum nesting depth"),
            "unexpected error message: {message}"
        ),
        _ => panic!("expected a nesting-depth LexerError, got {:?}", outcome),
    }
}