use comrak::nodes::NodeValue;
use comrak::{parse_document, Arena, ComrakOptions};
use insta::assert_snapshot;
use lex_babel::format::Format;
use lex_babel::formats::markdown::MarkdownFormat;
use lex_core::lex::transforms::standard::STRING_TO_AST;
/// Runs the `STRING_TO_AST` transform on `lex_src`, serializes the result with
/// `MarkdownFormat`, and parses the produced Markdown with comrak using default
/// options.
///
/// The returned root node is allocated in `arena`.
///
/// # Panics
///
/// Panics if either the transform or the Markdown serialization returns an error.
fn lex_to_comrak_ast<'a>(
    lex_src: &str,
    arena: &'a Arena<comrak::nodes::AstNode<'a>>,
) -> &'a comrak::nodes::AstNode<'a> {
    let document = STRING_TO_AST.run(String::from(lex_src)).unwrap();
    let markdown = MarkdownFormat.serialize(&document).unwrap();
    let default_options = ComrakOptions::default();
    parse_document(arena, &markdown, &default_options)
}
/// Inserts a coarse type label for `node`, and recursively for each of its
/// descendants, into `types`.
///
/// Only the variants listed in the match get their own label; every other
/// variant is recorded under the shared label `"Other"`.
fn collect_node_types<'a>(
    node: &'a comrak::nodes::AstNode<'a>,
    types: &mut std::collections::HashSet<String>,
) {
    let label = match &node.data.borrow().value {
        NodeValue::Document => "Document",
        NodeValue::Paragraph => "Paragraph",
        NodeValue::Heading(_) => "Heading",
        NodeValue::List(_) => "List",
        NodeValue::Item(_) => "Item",
        NodeValue::CodeBlock(_) => "CodeBlock",
        NodeValue::Strong => "Strong",
        NodeValue::Emph => "Emph",
        NodeValue::Code(_) => "Code",
        NodeValue::Link(_) => "Link",
        _ => "Other",
    };
    types.insert(String::from(label));

    // Depth-first walk over the subtree.
    node.children()
        .for_each(|descendant| collect_node_types(descendant, types));
}
/// A single line of Lex text must yield at least one top-level `Paragraph` node
/// in the comrak tree.
#[test]
fn test_paragraph_simple() {
    let arena = Arena::new();
    let root = lex_to_comrak_ast("This is a simple paragraph.\n", &arena);

    let found_paragraph = root
        .children()
        .any(|block| matches!(block.data.borrow().value, NodeValue::Paragraph));

    assert!(found_paragraph, "Should have a paragraph node");
}
/// The session title and its body must surface as distinct top-level heading
/// and paragraph nodes, each carrying its own text.
#[test]
fn test_heading_and_paragraph_separation() {
    /// Appends the content of every direct `Text` child of `node` to `out`.
    fn push_direct_text<'a>(node: &'a comrak::nodes::AstNode<'a>, out: &mut String) {
        for inline in node.children() {
            if let NodeValue::Text(text) = &inline.data.borrow().value {
                out.push_str(text);
            }
        }
    }

    let arena = Arena::new();
    let root = lex_to_comrak_ast("1. Title\n\n Body text.\n", &arena);

    let mut heading_text = String::new();
    let mut paragraph_text = String::new();
    for block in root.children() {
        let data = block.data.borrow();
        if matches!(data.value, NodeValue::Heading(_)) {
            push_direct_text(block, &mut heading_text);
        } else if matches!(data.value, NodeValue::Paragraph) {
            push_direct_text(block, &mut paragraph_text);
        }
    }

    assert_eq!(heading_text.trim(), "1. Title");
    assert!(paragraph_text.contains("Body text."));
}
/// Trifecta 010: the top level must contain paragraphs, at least three
/// headings including an H1, and every non-H1 heading must be an H2.
#[test]
fn test_trifecta_010_paragraphs_sessions_flat_single() {
    let lex_src = std::fs::read_to_string(
        "../../comms/specs/trifecta/010-paragraphs-sessions-flat-single.lex",
    )
    .expect("trifecta 010 file should exist");
    let arena = Arena::new();
    let root = lex_to_comrak_ast(&lex_src, &arena);

    // Only direct children of the root are inspected.
    let paragraphs = root
        .children()
        .filter(|block| matches!(block.data.borrow().value, NodeValue::Paragraph))
        .count();
    let heading_levels: Vec<_> = root
        .children()
        .filter_map(|block| {
            let data = block.data.borrow();
            match &data.value {
                NodeValue::Heading(heading) => Some(heading.level),
                _ => None,
            }
        })
        .collect();
    let headings = heading_levels.len();

    assert!(paragraphs > 0, "Should have paragraphs");
    assert!(
        headings >= 3,
        "Should have at least 3 headings (1 title + 2 sessions), found {headings}"
    );
    assert!(
        heading_levels.contains(&1),
        "Should have H1 for document title"
    );

    // Every heading that is not the H1 title is checked against level 2.
    heading_levels
        .iter()
        .copied()
        .filter(|&level| level != 1)
        .for_each(|level| assert_eq!(level, 2, "Root-level sessions should be h2"));

    println!(
        "Trifecta 010: {paragraphs} paragraphs, {headings} headings (levels: {heading_levels:?})"
    );
}
/// Trifecta 020: the top level must contain at least four headings, and at
/// least one of them must be an H2.
#[test]
fn test_trifecta_020_paragraphs_sessions_flat_multiple() {
    let lex_src = std::fs::read_to_string(
        "../../comms/specs/trifecta/020-paragraphs-sessions-flat-multiple.lex",
    )
    .expect("trifecta 020 file should exist");
    let arena = Arena::new();
    let root = lex_to_comrak_ast(&lex_src, &arena);

    // One pass over the root's direct children yields both the heading count
    // and the per-heading levels.
    let heading_levels_vec: Vec<_> = root
        .children()
        .filter_map(|child| {
            let data = child.data.borrow();
            match &data.value {
                NodeValue::Heading(h) => Some(h.level),
                _ => None,
            }
        })
        .collect();
    let heading_count = heading_levels_vec.len();

    assert!(
        heading_count >= 4,
        "Should have at least 4 headings (sessions), found {heading_count}"
    );

    // The flag is named for the level it actually tests (2), matching the
    // assertion message.
    let has_h2 = heading_levels_vec.contains(&2);
    assert!(has_h2, "Should have at least one h2 (root session)");

    println!("Trifecta 020: Heading levels: {heading_levels_vec:?}");
    println!("Trifecta 020: {heading_count} headings");
}
/// Trifecta 060: the top level must contain headings, paragraphs and lists,
/// with an H2 present and at least one H3 or H4.
#[test]
fn test_trifecta_060_nesting() {
    let lex_src = std::fs::read_to_string("../../comms/specs/trifecta/060-trifecta-nesting.lex")
        .expect("trifecta 060 file should exist");
    let arena = Arena::new();
    let root = lex_to_comrak_ast(&lex_src, &arena);

    // Only direct children of the root are inspected.
    let mut heading_levels = Vec::new();
    let mut has_paragraphs = false;
    let mut has_lists = false;
    for child in root.children() {
        match &child.data.borrow().value {
            NodeValue::Heading(h) => heading_levels.push(h.level),
            NodeValue::Paragraph => has_paragraphs = true,
            NodeValue::List(_) => has_lists = true,
            _ => {}
        }
    }

    // Checked before the range assertion: with no headings, min/max fall back
    // to 0 and the range check would fail first with a misleading "0..0"
    // message, leaving this assertion unable to ever fire.
    assert!(!heading_levels.is_empty(), "Should have headings");

    let min_level = heading_levels.iter().min().copied().unwrap_or(0);
    let max_level = heading_levels.iter().max().copied().unwrap_or(0);
    assert!(
        min_level >= 1 && max_level >= 2,
        "Should have nested heading levels (h1, h2, h3...), found range {min_level}..{max_level}"
    );
    assert!(has_paragraphs, "Should have paragraphs");
    assert!(has_lists, "Should have lists");
    assert!(
        heading_levels.contains(&2),
        "Should have h2 for root sessions"
    );
    assert!(
        heading_levels.contains(&3) || heading_levels.contains(&4),
        "Should have h3 or h4 for nested sessions"
    );
    println!(
        "Trifecta 060: {} headings with levels {:?}, {} paragraphs, {} lists",
        heading_levels.len(),
        heading_levels,
        if has_paragraphs { "some" } else { "no" },
        if has_lists { "some" } else { "no" }
    );
}
/// The kitchensink document must produce at least five distinct node-type
/// labels, as reported by `collect_node_types`, across the whole comrak tree.
#[test]
fn test_kitchensink() {
    let path = "../../comms/specs/benchmark/010-kitchensink.lex";
    let lex_src = std::fs::read_to_string(path).expect("kitchensink file should exist");

    let arena = Arena::new();
    let root = lex_to_comrak_ast(&lex_src, &arena);

    let mut node_types = std::collections::HashSet::new();
    collect_node_types(root, &mut node_types);

    let distinct = node_types.len();
    assert!(
        distinct >= 5,
        "Kitchensink should have at least 5 different node types, found: {node_types:?}"
    );
    println!("Kitchensink node types: {node_types:?}");
}
/// Compares the Markdown serialization of the kitchensink document against the
/// stored `kitchensink_markdown` snapshot.
#[test]
fn test_kitchensink_snapshot() {
    let path = "../../comms/specs/benchmark/010-kitchensink.lex";
    let source = std::fs::read_to_string(path).expect("kitchensink file should exist");

    let document = STRING_TO_AST.run(source.to_string()).unwrap();
    let md = MarkdownFormat.serialize(&document).unwrap();

    assert_snapshot!("kitchensink_markdown", md);
}
/// A bracketed URL reference must keep its URL in the Markdown output, and the
/// output must not contain a backslash-escaped `[https://` sequence.
#[test]
fn test_reference_url_converted_to_link() {
    let source = String::from("Visit [https://example.com] for more.\n");
    let document = STRING_TO_AST.run(source).unwrap();
    let markdown = MarkdownFormat.serialize(&document).unwrap();

    let has_url = markdown.contains("https://example.com");
    assert!(has_url, "URL should be present in output");

    let has_escaped_bracket = markdown.contains("\\[https://");
    assert!(!has_escaped_bracket, "Should not escape brackets for URLs");
}
/// An anchor reference must serialize with `(#introduction)` as the link
/// target and `[section]` as the link text.
#[test]
fn test_reference_anchor_converted_to_link() {
    let source = String::from("See section [#introduction] above.\n");
    let document = STRING_TO_AST.run(source).unwrap();
    let markdown = MarkdownFormat.serialize(&document).unwrap();

    let has_target = markdown.contains("(#introduction)");
    assert!(has_target, "Anchor should link to #introduction");

    let has_link_text = markdown.contains("[section]");
    assert!(
        has_link_text,
        "Link text should be the implicit anchor word 'section'"
    );
}
/// A citation reference must appear in the Markdown output as `[@smith2023]`
/// with no backslash-escaped `[@` sequence.
#[test]
fn test_citation_converted_to_ref_link() {
    let source = String::from("According to [@smith2023], this is true.\n");
    let document = STRING_TO_AST.run(source).unwrap();
    let markdown = MarkdownFormat.serialize(&document).unwrap();

    let has_citation = markdown.contains("[@smith2023]");
    assert!(has_citation, "Citation should be a link");

    let has_escaped_bracket = markdown.contains("\\[@");
    assert!(
        !has_escaped_bracket,
        "Should not escape brackets for citations"
    );
}
/// A placeholder reference must be emitted as literal text with both brackets
/// backslash-escaped, and its content must be present in the output.
#[test]
fn test_placeholder_reference_as_text() {
    let source = String::from("This needs citation [TK-REF-2025-01].\n");
    let document = STRING_TO_AST.run(source).unwrap();
    let markdown = MarkdownFormat.serialize(&document).unwrap();

    let has_escaped_form = markdown.contains("\\[TK-REF-2025-01\\]");
    assert!(
        has_escaped_form,
        "Placeholder should be visible as text with escaped brackets"
    );

    let has_content = markdown.contains("TK-REF-2025-01");
    assert!(has_content, "Placeholder content should be present");
}
/// Each item of a three-item dash list must appear in the Markdown output with
/// its `- ` marker.
#[test]
fn test_list_with_simple_items() {
    let source = concat!("- First item\n", "- Second item\n", "- Third item\n");
    let document = STRING_TO_AST.run(String::from(source)).unwrap();
    let md = MarkdownFormat.serialize(&document).unwrap();

    assert!(md.contains("- First item"));
    assert!(md.contains("- Second item"));
    assert!(md.contains("- Third item"));
}
/// A list whose first item has two paragraphs must still serialize with list
/// markers and with the text of both items present.
#[test]
fn test_list_with_multi_paragraph_items() {
    let source =
        "- Item one with first paragraph.\n\n Second paragraph in item one.\n\n- Item two.\n";
    let document = STRING_TO_AST.run(String::from(source)).unwrap();
    let md = MarkdownFormat.serialize(&document).unwrap();

    assert!(md.contains("- "), "Should have list markers");
    assert!(md.contains("Item one"));
    assert!(md.contains("Item two"));
}
/// The Markdown for the `document-01-title-explicit` spec file must begin with
/// the line `# My Document Title`.
#[test]
fn test_document_title_exported_as_h1() {
    let path = "../../comms/specs/elements/document.docs/document-01-title-explicit.lex";
    let source = std::fs::read_to_string(path).expect("document-01 spec file should exist");

    let document = STRING_TO_AST.run(source).unwrap();
    let markdown = MarkdownFormat.serialize(&document).unwrap();

    let starts_with_title = markdown.starts_with("# My Document Title\n");
    assert!(starts_with_title, "Should start with H1 title heading");
}
/// The Markdown for the `document-06-title-empty` spec file must begin with
/// the line `# Just a paragraph with no title.`.
#[test]
fn test_document_first_paragraph_as_title() {
    let path = "../../comms/specs/elements/document.docs/document-06-title-empty.lex";
    let source = std::fs::read_to_string(path).expect("document-06 spec file should exist");

    let document = STRING_TO_AST.run(source).unwrap();
    let markdown = MarkdownFormat.serialize(&document).unwrap();

    let starts_with_title = markdown.starts_with("# Just a paragraph with no title.\n");
    assert!(starts_with_title, "First paragraph should become H1 title");
}
/// The Markdown for the `document-05-title-session-hoist` spec file must not
/// begin with an H1 marker and must contain an H2 marker.
#[test]
fn test_document_session_only_no_h1_title() {
    let path = "../../comms/specs/elements/document.docs/document-05-title-session-hoist.lex";
    let source = std::fs::read_to_string(path).expect("document-05 spec file should exist");

    let document = STRING_TO_AST.run(source).unwrap();
    let markdown = MarkdownFormat.serialize(&document).unwrap();

    let opens_with_h1 = markdown.starts_with("# ");
    assert!(!opens_with_h1, "Session-only doc should not have H1 title");

    let has_h2 = markdown.contains("## ");
    assert!(has_h2, "Session should be exported as H2");
}
/// A numbered session must serialize to Markdown containing `## 1` and the
/// title text `Introduction`.
#[test]
fn test_numbered_session_preserves_numbering() {
    let source = String::from("1. Introduction\n\n Content here.\n");
    let document = STRING_TO_AST.run(source).unwrap();
    let md = MarkdownFormat.serialize(&document).unwrap();

    let keeps_number = md.contains("## 1");
    assert!(
        keeps_number,
        "Numbered session heading must contain '1' prefix: {md}"
    );

    let keeps_title = md.contains("Introduction");
    assert!(keeps_title, "Heading must contain title text");
}
/// A nested session numbered `1.1` must keep that dotted number in the
/// Markdown output, alongside both the parent and child titles.
#[test]
fn test_dotted_numbering_preserved() {
    let source = concat!("1. Parent\n\n", " 1.1. Child\n\n", " Content.\n");
    let document = STRING_TO_AST.run(String::from(source)).unwrap();
    let md = MarkdownFormat.serialize(&document).unwrap();

    let has_parent = md.contains("Parent");
    assert!(has_parent, "Parent heading must be present");

    let has_dotted_number = md.contains("1.1");
    assert!(
        has_dotted_number,
        "Dotted numbering '1.1' must be preserved in nested heading: {md}"
    );

    let has_child = md.contains("Child");
    assert!(has_child, "Child heading text must be present");
}
/// An unnumbered session title must serialize to exactly `## My Session Title`
/// somewhere in the Markdown output.
#[test]
fn test_unnumbered_session_no_numbering_added() {
    let source = String::from("My Session Title\n\n Content.\n");
    let document = STRING_TO_AST.run(source).unwrap();
    let md = MarkdownFormat.serialize(&document).unwrap();

    let has_plain_heading = md.contains("## My Session Title");
    assert!(
        has_plain_heading,
        "Unnumbered session should export as-is: {md}"
    );
}
/// Serializes two numbered sessions to Markdown, parses that Markdown back
/// with `MarkdownFormat`, serializes again, and checks that both session
/// titles are still present.
#[test]
fn test_numbering_round_trip_lex_md_lex() {
    let source = concat!(
        "1. First Session\n\n",
        " Paragraph one.\n\n",
        "2. Second Session\n\n",
        " Paragraph two.\n",
    );
    let original_doc = STRING_TO_AST.run(String::from(source)).unwrap();
    let first_pass = MarkdownFormat.serialize(&original_doc).unwrap();

    let has_one = first_pass.contains("1");
    assert!(has_one, "MD should contain numbering '1'");
    let has_two = first_pass.contains("2");
    assert!(has_two, "MD should contain numbering '2'");

    // Round trip: Markdown -> document -> Markdown.
    let reparsed_doc = MarkdownFormat.parse(&first_pass).unwrap();
    let second_pass = MarkdownFormat.serialize(&reparsed_doc).unwrap();

    let keeps_first = second_pass.contains("First Session");
    assert!(keeps_first, "First session must survive round-trip");
    let keeps_second = second_pass.contains("Second Session");
    assert!(keeps_second, "Second session must survive round-trip");
}
/// Serializes a session with two nested sessions, checks for `## ` and `### `
/// heading markers plus all three titles, then round-trips the Markdown through
/// `MarkdownFormat` and checks that the titles are still present.
#[test]
fn test_nested_numbering_round_trip() {
    let source = concat!(
        "1. Top Level\n\n",
        " Opening paragraph.\n\n",
        " 1.1. Nested\n\n",
        " Nested content.\n\n",
        " 1.2. Another Nested\n\n",
        " More content.\n",
    );
    let original_doc = STRING_TO_AST.run(String::from(source)).unwrap();
    let md = MarkdownFormat.serialize(&original_doc).unwrap();

    let has_h2 = md.contains("## ");
    assert!(has_h2, "Top level should be H2");
    let has_h3 = md.contains("### ");
    assert!(has_h3, "Nested sessions should be H3: {md}");
    let has_top = md.contains("Top Level");
    assert!(has_top, "Top level title present");
    let has_nested = md.contains("Nested");
    assert!(has_nested, "First nested title present");
    let has_another = md.contains("Another Nested");
    assert!(has_another, "Second nested title present");

    // Round trip: Markdown -> document -> Markdown.
    let reparsed_doc = MarkdownFormat.parse(&md).unwrap();
    let second_pass = MarkdownFormat.serialize(&reparsed_doc).unwrap();

    let keeps_top = second_pass.contains("Top Level");
    assert!(keeps_top, "Top level survives round-trip");
    let keeps_nested = second_pass.contains("Nested");
    assert!(keeps_nested, "Nested survives round-trip");
}