use super::parser;
use crate::lex::building::ast_tree::{AstTreeBuilder, BuildOutput};
use crate::lex::parsing::ir::{NodeType, ParseNode};
use crate::lex::token::{to_line_container, LineContainer, Token};
use std::ops::Range as ByteRange;
use crate::lex::lexing::transformations::line_token_grouping::GroupedTokens;
/// Parses a grouped token stream into the final AST.
///
/// Pipeline: flatten each group into its line token, tag the stream with a
/// document-start marker, arrange the tokens into a `LineContainer` tree,
/// then hand the tree to [`parse_experimental_v2`].
pub fn parse_from_grouped_stream(
    grouped_tokens: Vec<GroupedTokens>,
    source: &str,
) -> Result<BuildOutput, String> {
    use crate::lex::lexing::transformations::DocumentStartMarker;

    let mut flattened = Vec::with_capacity(grouped_tokens.len());
    for group in grouped_tokens {
        flattened.push(group.into_line_token());
    }
    let marked = DocumentStartMarker::mark(flattened);
    let container = to_line_container::build_line_container(marked);
    parse_experimental_v2(container, source)
}
/// Parses a flat `(Token, byte range)` stream into the final AST.
///
/// Convenience wrapper: groups the flat tokens line by line via
/// `LineTokenGroupingMapper`, then delegates to
/// [`parse_from_grouped_stream`].
pub fn parse_from_flat_tokens(
    tokens: Vec<(Token, ByteRange<usize>)>,
    source: &str,
) -> Result<BuildOutput, String> {
    use crate::lex::lexing::transformations::LineTokenGroupingMapper;

    let grouped = LineTokenGroupingMapper::new().map(tokens);
    parse_from_grouped_stream(grouped, source)
}
/// Parses a `LineContainer` tree into the final AST.
///
/// The root must be a `Container` variant; its children are run through the
/// declarative grammar parser, wrapped in a `Document` node, and built into
/// the output tree. Build errors are stringified for the caller.
pub fn parse_experimental_v2(tree: LineContainer, source: &str) -> Result<BuildOutput, String> {
    let children = if let LineContainer::Container { children, .. } = tree {
        children
    } else {
        // A bare token at the root means the line-container step failed upstream.
        return Err("Expected root container, found single token".to_string());
    };
    let content = parser::parse_with_declarative_grammar(children, source)?;
    let document = ParseNode::new(NodeType::Document, vec![], content);
    AstTreeBuilder::new(source)
        .build(document)
        .map_err(|e| e.to_string())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::parsing::ContentItem;

    /// Tokenizes and lexes `source`, returning the flat `(Token, range)` stream.
    ///
    /// `.map_err(Into::into)` is equivalent to the `Ok(expr?)` pattern
    /// (clippy `needless_question_mark`) — `?` desugars to the same `From`
    /// conversion on the error type.
    fn lex_helper(
        source: &str,
    ) -> Result<Vec<(crate::lex::token::Token, std::ops::Range<usize>)>, String> {
        let tokens = crate::lex::lexing::tokenize(source);
        crate::lex::lexing::lex(tokens).map_err(Into::into)
    }

    /// Short kind name for a content item, used in assertion/debug messages.
    fn kind_name(item: &ContentItem) -> &'static str {
        match item {
            ContentItem::Paragraph(_) => "Paragraph",
            ContentItem::Session(_) => "Session",
            ContentItem::VerbatimBlock(_) => "Verbatim",
            ContentItem::BlankLineGroup(_) => "BlankLineGroup",
            ContentItem::Annotation(_) => "Annotation",
            _ => "Other",
        }
    }

    /// Kind names for a slice of content items (assertion-failure diagnostics).
    fn kind_names(items: &[ContentItem]) -> Vec<&'static str> {
        items.iter().map(kind_name).collect()
    }

    #[test]
    fn test_parse_simple_paragraphs() {
        let source = "Simple paragraph\n";
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let result = parse_from_flat_tokens(tokens, source);
        assert!(result.is_ok(), "Parser should succeed");
        let root = result.unwrap().root;
        assert!(!root.children.is_empty(), "Should have content");
        assert!(matches!(root.children[0], ContentItem::Paragraph(_)));
    }

    #[test]
    fn test_parse_definition() {
        let source = "Definition:\n This is the definition content\n";
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let result = parse_from_flat_tokens(tokens, source);
        assert!(result.is_ok(), "Parser should succeed");
        let root = result.unwrap().root;
        let has_definition = root
            .children
            .iter()
            .any(|item| matches!(item, ContentItem::Definition(_)));
        assert!(has_definition, "Should contain Definition node");
    }

    #[test]
    fn test_parse_session() {
        let source = "Session:\n\n Session content here\n";
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let result = parse_from_flat_tokens(tokens, source);
        assert!(result.is_ok(), "Parser should succeed");
        let root = result.unwrap().root;
        let has_session = root
            .children
            .iter()
            .any(|item| matches!(item, ContentItem::Session(_)));
        assert!(has_session, "Should contain a Session node");
    }

    #[test]
    fn test_parse_session_with_multiple_blank_lines() {
        let source = "Title Two\n\n\n Content with two blank lines.\n";
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let result = parse_from_flat_tokens(tokens, source);
        assert!(result.is_ok(), "Parser should succeed");
        let root = result.unwrap().root;
        let has_session = root
            .children
            .iter()
            .any(|item| matches!(item, ContentItem::Session(_)));
        assert!(
            has_session,
            "Should parse as Session even with 2+ blank lines. Got: {:?}",
            kind_names(&root.children)
        );
    }

    #[test]
    fn test_parse_session_with_three_blank_lines() {
        let source = "Title Three\n\n\n\n Content with three blank lines.\n";
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let result = parse_from_flat_tokens(tokens, source);
        assert!(result.is_ok(), "Parser should succeed");
        let root = result.unwrap().root;
        let has_session = root
            .children
            .iter()
            .any(|item| matches!(item, ContentItem::Session(_)));
        assert!(has_session, "Should parse as Session with 3 blank lines");
    }

    #[test]
    fn test_verbatim_with_double_closing_marker() {
        let source =
            "Code Example:\n\n function hello() {\n return \"world\";\n }\n\n:: javascript ::\n";
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let root = parse_from_flat_tokens(tokens, source)
            .expect("Parser failed")
            .root;
        let has_verbatim = root
            .children
            .iter()
            .any(|item| matches!(item, ContentItem::VerbatimBlock(_)));
        assert!(
            has_verbatim,
            "Should contain a Verbatim block. Got: {:?}",
            kind_names(&root.children)
        );
    }

    #[test]
    fn test_annotations_inside_session() {
        let source = "1. Session\n\n Some content.\n\n :: note-editor :: Maybe this could be better rephrased?\n :: note.author :: Done keeping it simple\n\n More content.\n";
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let root = parse_from_flat_tokens(tokens, source)
            .expect("Parser failed")
            .root;
        let session = root
            .children
            .iter()
            .find(|item| matches!(item, ContentItem::Session(_)));
        assert!(session.is_some(), "Should contain a Session");
        if let Some(ContentItem::Session(s)) = session {
            let annotation_count = s
                .children
                .iter()
                .filter(|item| matches!(item, ContentItem::Annotation(_)))
                .count();
            assert!(
                annotation_count >= 2,
                "Session should contain at least 2 annotations, got {}. Children: {:?}",
                annotation_count,
                kind_names(&s.children)
            );
        }
    }

    #[test]
    fn test_parse_annotation() {
        let source = ":: note ::\n";
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let result = parse_from_flat_tokens(tokens, source);
        assert!(result.is_ok(), "Parser should succeed");
        let root = result.unwrap().root;
        let has_annotation = root
            .children
            .iter()
            .any(|item| matches!(item, ContentItem::Annotation(_)));
        assert!(has_annotation, "Should contain an Annotation node");
    }

    #[test]
    fn test_annotations_combined_trifecta() {
        let source = r#"Document with annotations and trifecta
:: info ::
Paragraph before session.
1. Session with annotation inside
:: note author="system" ::
This is an annotated note within a session
- List item 1
- List item 2
Another paragraph in session.
:: warning severity=high ::
- Item in annotated warning
- Important item
Final paragraph.
"#;
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let root = parse_from_flat_tokens(tokens, source)
            .expect("Parser failed")
            .root;
        // Detailed per-item dump; kept verbose on purpose for debugging runs.
        eprintln!("\n=== ANNOTATIONS + TRIFECTA COMBINED ===");
        eprintln!("Root items count: {}", root.children.len());
        for (i, item) in root.children.iter().enumerate() {
            match item {
                ContentItem::Paragraph(p) => {
                    eprintln!(" [{}] Paragraph: {} lines", i, p.lines.len())
                }
                ContentItem::Annotation(a) => {
                    eprintln!(
                        " [{}] Annotation: label='{}' content={} items",
                        i,
                        a.data.label.value,
                        a.children.len()
                    )
                }
                ContentItem::Session(s) => {
                    eprintln!(" [{}] Session: {} items", i, s.children.len())
                }
                ContentItem::List(l) => eprintln!(" [{}] List: {} items", i, l.items.len()),
                _ => eprintln!(" [{i}] Other"),
            }
        }
        let has_annotations = root
            .children
            .iter()
            .any(|item| matches!(item, ContentItem::Annotation(_)));
        let has_paragraphs = root
            .children
            .iter()
            .any(|item| matches!(item, ContentItem::Paragraph(_)));
        let has_sessions = root
            .children
            .iter()
            .any(|item| matches!(item, ContentItem::Session(_)));
        assert!(has_annotations, "Should contain annotations");
        assert!(has_paragraphs, "Should contain paragraphs");
        assert!(has_sessions, "Should contain sessions");
    }

    #[test]
    fn test_parse_empty_input() {
        let source = "";
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let result = parse_from_flat_tokens(tokens, source);
        assert!(result.is_ok(), "Empty input should parse successfully");
        let root = result.unwrap().root;
        assert_eq!(
            root.children.len(),
            0,
            "Empty document should have no children"
        );
    }

    #[test]
    fn test_parse_only_whitespace() {
        let source = " \n\n \n";
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let result = parse_from_flat_tokens(tokens, source);
        assert!(
            result.is_ok(),
            "Whitespace-only input should parse successfully"
        );
    }

    #[test]
    fn test_parse_incomplete_annotation_block() {
        let source = r#"
:: warning ::
This is content
No closing marker
"#;
        let tokens = lex_helper(source).expect("Failed to tokenize");
        let result = parse_from_flat_tokens(tokens, source);
        assert!(
            result.is_ok(),
            "Parser should handle incomplete annotations gracefully"
        );
    }
}