use super::extraction::{
DataExtraction, DefinitionData, FootnoteLineData, ListItemData, ParagraphData, SessionData,
TableCellData, TableData, TableRowData, VerbatimBlockData, VerbatimGroupData,
};
use super::location::{
aggregate_locations, byte_range_to_ast_range, compute_location_from_locations, default_location,
};
use crate::lex::ast::elements::blank_line_group::BlankLineGroup;
use crate::lex::ast::elements::typed_content::{
ContentElement, ListContent, SessionContent, VerbatimContent,
};
use crate::lex::ast::elements::verbatim::VerbatimGroupItem;
use crate::lex::ast::elements::SequenceMarker;
use crate::lex::ast::range::SourceLocation;
use crate::lex::ast::traits::AstNode;
use crate::lex::ast::{
Annotation, Data, Definition, Label, List, ListItem, Paragraph, Range, Session, Table,
TableCell, TableCellAlignment, TableRow, TextContent, TextLine, Verbatim,
};
use crate::lex::parsing::ContentItem;
use crate::lex::token::Token;
use std::ops::Range as ByteRange;
/// Builds a `ContentItem::Paragraph` from extracted paragraph data.
///
/// Each `(text, byte_range)` entry of `data.text_lines` becomes a
/// `ContentItem::TextLine` whose content and line share the location derived
/// from that byte range. The paragraph's own location is derived from
/// `data.overall_byte_range`, and it starts with no annotations.
pub(super) fn paragraph_node(data: ParagraphData, source_location: &SourceLocation) -> ContentItem {
    let mut lines: Vec<ContentItem> = Vec::new();
    for (text, byte_range) in data.text_lines {
        let line_range = byte_range_to_ast_range(byte_range, source_location);
        let content = TextContent::from_string(text, Some(line_range.clone()));
        lines.push(ContentItem::TextLine(TextLine::new(content).at(line_range)));
    }

    let location = byte_range_to_ast_range(data.overall_byte_range, source_location);
    ContentItem::Paragraph(Paragraph {
        lines,
        annotations: Vec::new(),
        location,
    })
}
pub(in crate::lex::building) fn session_node(
session_data: SessionData,
content: Vec<SessionContent>,
source_location: &SourceLocation,
) -> ContentItem {
let title_location = source_location.byte_range_to_ast_range(&session_data.title_byte_range);
let title_text = session_data.title_text;
let marker = if let Some(marker_data) = session_data.marker {
let marker_location = source_location.byte_range_to_ast_range(&marker_data.byte_range);
Some(SequenceMarker::new(
marker_data.style,
marker_data.separator,
marker_data.form,
TextContent::from_string(marker_data.text, Some(marker_location.clone())),
marker_location,
))
} else {
None
};
if let Some(ref m) = marker {
debug_assert!(
m.is_valid_for_session(),
"Invalid session marker: {m:?}. Sessions don't support Plain (-) markers."
);
}
let child_items: Vec<ContentItem> = content.iter().cloned().map(ContentItem::from).collect();
let location = aggregate_locations(title_location.clone(), &child_items);
let title = TextContent::from_string(title_text, Some(title_location));
let mut session = Session::new(title, content).at(location);
session.marker = marker;
ContentItem::Session(session)
}
/// Builds a `ContentItem::Definition` from a subject and its typed children.
///
/// The subject text is located at `data.subject_byte_range`. The definition's
/// location is produced by `aggregate_locations` from the subject location and
/// the children (converted to `ContentItem`s).
pub(super) fn definition_node(
    data: DefinitionData,
    content: Vec<ContentElement>,
    source_location: &SourceLocation,
) -> ContentItem {
    let subject_range = byte_range_to_ast_range(data.subject_byte_range, source_location);

    let children_as_items: Vec<ContentItem> = content
        .iter()
        .map(|child| ContentItem::from(child.clone()))
        .collect();
    let location = aggregate_locations(subject_range.clone(), &children_as_items);

    let subject = TextContent::from_string(data.subject_text, Some(subject_range));
    ContentItem::Definition(Definition::new(subject, content).at(location))
}
/// Wraps already-built list items into a `ContentItem::List`.
///
/// The list marker is parsed from the first item's marker text and location
/// (and is `None` when there are no items or parsing yields nothing). The list
/// location is computed from the item locations, falling back to
/// `Range::default()` for an empty list. The list starts with no annotations.
pub(super) fn list_node(items: Vec<ListItem>) -> ContentItem {
    use crate::lex::ast::elements::SequenceMarker;

    let item_ranges: Vec<Range> = items.iter().map(|item| item.location.clone()).collect();

    let marker = match items.first() {
        Some(head) => SequenceMarker::parse(head.marker.as_string(), head.marker.location.clone()),
        None => None,
    };

    let location = if !item_ranges.is_empty() {
        compute_location_from_locations(&item_ranges)
    } else {
        Range::default()
    };

    let typed_items: Vec<ListContent> = items.into_iter().map(ListContent::ListItem).collect();

    ContentItem::List(List {
        items: crate::lex::ast::elements::container::ListContainer::from_typed(typed_items),
        marker,
        annotations: Vec::new(),
        location,
    })
}
/// Builds a `ListItem` from its marker, body text and typed children.
///
/// The marker and body are located at their respective byte ranges. The item
/// location is computed from the marker range, the body range and the range of
/// every child, in that order.
pub(super) fn list_item_node(
    data: ListItemData,
    content: Vec<ContentElement>,
    source_location: &SourceLocation,
) -> ListItem {
    let marker_range = byte_range_to_ast_range(data.marker_byte_range, source_location);
    let body_range = byte_range_to_ast_range(data.body_byte_range, source_location);

    let marker = TextContent::from_string(data.marker_text, Some(marker_range.clone()));
    let body = TextContent::from_string(data.body_text, Some(body_range.clone()));

    let mut covered_ranges = vec![marker_range, body_range];
    covered_ranges.extend(
        content
            .iter()
            .map(|child| ContentItem::from(child.clone()).range().clone()),
    );
    let location = compute_location_from_locations(&covered_ranges);

    ListItem::with_text_content(marker, body, content).at(location)
}
/// Builds a `Data` node (label plus parameters) from extracted data.
///
/// The label is located at `data.label_byte_range`. Each extracted parameter
/// becomes a `Parameter` located at its `overall_byte_range`; a parameter
/// without a value gets the default (empty) value. The node's location is
/// computed from the label location followed by every parameter location.
pub(super) fn data_node(data: DataExtraction, source_location: &SourceLocation) -> Data {
    use crate::lex::ast::Parameter;

    let label_location = byte_range_to_ast_range(data.label_byte_range, source_location);
    let label = Label::new(data.label_text).at(label_location.clone());

    // Seeded with the label so the overall location always covers it, even
    // when there are no parameters.
    let mut parameter_ranges = vec![label_location];
    let parameters: Vec<Parameter> = data
        .parameters
        .into_iter()
        .map(|param_data| {
            let location = byte_range_to_ast_range(param_data.overall_byte_range, source_location);
            parameter_ranges.push(location.clone());
            Parameter {
                key: param_data.key_text,
                value: param_data.value_text.unwrap_or_default(),
                location,
            }
        })
        .collect();

    // Pass the collected ranges by reference (the original text had a garbled
    // character in place of the `&p` of `&parameter_ranges`).
    let location = compute_location_from_locations(&parameter_ranges);
    Data::new(label, parameters).at(location)
}
/// Builds a `ContentItem::Annotation` from its `Data` header and typed children.
///
/// The annotation's location is produced by `aggregate_locations` from the
/// data node's location and the children (converted to `ContentItem`s).
pub(super) fn annotation_node(data: Data, content: Vec<ContentElement>) -> ContentItem {
    let children_as_items: Vec<ContentItem> = content
        .iter()
        .map(|child| ContentItem::from(child.clone()))
        .collect();
    let header_range = data.location.clone();
    let location = aggregate_locations(header_range, &children_as_items);
    ContentItem::Annotation(Annotation::from_data(data, content).at(location))
}
/// Builds a `ContentItem::VerbatimBlock` from one or more subject/content
/// groups and the closing data node.
///
/// The first group supplies the block's primary subject and children; every
/// further group is attached as a `VerbatimGroupItem`. The block location is
/// computed from all group locations (in order) followed by the closing data
/// location.
///
/// # Panics
///
/// Panics when `data.groups` is empty.
pub(super) fn verbatim_block_node(
    data: VerbatimBlockData,
    closing_data: Data,
    source_location: &SourceLocation,
) -> ContentItem {
    let mode = data.mode;
    let mut remaining_groups = data.groups.into_iter();

    let leading_group = match remaining_groups.next() {
        Some(group) => group,
        None => panic!("Verbatim blocks must contain at least one subject/content pair"),
    };
    let (first_subject, first_children, mut covered_ranges) =
        build_verbatim_group(leading_group, source_location);

    let additional_groups: Vec<VerbatimGroupItem> = remaining_groups
        .map(|group| {
            let (subject, children, group_ranges) = build_verbatim_group(group, source_location);
            covered_ranges.extend(group_ranges);
            VerbatimGroupItem::new(subject, children)
        })
        .collect();

    covered_ranges.push(closing_data.location.clone());
    let location = compute_location_from_locations(&covered_ranges);

    let block = Verbatim::new(first_subject, first_children, closing_data, mode)
        .with_additional_groups(additional_groups)
        .at(location);
    ContentItem::VerbatimBlock(Box::new(block))
}
/// Converts one verbatim group into its subject, its content lines and the
/// list of locations it covers.
///
/// The returned locations start with the subject location and continue with
/// one entry per content line, in source order. Each content line becomes a
/// `VerbatimContent::VerbatimLine` located at its byte range.
fn build_verbatim_group(
    group_data: VerbatimGroupData,
    source_location: &SourceLocation,
) -> (TextContent, Vec<VerbatimContent>, Vec<Range>) {
    use crate::lex::ast::elements::VerbatimLine;

    let subject_range = byte_range_to_ast_range(group_data.subject_byte_range, source_location);
    let subject = TextContent::from_string(group_data.subject_text, Some(subject_range.clone()));

    let mut covered_ranges: Vec<Range> = vec![subject_range];
    let children: Vec<VerbatimContent> = group_data
        .content_lines
        .into_iter()
        .map(|(text, bytes)| {
            let line_range = byte_range_to_ast_range(bytes, source_location);
            covered_ranges.push(line_range.clone());
            let content = TextContent::from_string(text, Some(line_range.clone()));
            VerbatimContent::VerbatimLine(VerbatimLine::from_text_content(content).at(line_range))
        })
        .collect();

    (subject, children, covered_ranges)
}
/// Builds a `ContentItem::Table` from extracted table data.
///
/// Header and body rows are built with the shared column `alignments`. The
/// table location is computed from the subject location, then every header
/// row location, then every body row location. Footnotes, when present, are
/// attached as a list; they do not contribute to the table location.
pub(super) fn table_node(
    data: TableData,
    alignments: &[TableCellAlignment],
    source_location: &SourceLocation,
) -> ContentItem {
    let subject_range = byte_range_to_ast_range(data.subject_byte_range, source_location);
    let subject = TextContent::from_string(data.subject_text, Some(subject_range.clone()));
    let mut covered_ranges = vec![subject_range];

    let mut header_rows: Vec<TableRow> = Vec::new();
    for row_data in data.header_rows {
        let row = build_table_row(row_data, alignments, source_location);
        covered_ranges.push(row.location.clone());
        header_rows.push(row);
    }

    let mut body_rows: Vec<TableRow> = Vec::new();
    for row_data in data.body_rows {
        let row = build_table_row(row_data, alignments, source_location);
        covered_ranges.push(row.location.clone());
        body_rows.push(row);
    }

    let location = compute_location_from_locations(&covered_ranges);
    let table = Table::new(subject, header_rows, body_rows, data.mode).at(location);

    let table = if data.footnotes.is_empty() {
        table
    } else {
        table.with_footnotes(build_footnote_list(data.footnotes, source_location))
    };
    ContentItem::Table(Box::new(table))
}
/// Builds a `TableRow` located at the row's byte range.
///
/// Cells are built in order; each cell receives its zero-based column index
/// so the matching entry of `alignments` can be applied.
fn build_table_row(
    row_data: TableRowData,
    alignments: &[TableCellAlignment],
    source_location: &SourceLocation,
) -> TableRow {
    let location = byte_range_to_ast_range(row_data.byte_range, source_location);

    let mut cells: Vec<TableCell> = Vec::new();
    for (col_idx, cell_data) in row_data.cells.into_iter().enumerate() {
        cells.push(build_table_cell(cell_data, col_idx, alignments, source_location));
    }

    TableRow::new(cells).at(location)
}
/// Builds a `TableCell` located at the cell's byte range.
///
/// The alignment is taken from `alignments[col_idx]`, falling back to
/// `TableCellAlignment::None` when the column has no entry. Span and header
/// flags are copied from the cell data, and block content is attached as
/// children when present.
fn build_table_cell(
    cell_data: TableCellData,
    col_idx: usize,
    alignments: &[TableCellAlignment],
    source_location: &SourceLocation,
) -> TableCell {
    let location = byte_range_to_ast_range(cell_data.byte_range, source_location);
    let text = TextContent::from_string(cell_data.text, Some(location.clone()));

    let align = match alignments.get(col_idx) {
        Some(alignment) => *alignment,
        None => TableCellAlignment::None,
    };

    let cell = TableCell::new(text)
        .with_span(cell_data.colspan, cell_data.rowspan)
        .with_align(align)
        .with_header(cell_data.is_header)
        .at(location);

    match cell_data.block_content {
        Some(children) => cell.with_children(children),
        None => cell,
    }
}
/// Turns table footnote lines into a `List`, one `ListItem` per footnote.
///
/// Each item is created from the footnote's marker and text and located at the
/// footnote's byte range.
fn build_footnote_list(footnotes: Vec<FootnoteLineData>, source_location: &SourceLocation) -> List {
    let mut items: Vec<ListItem> = Vec::new();
    for footnote in footnotes {
        let location = byte_range_to_ast_range(footnote.byte_range, source_location);
        items.push(ListItem::new(footnote.marker, footnote.text).at(location));
    }
    List::new(items)
}
/// Builds a `ContentItem::BlankLineGroup` from the tokens that make it up.
///
/// With no tokens, the group has a count of 0, no source tokens and the
/// default location. Otherwise the count is the number of `Token::BlankLine`
/// tokens (but never less than 1), the location is computed from every token's
/// byte range, and the tokens themselves are kept as the group's source tokens.
pub(super) fn blank_line_group_node(
    tokens: Vec<(Token, ByteRange<usize>)>,
    source_location: &SourceLocation,
) -> ContentItem {
    if tokens.is_empty() {
        return ContentItem::BlankLineGroup(BlankLineGroup::new(0, vec![]).at(default_location()));
    }

    let mut blank_lines = 0usize;
    let mut token_ranges: Vec<Range> = Vec::new();
    let mut source_tokens = Vec::new();
    for (token, span) in tokens {
        if matches!(token, Token::BlankLine(_)) {
            blank_lines += 1;
        }
        token_ranges.push(byte_range_to_ast_range(span, source_location));
        source_tokens.push(token);
    }

    // A non-empty group always counts as at least one blank line.
    let count = if blank_lines == 0 { 1 } else { blank_lines };
    let location = compute_location_from_locations(&token_ranges);
    ContentItem::BlankLineGroup(BlankLineGroup::new(count, source_tokens).at(location))
}
#[cfg(test)]
mod tests {
    //! Unit tests for the node builders: locations are derived from byte
    //! ranges, and each container accepts the child kinds it is typed for.

    use super::*;
    use crate::lex::ast::elements::typed_content::{ContentElement, SessionContent};
    use crate::lex::ast::elements::verbatim::VerbatimBlockMode;
    use crate::lex::ast::range::SourceLocation;
    use crate::lex::ast::traits::AstNode;
    use crate::lex::ast::Position;
    use crate::lex::building::extraction;

    /// A single-line paragraph spans exactly its text.
    #[test]
    fn test_paragraph_node() {
        let source_location = SourceLocation::new("hello");
        let data = ParagraphData {
            text_lines: vec![(String::from("hello"), 0..5)],
            overall_byte_range: 0..5,
        };

        if let ContentItem::Paragraph(para) = paragraph_node(data, &source_location) {
            assert_eq!(para.lines.len(), 1);
            assert_eq!(para.location.start, Position::new(0, 0));
            assert_eq!(para.location.end, Position::new(0, 5));
        } else {
            panic!("Expected Paragraph");
        }
    }

    /// A childless session is located at its title.
    #[test]
    fn test_session_node() {
        let source_location = SourceLocation::new("Session");
        let data = SessionData {
            title_text: String::from("Session"),
            title_byte_range: 0..7,
            marker: None,
        };

        let built = session_node(data, Vec::<SessionContent>::new(), &source_location);
        if let ContentItem::Session(session) = built {
            assert_eq!(session.title.as_string(), "Session");
            assert_eq!(session.location.start, Position::new(0, 0));
            assert_eq!(session.location.end, Position::new(0, 7));
        } else {
            panic!("Expected Session");
        }
    }

    /// Label and parameter spans are assigned, and the node covers both.
    #[test]
    fn test_data_node_assigns_parameter_locations() {
        let source = "note severity=high";
        let source_location = SourceLocation::new(source);
        let tokens = vec![
            (Token::Text("note".to_string()), 0..4),
            (Token::Whitespace(1), 4..5),
            (Token::Text("severity".to_string()), 5..13),
            (Token::Equals, 13..14),
            (Token::Text("high".to_string()), 14..18),
        ];
        let extracted = extraction::extract_data(tokens, source);

        let node = data_node(extracted, &source_location);

        assert_eq!(node.label.value, "note");
        assert_eq!(node.label.location.span, 0..5);
        assert_eq!(node.parameters.len(), 1);
        assert_eq!(node.parameters[0].location.span, 5..18);
        assert_eq!(node.location.span, 0..18);
    }

    /// A two-group verbatim block keeps per-group locations and spans from the
    /// first subject through the closing data.
    #[test]
    fn test_verbatim_block_node_aggregates_groups() {
        let source = "Example:\n code line\nOther:\n more\n:: shell ::\n";
        let source_location = SourceLocation::new(source);

        // Byte range of the first occurrence of `needle` in `source`.
        let span_of = |needle: &str| -> std::ops::Range<usize> {
            let start = source.find(needle).expect("needle not found");
            start..start + needle.len()
        };

        let data = VerbatimBlockData {
            groups: vec![
                VerbatimGroupData {
                    subject_text: "Example:".to_string(),
                    subject_byte_range: span_of("Example:"),
                    content_lines: vec![("code line".to_string(), span_of("code line"))],
                },
                VerbatimGroupData {
                    subject_text: "Other:".to_string(),
                    subject_byte_range: span_of("Other:"),
                    content_lines: vec![("more".to_string(), span_of("more"))],
                },
            ],
            mode: VerbatimBlockMode::Inflow,
        };

        let closing_span = span_of(":: shell ::");
        let label_range = byte_range_to_ast_range(span_of("shell"), &source_location);
        let closing_label = Label::new("shell".to_string()).at(label_range);
        let closing_range = byte_range_to_ast_range(closing_span.clone(), &source_location);
        let closing_data = Data::new(closing_label, Vec::new()).at(closing_range);

        let block = match verbatim_block_node(data, closing_data, &source_location) {
            ContentItem::VerbatimBlock(block) => block,
            other => panic!("Expected verbatim block, got {:?}", other.node_type()),
        };

        assert_eq!(block.location.span, 0..closing_span.end);
        assert_eq!(
            block.subject.location.as_ref().unwrap().span,
            span_of("Example:")
        );
        assert_eq!(block.group_len(), 2);

        let mut groups = block.group();

        let first = groups.next().expect("first group missing");
        assert_eq!(
            first.subject.location.as_ref().unwrap().span,
            span_of("Example:")
        );
        match first.children.iter().next() {
            Some(ContentItem::VerbatimLine(line)) => {
                assert_eq!(line.location.span, span_of("code line"));
            }
            _ => panic!("expected verbatim line in first group"),
        }

        let second = groups.next().expect("second group missing");
        assert_eq!(
            second.subject.location.as_ref().unwrap().span,
            span_of("Other:")
        );
        match second.children.iter().next() {
            Some(ContentItem::VerbatimLine(line)) => {
                assert_eq!(line.location.span, span_of("more"));
            }
            _ => panic!("expected verbatim line in second group"),
        }

        assert_eq!(block.closing_data.location.span, closing_span);
    }

    /// Sessions may contain nested sessions.
    #[test]
    fn test_session_allows_session_child() {
        use crate::lex::ast::elements::Session;

        let source_location = SourceLocation::new("Parent Session\n Nested Session\n");
        let nested = Session::with_title("Nested Session".to_string());
        let data = SessionData {
            title_text: "Parent Session".to_string(),
            title_byte_range: 0..14,
            marker: None,
        };

        let built = session_node(data, vec![SessionContent::Session(nested)], &source_location);
        if let ContentItem::Session(session) = built {
            assert_eq!(session.children.len(), 1);
            assert_eq!(session.title.as_string(), "Parent Session");
        } else {
            panic!("Expected Session");
        }
    }

    /// Definitions accept non-session content such as paragraphs.
    #[test]
    fn test_definition_allows_non_session_children() {
        use crate::lex::ast::elements::Paragraph;

        let source_location = SourceLocation::new("Test Subject:\n Some content\n");
        let paragraph = Paragraph::from_line("Some content".to_string());
        let data = DefinitionData {
            subject_text: "Test Subject".to_string(),
            subject_byte_range: 0..12,
        };

        let built = definition_node(
            data,
            vec![ContentElement::Paragraph(paragraph)],
            &source_location,
        );
        if let ContentItem::Definition(def) = built {
            assert_eq!(def.children.len(), 1);
            assert_eq!(def.subject.as_string(), "Test Subject");
        } else {
            panic!("Expected Definition");
        }
    }

    /// Annotations accept non-session content such as paragraphs.
    #[test]
    fn test_annotation_allows_non_session_children() {
        use crate::lex::ast::elements::Paragraph;

        let source_location = SourceLocation::new(":: note ::\n Some content\n");
        let paragraph = Paragraph::from_line("Some content".to_string());
        let extracted = DataExtraction {
            label_text: "note".to_string(),
            label_byte_range: 0..4,
            parameters: vec![],
        };

        let header = data_node(extracted, &source_location);
        let built = annotation_node(header, vec![ContentElement::Paragraph(paragraph)]);
        if let ContentItem::Annotation(ann) = built {
            assert_eq!(ann.children.len(), 1);
            assert_eq!(ann.data.label.value, "note");
        } else {
            panic!("Expected Annotation");
        }
    }

    /// List items accept non-session content such as paragraphs.
    #[test]
    fn test_list_item_allows_non_session_children() {
        use crate::lex::ast::elements::Paragraph;

        let source_location = SourceLocation::new("- Item\n Some content\n");
        let paragraph = Paragraph::from_line("Item content".to_string());
        let data = ListItemData {
            marker_text: "-".to_string(),
            marker_byte_range: 0..1,
            body_text: "Item".to_string(),
            body_byte_range: 2..6,
        };

        let item = list_item_node(
            data,
            vec![ContentElement::Paragraph(paragraph)],
            &source_location,
        );

        assert_eq!(item.children.len(), 1);
        assert_eq!(item.marker(), "-");
        assert_eq!(item.text(), "Item");
    }
}