use crate::lex::assembling::{AttachAnnotations, AttachRoot};
use crate::lex::parsing::ir::ParseNode;
use crate::lex::parsing::Document;
use crate::lex::token::Token;
use crate::lex::transforms::stages::{
CoreTokenization, ParseInlines, Parsing, SemanticIndentation,
};
use crate::lex::transforms::{Runnable, Transform};
use once_cell::sync::Lazy;
use std::ops::Range;
/// A flat list of lexed tokens, each paired with a `Range<usize>`
/// (presumably the token's span in the source text — confirm against the lexer).
pub type TokenStream = Vec<(Token, Range<usize>)>;
/// A transform that takes source text (`String`) and produces a [`TokenStream`].
pub type LexingTransform = Transform<String, TokenStream>;
/// A transform that takes source text (`String`) and produces a parsed [`Document`].
pub type AstTransform = Transform<String, Document>;
/// Lazily constructed pipeline that runs only the [`CoreTokenization`] stage
/// on a source string.
///
/// The pipeline is built on first access and reused afterwards.
pub static CORE_TOKENIZATION: Lazy<LexingTransform> = Lazy::new(|| {
    // Seed the pipeline with `Ok`, which hands the input string to the first
    // real stage unchanged.
    let passthrough = Transform::from_fn(Ok);
    passthrough.then(CoreTokenization::new())
});
/// Lazily constructed lexing pipeline: [`CoreTokenization`] followed by
/// [`SemanticIndentation`].
///
/// The pipeline is built on first access and reused afterwards.
pub static LEXING: Lazy<LexingTransform> = Lazy::new(|| {
    // Seed the pipeline with `Ok`, which hands the input string to the first
    // real stage unchanged.
    let passthrough = Transform::from_fn(Ok);
    let tokenized = passthrough.then(CoreTokenization::new());
    tokenized.then(SemanticIndentation::new())
});
/// A transform that takes source text (`String`) and produces a [`ParseNode`].
pub type IrTransform = Transform<String, ParseNode>;
/// Lazily constructed pipeline that feeds a source string through the
/// [`Parsing`] stage, yielding a [`ParseNode`].
///
/// The pipeline is built on first access and reused afterwards.
pub static TO_IR: Lazy<IrTransform> = Lazy::new(|| {
    // Seed the pipeline with `Ok`, which hands the input string to the first
    // real stage unchanged.
    let passthrough = Transform::from_fn(Ok);
    passthrough.then(Parsing::new())
});
/// Lazily constructed pipeline from source text to a [`Document`].
///
/// This is a thin wrapper over [`run_string_to_ast`] that always uses
/// `Mode::Strict` for label normalization.
pub static STRING_TO_AST: Lazy<AstTransform> = Lazy::new(|| {
    use crate::lex::assembling::stages::normalize_labels::Mode;

    Transform::from_fn(|source: String| run_string_to_ast(source, Mode::Strict))
});
pub(crate) fn parse_to_attached_root(
s: String,
) -> Result<
(
crate::lex::ast::Document,
crate::lex::anchoring::AnchoringPrepass,
),
crate::lex::transforms::TransformError,
> {
let source = if !s.is_empty() && !s.ends_with('\n') {
format!("{s}\n")
} else {
s
};
let prepass = crate::lex::anchoring::extract_reference_lines(&source);
let core_tokens = CoreTokenization::new().run(source.clone())?;
let core_tokens = prepass.filter_tokens(core_tokens);
let tokens = SemanticIndentation::new().run(core_tokens)?;
let mut mapper = crate::lex::lexing::transformations::LineTokenGroupingMapper::new();
let grouped_tokens = mapper.map(tokens);
let mut output =
crate::lex::parsing::engine::parse_from_grouped_stream(grouped_tokens, &source).map_err(
|e| crate::lex::transforms::TransformError::StageFailed {
stage: "Parser".to_string(),
message: e.to_string(),
},
)?;
output.root = ParseInlines::new().run(output.root)?;
if let Some(ref mut title) = output.title {
title.content.ensure_inline_parsed_with_anchors();
}
let doc = AttachRoot::new().run(output)?;
Ok((doc, prepass))
}
/// Runs the complete string-to-AST pipeline with a caller-selected label mode.
///
/// After `parse_to_attached_root` produces the attached document, this applies
/// `AttachAnnotations`, then `NormalizeLabels` (strict or permissive according
/// to `label_mode`), then `ApplyTableConfig`. Finally the reference lines and
/// diagnostics gathered by the prepass are stored on the document.
///
/// # Errors
///
/// Returns the first `TransformError` produced by any stage.
pub fn run_string_to_ast(
    s: String,
    label_mode: crate::lex::assembling::stages::normalize_labels::Mode,
) -> Result<crate::lex::ast::Document, crate::lex::transforms::TransformError> {
    use crate::lex::assembling::stages::normalize_labels::Mode;
    use crate::lex::assembling::stages::{ApplyTableConfig, NormalizeLabels};

    let (mut document, anchors) = parse_to_attached_root(s)?;
    document = AttachAnnotations::new().run(document)?;

    // Pick the label-normalization flavour requested by the caller.
    let label_stage = match label_mode {
        Mode::Permissive => NormalizeLabels::permissive(),
        Mode::Strict => NormalizeLabels::new(),
    };
    document = label_stage.run(document)?;
    document = ApplyTableConfig::new().run(document)?;

    // Carry the prepass results over onto the finished document.
    document.reference_lines = anchors.reference_lines;
    document.reference_line_diagnostics = anchors.diagnostics;
    Ok(document)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::token::Token;

    /// Returns `true` when at least one token in `tokens` satisfies `pred`.
    fn any_token(
        tokens: &[(Token, std::ops::Range<usize>)],
        pred: impl Fn(&Token) -> bool,
    ) -> bool {
        tokens.iter().any(|(token, _)| pred(token))
    }

    /// Core tokenization yields a non-empty stream containing a text token.
    #[test]
    fn test_core_tokenization() {
        let tokens = CORE_TOKENIZATION.run("Hello world\n".to_string()).unwrap();
        assert!(!tokens.is_empty());
        assert!(any_token(&tokens, |t| matches!(t, Token::Text(_))));
    }

    /// Indented content produces both an indent and a dedent token.
    #[test]
    fn test_lexing_adds_indentation() {
        let tokens = LEXING.run("Session:\n Content\n".to_string()).unwrap();
        assert!(any_token(&tokens, |t| matches!(t, Token::Indent(_))));
        assert!(any_token(&tokens, |t| matches!(t, Token::Dedent(_))));
    }

    /// Flat content produces neither indent nor dedent tokens.
    #[test]
    fn test_lexing_no_indentation() {
        let tokens = LEXING.run("Hello\n".to_string()).unwrap();
        assert!(!any_token(&tokens, |t| matches!(t, Token::Indent(_))));
        assert!(!any_token(&tokens, |t| matches!(t, Token::Dedent(_))));
    }

    /// A single line of text parses into a document with children.
    #[test]
    fn test_string_to_ast_simple() {
        let document = STRING_TO_AST.run("Hello world\n".to_string()).unwrap();
        assert!(!document.root.children.is_empty());
    }

    /// A session with indented content parses into a document with children.
    #[test]
    fn test_string_to_ast_with_session() {
        let input = "Session:\n Content here\n".to_string();
        let document = STRING_TO_AST.run(input).unwrap();
        assert!(!document.root.children.is_empty());
    }

    /// Input without a trailing newline still parses successfully.
    #[test]
    fn test_string_to_ast_adds_newline() {
        let document = STRING_TO_AST.run("Hello world".to_string()).unwrap();
        assert!(!document.root.children.is_empty());
    }

    /// The same static transform can be run repeatedly.
    #[test]
    fn test_transforms_are_reusable() {
        let first = LEXING.run("Hello\n".to_string()).unwrap();
        let second = LEXING.run("World\n".to_string()).unwrap();
        assert!(!first.is_empty());
        assert!(!second.is_empty());
    }
}