use crate::lex::assembling::{AttachAnnotations, AttachRoot};
use crate::lex::parsing::ir::ParseNode;
use crate::lex::parsing::Document;
use crate::lex::token::Token;
use crate::lex::transforms::stages::{
CoreTokenization, ParseInlines, Parsing, SemanticIndentation,
};
use crate::lex::transforms::{Runnable, Transform};
use once_cell::sync::Lazy;
use std::ops::Range;
/// A flat stream of lexed tokens, each paired with its source range
/// (offsets into the input string — byte vs. char offsets not visible
/// here; confirm against the tokenizer).
pub type TokenStream = Vec<(Token, Range<usize>)>;
/// Pipeline that turns raw source text into a [`TokenStream`].
pub type LexingTransform = Transform<String, TokenStream>;
/// Pipeline that turns raw source text into a fully assembled [`Document`].
pub type AstTransform = Transform<String, Document>;
/// Tokenization only: runs [`CoreTokenization`] on the raw source,
/// without the semantic-indentation pass (no Indent/Dedent tokens).
pub static CORE_TOKENIZATION: Lazy<LexingTransform> = Lazy::new(|| {
    // Start from the identity stage, then attach the tokenizer.
    let identity = Transform::from_fn(Ok);
    identity.then(CoreTokenization::new())
});
/// Full lexing pipeline: core tokenization followed by the
/// semantic-indentation pass (which synthesizes Indent/Dedent tokens —
/// see the tests below).
pub static LEXING: Lazy<LexingTransform> = Lazy::new(|| {
    let tokenize = Transform::from_fn(Ok).then(CoreTokenization::new());
    tokenize.then(SemanticIndentation::new())
});
/// Pipeline that turns raw source text into the parser's IR tree.
pub type IrTransform = Transform<String, ParseNode>;
/// Parses raw source text straight to the intermediate [`ParseNode`]
/// representation via the [`Parsing`] stage.
pub static TO_IR: Lazy<IrTransform> = Lazy::new(|| {
    let identity = Transform::from_fn(Ok);
    identity.then(Parsing::new())
});
/// End-to-end pipeline: source text → lexed tokens → parsed document →
/// inline parsing → assembly passes (attach root, attach annotations,
/// apply table config).
///
/// Fix: the newline normalization previously used `format!("{s}\n")`,
/// allocating and copying a brand-new String even though the closure
/// owns `s`. Appending in place with `String::push` preserves behavior
/// (empty input and already-terminated input are untouched) without the
/// full copy.
pub static STRING_TO_AST: Lazy<AstTransform> = Lazy::new(|| {
    Transform::from_fn(|s: String| {
        // Normalize: ensure non-empty input ends with '\n' (the lexer/
        // parser downstream expect newline-terminated lines).
        let mut source = s;
        if !source.is_empty() && !source.ends_with('\n') {
            source.push('\n');
        }
        // `run` consumes its input, but `parse_from_flat_tokens` also
        // needs the source text (presumably for span/text lookup —
        // confirm in the engine), so one clone is unavoidable here.
        let tokens = LEXING.run(source.clone())?;
        let mut output = crate::lex::parsing::engine::parse_from_flat_tokens(tokens, &source)
            .map_err(|e| crate::lex::transforms::TransformError::StageFailed {
                stage: "Parser".to_string(),
                message: e.to_string(),
            })?;
        // Resolve inline markup in the body, and in the title if present.
        output.root = ParseInlines::new().run(output.root)?;
        if let Some(ref mut title) = output.title {
            title.content.ensure_inline_parsed();
        }
        // Assembly passes run in order; each consumes and returns the doc.
        let mut doc = AttachRoot::new().run(output)?;
        doc = AttachAnnotations::new().run(doc)?;
        doc = crate::lex::assembling::stages::ApplyTableConfig::new().run(doc)?;
        Ok(doc)
    })
});
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::token::Token;

    /// True when the stream contains at least one `Indent` token.
    fn contains_indent(stream: &TokenStream) -> bool {
        stream.iter().any(|(tok, _)| matches!(tok, Token::Indent(_)))
    }

    /// True when the stream contains at least one `Dedent` token.
    fn contains_dedent(stream: &TokenStream) -> bool {
        stream.iter().any(|(tok, _)| matches!(tok, Token::Dedent(_)))
    }

    #[test]
    fn test_core_tokenization() {
        let stream = CORE_TOKENIZATION.run("Hello world\n".to_string()).unwrap();
        assert!(!stream.is_empty());
        assert!(stream.iter().any(|(tok, _)| matches!(tok, Token::Text(_))));
    }

    #[test]
    fn test_lexing_adds_indentation() {
        let stream = LEXING.run("Session:\n Content\n".to_string()).unwrap();
        assert!(contains_indent(&stream));
        assert!(contains_dedent(&stream));
    }

    #[test]
    fn test_lexing_no_indentation() {
        let stream = LEXING.run("Hello\n".to_string()).unwrap();
        assert!(!contains_indent(&stream));
        assert!(!contains_dedent(&stream));
    }

    #[test]
    fn test_string_to_ast_simple() {
        let doc = STRING_TO_AST.run("Hello world\n".to_string()).unwrap();
        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_string_to_ast_with_session() {
        let doc = STRING_TO_AST.run("Session:\n Content here\n".to_string()).unwrap();
        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_string_to_ast_adds_newline() {
        // Input deliberately lacks a trailing newline; the pipeline adds it.
        let doc = STRING_TO_AST.run("Hello world".to_string()).unwrap();
        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_transforms_are_reusable() {
        // The lazily initialized statics must be runnable repeatedly.
        let first = LEXING.run("Hello\n".to_string()).unwrap();
        let second = LEXING.run("World\n".to_string()).unwrap();
        assert!(!first.is_empty());
        assert!(!second.is_empty());
    }
}