use serde_derive::Deserialize;
use std::collections::HashMap;
use std::iter::FromIterator;
use super::{ParseError, Parser, ParserConfig, Printer};
use crate::document::ast::Node;
use crate::document::code::CodeBlock;
use crate::document::text::TextBlock;
use crate::document::Document;
use crate::util::try_collect::TryCollectExt;
/// Configuration for reading and writing literate TeX documents.
///
/// Code blocks are delimited by `\begin{<code_environment>}` and
/// `\end{<code_environment>}`; the remaining string fields are handed out
/// unchanged through the `ParserConfig` implementation.
#[derive(Clone, Deserialize, Debug)]
pub struct TexParser {
/// Name of the TeX environment that delimits code blocks (`"code"` by default).
pub code_environment: String,
/// Language to fall back on when a code block's argument list carries no
/// `language` key (`None` by default).
pub default_language: Option<String>,
/// Returned by `ParserConfig::comment_start` (`"//"` by default).
pub comment_start: String,
/// Returned by `ParserConfig::interpolation_start` (`"@{"` by default).
pub interpolation_start: String,
/// Returned by `ParserConfig::interpolation_end` (`"}"` by default).
pub interpolation_end: String,
/// Returned by `ParserConfig::macro_start` (`"==> "` by default).
pub macro_start: String,
/// Returned by `ParserConfig::macro_end` (`"."` by default).
pub macro_end: String,
}
impl Default for TexParser {
    /// Builds the stock configuration: a `code` environment, no default
    /// language, `//` comments, `@{`/`}` interpolation and `==> `/`.` macros.
    fn default() -> Self {
        let code_environment = "code".to_owned();
        let comment_start = "//".to_owned();
        let interpolation_start = "@{".to_owned();
        let interpolation_end = "}".to_owned();
        let macro_start = "==> ".to_owned();
        let macro_end = ".".to_owned();
        Self {
            code_environment,
            default_language: None,
            comment_start,
            interpolation_start,
            interpolation_end,
            macro_start,
            macro_end,
        }
    }
}
impl TexParser {
    /// Creates a parser with the stock configuration whose default language
    /// is `language`.
    pub fn for_language(language: String) -> Self {
        let defaults = Self::default();
        Self {
            default_language: Some(language),
            ..defaults
        }
    }

    /// Returns a copy of this parser, replacing its default language when
    /// `language` is `Some`; passing `None` yields an unchanged clone.
    pub fn default_language(&self, language: Option<String>) -> Self {
        let mut parser = self.clone();
        if let Some(language) = language {
            parser.default_language = Some(language);
        }
        parser
    }
}
impl ParserConfig for TexParser {
    /// Borrows the configured `comment_start` string.
    fn comment_start(&self) -> &str {
        self.comment_start.as_str()
    }

    /// Borrows the configured `interpolation_start` string.
    fn interpolation_start(&self) -> &str {
        self.interpolation_start.as_str()
    }

    /// Borrows the configured `interpolation_end` string.
    fn interpolation_end(&self) -> &str {
        self.interpolation_end.as_str()
    }

    /// Borrows the configured `macro_start` string.
    fn macro_start(&self) -> &str {
        self.macro_start.as_str()
    }

    /// Borrows the configured `macro_end` string.
    fn macro_end(&self) -> &str {
        self.macro_end.as_str()
    }
}
impl TexParser {
    /// Parses the optional `[key=value,...]` list that may follow the
    /// `\begin{...}` of a code environment.
    ///
    /// Returns `Ok(None)` when `arg_string` does not start with `[`, and
    /// otherwise a map from trimmed argument names to trimmed values. A value
    /// is either plain text running up to the next `,`, or a `{...}` group in
    /// which braces nest and a brace directly preceded by `\` is not counted.
    /// The returned value of a braced argument excludes the outer braces.
    ///
    /// # Errors
    ///
    /// * `UnclosedArgumentList` - the list starts with `[` but does not end
    ///   with `]`.
    /// * `MissingValueForArgument` - remaining text contains no `=`.
    /// * `InvalidArgumentList` - a braced value is never closed, or is
    ///   followed by something other than `,` or the end of the list.
    fn parse_arguments<'a>(
        &self,
        arg_string: &'a str,
    ) -> Result<Option<HashMap<&'a str, &'a str>>, TexErrorKind> {
        if !arg_string.starts_with('[') {
            return Ok(None);
        }
        if !arg_string.ends_with(']') {
            return Err(TexErrorKind::UnclosedArgumentList);
        }

        let mut args = HashMap::new();
        // Strip the surrounding brackets; both are single-byte characters.
        let mut remaining = &arg_string[1..arg_string.len() - 1];
        while !remaining.is_empty() {
            let equal = remaining
                .find('=')
                .ok_or(TexErrorKind::MissingValueForArgument)?;
            let (name, rest) = remaining.split_at(equal);
            let rest = &rest[1..];

            let (value, rest) = if rest.starts_with('{') {
                let body = &rest[1..];
                let value_len =
                    Self::braced_value_len(body).ok_or(TexErrorKind::InvalidArgumentList)?;
                // Split the *whole* remainder so that the arguments following
                // the braced value are kept for the next iteration.
                let (value, after) = body.split_at(value_len);
                // `after` starts with the closing brace found above.
                let after = after[1..].trim_start();
                if after.is_empty() {
                    (value, after)
                } else if after.starts_with(',') {
                    (value, &after[1..])
                } else {
                    return Err(TexErrorKind::InvalidArgumentList);
                }
            } else {
                let comma = rest.find(',').unwrap_or(rest.len());
                let (value, rest) = rest.split_at(comma);
                (value, if rest.is_empty() { rest } else { &rest[1..] })
            };

            args.insert(name.trim(), value.trim());
            remaining = rest;
        }
        Ok(Some(args))
    }

    /// Returns the byte offset in `body` of the `}` that closes a group whose
    /// opening `{` has already been consumed, or `None` if it is never closed.
    fn braced_value_len(body: &str) -> Option<usize> {
        let mut depth = 1usize;
        let mut previous = '{';
        for (index, ch) in body.char_indices() {
            let escaped = previous == '\\';
            previous = ch;
            if escaped {
                continue;
            }
            if ch == '{' {
                depth += 1;
            } else if ch == '}' {
                depth -= 1;
                if depth == 0 {
                    return Some(index);
                }
            }
        }
        None
    }
}
impl Parser for TexParser {
type Error = TexError;
fn parse<'a>(&self, input: &'a str) -> Result<Document<'a>, Self::Error> {
struct State<'a> {
node: Node<'a>,
}
enum Parse<'a> {
Incomplete,
Complete(Node<'a>),
Error(TexError),
}
let env_start = format!("\\begin{{{}}}", self.code_environment);
let env_end = format!("\\end{{{}}}", self.code_environment);
let mut state = State {
node: Node::Text(TextBlock::new()),
};
let mut document = input
.lines()
.enumerate()
.scan(&mut state, |state, (line_number, line)| {
match &mut state.node {
Node::Code(code_block) => {
if !line.starts_with(code_block.indent) {
return Some(Parse::Error(TexError::Single {
line_number,
kind: TexErrorKind::IncorrectIndentation,
}));
}
let line = &line[code_block.indent.len()..];
if line.starts_with(&env_end) {
let node =
std::mem::replace(&mut state.node, Node::Text(TextBlock::new()));
Some(Parse::Complete(node))
} else {
let line = match self.parse_line(line_number, line) {
Ok(line) => line,
Err(error) => {
return Some(Parse::Error(TexError::Single {
line_number,
kind: error.into(),
}))
}
};
code_block.add_line(line);
Some(Parse::Incomplete)
}
}
Node::Text(text_block) => {
if line.trim_start().starts_with(&env_start) {
let indent_length = line.find(&env_start).unwrap();
let (indent, rest) = line.split_at(indent_length);
let rest = &rest[env_start.len()..].trim();
let args = match self.parse_arguments(rest) {
Ok(args) => args,
Err(kind) => {
return Some(Parse::Error(TexError::Single {
line_number,
kind,
}))
}
};
let mut code_block = CodeBlock::new().indented(indent);
if let Some(args) = args {
if let Some(name) = args.get("name") {
let (name, vars) = match self.parse_name(name) {
Ok(name) => name,
Err(error) => {
return Some(Parse::Error(TexError::Single {
line_number,
kind: error.into(),
}))
}
};
code_block = code_block.named(name, vars);
}
code_block = match args.get("language") {
Some(language) => code_block.in_language(language.to_string()),
None => match &self.default_language {
Some(language) => {
code_block.in_language(language.to_string())
}
None => code_block,
},
};
}
let node = std::mem::replace(&mut state.node, Node::Code(code_block));
Some(Parse::Complete(node))
} else {
text_block.add_line(line);
Some(Parse::Incomplete)
}
}
}
})
.filter_map(|parse| match parse {
Parse::Incomplete => None,
Parse::Error(error) => Some(Err(error)),
Parse::Complete(node) => Some(Ok(node)),
})
.try_collect::<_, _, Vec<_>, TexError>()?;
document.push(state.node);
Ok(Document::from_iter(document))
}
}
impl Printer for TexParser {
    /// Renders a text block as its string form followed by a newline.
    fn print_text_block<'a>(&self, block: &TextBlock<'a>) -> String {
        let text = block.to_string();
        format!("{}\n", text)
    }

    /// Renders a code block as a `\begin{...}` / `\end{...}` environment.
    ///
    /// When the block has a language and/or a name they are written as a
    /// `[language=...,name={...}]` list; `{`, `}` and `_` in the name are
    /// backslash-escaped. Each source line is printed through `print_line`
    /// on its own line.
    fn print_code_block<'a>(&self, block: &CodeBlock<'a>) -> String {
        let has_arguments = block.language.is_some() || block.name.is_some();

        let mut output = format!("\\begin{{{}}}", self.code_environment);
        if has_arguments {
            output.push('[');
        }
        if let Some(language) = &block.language {
            output.push_str("language=");
            output.push_str(language);
            output.push(',');
        }
        if let Some(name) = &block.name {
            let escaped = name
                .replace("{", "\\{")
                .replace("}", "\\}")
                .replace("_", "\\_");
            output.push_str("name={");
            output.push_str(&escaped);
            output.push('}');
        }
        if has_arguments {
            output.push(']');
        }
        output.push('\n');

        for line in &block.source {
            let printed = self.print_line(&line, true);
            output.push_str(&printed);
            output.push('\n');
        }

        let footer = format!("\\end{{{}}}\n", self.code_environment);
        output.push_str(&footer);
        output
    }
}
/// The reason a single line of a TeX document could not be parsed.
#[derive(Debug)]
pub enum TexErrorKind {
/// A line inside a code block does not start with the block's indent.
IncorrectIndentation,
/// An entry of the argument list has no `=` separating name and value.
MissingValueForArgument,
/// The argument list starts with `[` but does not end with `]`.
UnclosedArgumentList,
/// The argument list is otherwise malformed, such as text following a
/// braced value without a separating `,`.
InvalidArgumentList,
/// A `ParseError` converted via `From`, as produced for failures of
/// `parse_line` and `parse_name`.
Parse(ParseError),
}
/// Error returned by `TexParser` when parsing a document fails.
#[derive(Debug)]
pub enum TexError {
/// One failure: `line_number` is the index of the offending line as
/// produced by `enumerate` (so the first line is 0), `kind` the reason.
#[doc(hidden)]
Single {
line_number: usize,
kind: TexErrorKind,
},
/// Several errors gathered together, e.g. via `FromIterator` or
/// `From<Vec<TexError>>`.
#[doc(hidden)]
Multi(Vec<TexError>),
}
impl std::fmt::Display for TexError {
    /// Writes a `Single` error as `<kind:?> (line <n>)` followed by a newline,
    /// and a `Multi` error as each contained error followed by a newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TexError::Single { line_number, kind } => {
                writeln!(f, "{:?} (line {})", kind, line_number)
            }
            TexError::Multi(errors) => errors
                .iter()
                .try_for_each(|error| writeln!(f, "{}", error)),
        }
    }
}
// Empty impl: every `Error` method keeps its default, so no `source` is reported.
impl std::error::Error for TexError {}
impl FromIterator<TexError> for TexError {
    /// Gathers every error yielded by `iter` into a single `TexError::Multi`,
    /// preserving iteration order.
    fn from_iter<I>(iter: I) -> Self
    where
        I: IntoIterator<Item = TexError>,
    {
        let errors: Vec<TexError> = iter.into_iter().collect();
        TexError::Multi(errors)
    }
}
impl From<Vec<TexError>> for TexError {
    /// Wraps an already collected list of errors in `TexError::Multi`.
    fn from(errors: Vec<TexError>) -> Self {
        let wrapped = TexError::Multi(errors);
        wrapped
    }
}
impl From<ParseError> for TexErrorKind {
    /// Wraps a `ParseError` in the `Parse` variant so it can be reported as a
    /// `TexErrorKind`.
    fn from(parse_error: ParseError) -> Self {
        let kind = TexErrorKind::Parse(parse_error);
        kind
    }
}