use super::commands::process_line;
use super::environments::{process_list, process_table, process_table_with_caption};
use super::metadata::extract_metadata_from_line;
use super::utilities::{collect_environment, extract_braced, extract_env_name};
use crate::types::{Metadata, Table};
pub struct LatexParser<'a> {
source: &'a str,
metadata: Metadata,
tables: Vec<Table>,
output: String,
}
impl<'a> LatexParser<'a> {
pub fn new(source: &'a str) -> Self {
Self {
source,
metadata: Metadata::default(),
tables: Vec::new(),
output: String::new(),
}
}
pub fn parse(&mut self) -> (String, Metadata, Vec<Table>) {
let lines: Vec<&str> = self.source.lines().collect();
let mut in_document = false;
let mut skip_until_end = None::<String>;
let mut i = 0;
let is_plain_tex = self.source.contains("\\bye") && !self.source.contains("\\begin{document}");
if is_plain_tex {
in_document = true;
}
while i < lines.len() {
let line = lines[i];
let trimmed = line.trim();
if let Some(ref env) = skip_until_end {
if trimmed.contains(&format!("\\end{{{}}}", env)) {
skip_until_end = None;
}
i += 1;
continue;
}
if is_plain_tex && trimmed.contains("\\bye") {
break;
}
if !in_document && !is_plain_tex {
extract_metadata_from_line(trimmed, &mut self.metadata);
}
if !is_plain_tex && trimmed.contains("\\begin{document}") {
in_document = true;
if trimmed.contains("\\end{document}") {
self.process_single_line_document(trimmed);
break;
}
i += 1;
continue;
}
if !is_plain_tex && trimmed.contains("\\end{document}") {
break;
}
if in_document {
if self.process_environments(&lines, trimmed, &mut i, &mut skip_until_end) {
continue;
}
self.process_sections_and_content(trimmed, &lines, &mut i);
}
i += 1;
}
let content = self.output.trim().to_string();
(content, self.metadata.clone(), self.tables.clone())
}
fn process_single_line_document(&mut self, trimmed: &str) {
let Some(begin_pos) = trimmed.find("\\begin{document}") else {
return;
};
let Some(end_pos) = trimmed.find("\\end{document}") else {
return;
};
let content_between = trimmed[begin_pos + 16..end_pos].trim();
if !content_between.is_empty() {
if content_between.starts_with("\\section{") {
if let Some(title) = extract_braced(content_between, "section") {
self.output.push_str(&format!("\n# {}\n\n", title));
}
} else {
let processed = process_line(content_between);
if !processed.is_empty() {
self.output.push_str(&processed);
self.output.push('\n');
}
}
}
}
fn process_environments(
&mut self,
lines: &[&str],
trimmed: &str,
i: &mut usize,
skip_until_end: &mut Option<String>,
) -> bool {
if !trimmed.contains("\\begin{") {
return false;
}
let Some(env_name) = extract_env_name(trimmed) else {
return false;
};
match env_name.as_str() {
"itemize" | "enumerate" | "description" => {
let (env_content, new_i) = collect_environment(lines, *i, &env_name);
process_list(&env_content, &env_name, &mut self.output);
*i = new_i;
true
}
"tabular" => {
let (env_content, new_i) = collect_environment(lines, *i, "tabular");
process_table(&env_content, &mut self.output, &mut self.tables);
*i = new_i;
true
}
"table" => {
let (env_content, new_i) = collect_environment(lines, *i, "table");
process_table_with_caption(&env_content, &mut self.output, &mut self.tables);
*i = new_i;
true
}
"equation" | "align" | "gather" | "multline" => {
let (env_content, new_i) = collect_environment(lines, *i, &env_name);
self.output.push_str("$$\\begin{");
self.output.push_str(&env_name);
self.output.push_str("}\n");
self.output.push_str(&env_content);
self.output.push_str("\\end{");
self.output.push_str(&env_name);
self.output.push_str("}$$\n\n");
*i = new_i;
true
}
_ => {
*skip_until_end = Some(env_name);
false
}
}
}
fn process_sections_and_content(&mut self, trimmed: &str, lines: &[&str], i: &mut usize) {
if trimmed.starts_with("\\section{") {
if let Some(title) = extract_braced(trimmed, "section") {
self.output.push_str(&format!("\n# {}\n\n", title));
}
} else if trimmed.starts_with("\\subsection{") {
if let Some(title) = extract_braced(trimmed, "subsection") {
self.output.push_str(&format!("## {}\n\n", title));
}
} else if trimmed.starts_with("\\subsubsection{") {
if let Some(title) = extract_braced(trimmed, "subsubsection") {
self.output.push_str(&format!("### {}\n\n", title));
}
} else if trimmed.starts_with("\\[") {
self.process_display_math(trimmed, lines, i);
} else if !trimmed.is_empty() && !trimmed.starts_with("%") {
let processed = process_line(trimmed);
if !processed.is_empty() {
self.output.push_str(&processed);
self.output.push('\n');
}
}
}
fn process_display_math(&mut self, trimmed: &str, lines: &[&str], i: &mut usize) {
let mut math_content = trimmed.to_string();
if !trimmed.contains("\\]") {
*i += 1;
while *i < lines.len() {
let math_line = lines[*i];
math_content.push('\n');
math_content.push_str(math_line);
if math_line.trim().contains("\\]") {
break;
}
*i += 1;
}
}
self.output.push_str(&math_content);
self.output.push('\n');
}
}