#![allow(clippy::while_let_on_iterator)]
/// Horizontal alignment of a table column or of a single cell.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Alignment {
/// No explicit alignment; the serializers in this file write it out as left.
#[default]
Default,
/// Left-aligned (`l` in LaTeX, `left` in Typst).
Left,
/// Centered (`c` in LaTeX, `center` in Typst).
Center,
/// Right-aligned (`r` in LaTeX, `right` in Typst).
Right,
}
impl Alignment {
pub fn from_latex_char(c: char) -> Self {
match c {
'l' => Alignment::Left,
'c' => Alignment::Center,
'r' => Alignment::Right,
'p' | 'm' | 'b' | 'X' => Alignment::Left, _ => Alignment::Default,
}
}
pub fn to_latex_char(&self) -> char {
match self {
Alignment::Left | Alignment::Default => 'l',
Alignment::Center => 'c',
Alignment::Right => 'r',
}
}
pub fn to_typst(&self) -> &'static str {
match self {
Alignment::Left | Alignment::Default => "left",
Alignment::Center => "center",
Alignment::Right => "right",
}
}
}
/// Width of a table column.
#[derive(Debug, Clone, PartialEq, Default)]
pub enum ColWidth {
/// No explicit width; written to Typst as `auto`.
#[default]
Auto,
/// A numeric width. The LaTeX parser in this file stores `1.0` for `X`
/// columns, and the Typst writer emits the value multiplied by 100 followed
/// by `%`, so it behaves like a fraction of the available width.
// NOTE(review): the intended unit is not stated anywhere in view; confirm.
Fixed(f64),
/// A percentage; the Typst writer emits the value followed by `%`.
Percent(f64),
}
/// Specification of one table column: alignment, width and vertical borders.
#[derive(Debug, Clone, Default)]
pub struct ColSpec {
/// Horizontal alignment applied to the column.
pub alignment: Alignment,
/// Column width.
pub width: ColWidth,
/// Whether a vertical rule is drawn to the left of the column. The LaTeX
/// preamble parser in this file only sets it when a `|` appears before the
/// first column; later bars are recorded as the previous column's right border.
pub has_left_border: bool,
/// Whether a vertical rule is drawn to the right of the column.
pub has_right_border: bool,
}
/// A single table cell.
#[derive(Debug, Clone)]
pub struct Cell {
/// Raw cell text, kept in the markup of the source format.
pub content: String,
/// Number of columns the cell spans; `1` for an ordinary cell.
pub colspan: u32,
/// Number of rows the cell spans; `1` for an ordinary cell.
pub rowspan: u32,
/// Per-cell alignment override; `None` when the cell has none.
pub alignment: Option<Alignment>,
}
impl Cell {
    /// Creates an ordinary cell (one column, one row, no alignment override).
    pub fn new(content: String) -> Self {
        Self::with_span(content, 1, 1)
    }

    /// Creates a cell spanning `colspan` columns and `rowspan` rows, without
    /// an alignment override.
    pub fn with_span(content: String, colspan: u32, rowspan: u32) -> Self {
        Self {
            alignment: None,
            rowspan,
            colspan,
            content,
        }
    }

    /// Returns `true` when the content is empty or consists only of whitespace.
    pub fn is_empty(&self) -> bool {
        let visible = self.content.trim();
        visible.is_empty()
    }
}
impl Default for Cell {
    /// An empty 1x1 cell with no alignment override.
    fn default() -> Self {
        Self {
            content: String::new(),
            colspan: 1,
            rowspan: 1,
            alignment: None,
        }
    }
}
/// One table row.
#[derive(Debug, Clone, Default)]
pub struct Row {
/// Cells of the row, in column order.
pub cells: Vec<Cell>,
/// Whether a horizontal rule follows the row (the LaTeX writer in this file
/// emits `\hline` after such a row).
pub has_bottom_border: bool,
}
impl Row {
pub fn new() -> Self {
Self::default()
}
pub fn push(&mut self, cell: Cell) {
self.cells.push(cell);
}
pub fn is_empty(&self) -> bool {
self.cells.is_empty() || self.cells.iter().all(|c| c.is_empty())
}
}
/// Caption of a table.
#[derive(Debug, Clone, Default)]
pub struct Caption {
/// Optional short form (the `[...]` argument of LaTeX `\caption`).
pub short: Option<String>,
/// Full caption text (the braced argument of LaTeX `\caption`).
pub long: String,
}
/// Format-independent table model shared by the LaTeX and Typst code paths.
#[derive(Debug, Clone)]
pub struct Table {
/// One entry per column; its length is the table's column count.
pub colspecs: Vec<ColSpec>,
/// Header rows.
pub header: Vec<Row>,
/// Body rows.
pub body: Vec<Row>,
/// Footer rows. None of the parsing code in view fills this; it is only
/// written out by the serializers.
pub footer: Vec<Row>,
/// Optional caption.
pub caption: Option<Caption>,
/// Optional cross-reference label.
pub label: Option<String>,
/// Whether a horizontal rule is drawn above the first row. None of the
/// parsing code in view sets this to `true`.
pub has_top_border: bool,
}
impl Table {
    /// Creates an empty table with `num_cols` default column specifications,
    /// no rows, no caption, no label and no top border.
    pub fn new(num_cols: usize) -> Self {
        let colspecs = std::iter::repeat(ColSpec::default())
            .take(num_cols)
            .collect();
        Self {
            colspecs,
            header: Vec::new(),
            body: Vec::new(),
            footer: Vec::new(),
            caption: None,
            label: None,
            has_top_border: false,
        }
    }

    /// Number of columns, i.e. the number of column specifications.
    pub fn num_cols(&self) -> usize {
        self.colspecs.len()
    }

    /// Appends `row` to the header section.
    pub fn push_header(&mut self, row: Row) {
        self.header.push(row);
    }

    /// Appends `row` to the body section.
    pub fn push_body(&mut self, row: Row) {
        self.body.push(row);
    }

    /// Appends `row` to the footer section.
    pub fn push_footer(&mut self, row: Row) {
        self.footer.push(row);
    }
}
/// Parses the first LaTeX `tabular`, `longtable` or `tabularx` environment
/// found in `input` into a [`Table`].
///
/// Returns `None` when no such environment is present or when no column
/// specification can be extracted. The caption and label are taken from
/// anywhere in `input`. The first parsed row becomes the header and all
/// remaining rows go to the body.
pub fn parse_latex_table(input: &str) -> Option<Table> {
    const ENVIRONMENTS: [&str; 3] = [
        "\\begin{tabular}",
        "\\begin{longtable}",
        "\\begin{tabularx}",
    ];
    if !ENVIRONMENTS.iter().any(|env| input.contains(*env)) {
        return None;
    }

    let colspecs = extract_colspecs(input)?;
    let mut table = Table::new(colspecs.len());
    table.colspecs = colspecs;
    table.caption = extract_caption(input);
    table.label = extract_label(input);

    let content = extract_table_content(input);
    let mut awaiting_header = true;
    for row in parse_rows(&content, table.num_cols()) {
        // While the header is still empty the second condition is always
        // true, so exactly one row (the first) ends up in the header.
        let goes_to_header =
            awaiting_header && (row.has_bottom_border || table.header.is_empty());
        if goes_to_header {
            table.push_header(row);
            awaiting_header = false;
        } else {
            table.push_body(row);
        }
    }
    Some(table)
}
fn extract_colspecs(input: &str) -> Option<Vec<ColSpec>> {
let begin_pattern = if input.contains("\\begin{tabularx}") {
"\\begin{tabularx}"
} else if input.contains("\\begin{longtable}") {
"\\begin{longtable}"
} else {
"\\begin{tabular}"
};
let start = input.find(begin_pattern)? + begin_pattern.len();
let rest = &input[start..];
let rest = if begin_pattern == "\\begin{tabularx}" {
skip_braced_arg(rest)
} else {
rest.trim_start()
};
if !rest.starts_with('{') {
return None;
}
let end = find_matching_brace(rest)?;
let spec_str = &rest[1..end];
let mut colspecs: Vec<ColSpec> = Vec::new();
let mut has_left_border = false;
let mut chars = spec_str.chars().peekable();
while let Some(c) = chars.next() {
match c {
'|' => {
if colspecs.is_empty() {
has_left_border = true;
} else if let Some(last) = colspecs.last_mut() {
last.has_right_border = true;
}
}
'l' | 'c' | 'r' => {
let spec = ColSpec {
alignment: Alignment::from_latex_char(c),
has_left_border,
..Default::default()
};
has_left_border = false;
colspecs.push(spec);
}
'p' | 'm' | 'b' => {
if chars.peek() == Some(&'{') {
skip_braced_content(&mut chars);
}
let spec = ColSpec {
alignment: Alignment::Left,
has_left_border,
..Default::default()
};
has_left_border = false;
colspecs.push(spec);
}
'X' => {
let spec = ColSpec {
alignment: Alignment::Left,
width: ColWidth::Fixed(1.0), has_left_border,
..Default::default()
};
has_left_border = false;
colspecs.push(spec);
}
'*' => {
if chars.peek() == Some(&'{') {
if let Some(count) = extract_repeat_count(&mut chars) {
if let Some(repeat_spec) = extract_repeat_spec(&mut chars) {
for _ in 0..count {
for rc in repeat_spec.chars() {
if rc == '|' {
if colspecs.is_empty() {
has_left_border = true;
} else if let Some(last) = colspecs.last_mut() {
last.has_right_border = true;
}
} else if "lcr".contains(rc) {
let spec = ColSpec {
alignment: Alignment::from_latex_char(rc),
has_left_border,
..Default::default()
};
has_left_border = false;
colspecs.push(spec);
}
}
}
}
}
}
}
'@' | '>' | '<' | '!' => {
if chars.peek() == Some(&'{') {
skip_braced_content(&mut chars);
}
}
_ => {}
}
}
if colspecs.is_empty() {
None
} else {
Some(colspecs)
}
}
/// Skips leading whitespace and one balanced `{...}` argument.
///
/// Returns the text after the closing brace. When `s` (after trimming) does
/// not start with `{`, or the brace is never closed, the trimmed input is
/// returned unchanged.
fn skip_braced_arg(s: &str) -> &str {
    let trimmed = s.trim_start();
    let close = if trimmed.starts_with('{') {
        find_matching_brace(trimmed)
    } else {
        None
    };
    match close {
        Some(idx) => &trimmed[idx + 1..],
        None => trimmed,
    }
}
/// Returns the byte index of the `}` that brings the brace depth back to zero.
///
/// Callers pass a string that starts with `{`, so the result is the index of
/// the brace closing that first group. A backslash escapes the character
/// after it: `\{` and `\}` are literal braces in LaTeX and do not take part
/// in nesting, and `\\` is consumed as a pair so that a following `}` still
/// counts. Returns `None` when no such closing brace exists.
fn find_matching_brace(s: &str) -> Option<usize> {
    let mut depth: i32 = 0;
    let mut chars = s.char_indices();
    while let Some((i, c)) = chars.next() {
        match c {
            '\\' => {
                // Skip the escaped character, whatever it is.
                chars.next();
            }
            '{' => depth += 1,
            '}' => {
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None
}
/// Consumes one balanced `{...}` group from `chars`.
///
/// Does nothing when the next character is not `{`. When the group is never
/// closed, the iterator is consumed to the end.
fn skip_braced_content(chars: &mut std::iter::Peekable<std::str::Chars>) {
    if chars.next_if_eq(&'{').is_none() {
        return;
    }
    let mut open_groups = 1usize;
    for c in chars.by_ref() {
        if c == '{' {
            open_groups += 1;
        } else if c == '}' {
            open_groups -= 1;
            if open_groups == 0 {
                return;
            }
        }
    }
}
/// Reads the `{n}` repeat count of a `*{n}{spec}` column specifier.
///
/// Returns `None` without consuming anything when the next character is not
/// `{`. Otherwise everything up to and including the first `}` is consumed,
/// and the text in between is trimmed and parsed as `usize`; `None` is
/// returned when it is not a number.
fn extract_repeat_count(chars: &mut std::iter::Peekable<std::str::Chars>) -> Option<usize> {
    chars.next_if_eq(&'{')?;
    // `take_while` also swallows the terminating `}`.
    let digits: String = chars.by_ref().take_while(|&c| c != '}').collect();
    digits.trim().parse().ok()
}
/// Reads the `{spec}` part of a `*{n}{spec}` column specifier.
///
/// Returns `None` without consuming anything when the next character is not
/// `{`. Otherwise returns the text inside the balanced group, with nested
/// braces kept verbatim. An unterminated group yields everything up to the
/// end of the input.
fn extract_repeat_spec(chars: &mut std::iter::Peekable<std::str::Chars>) -> Option<String> {
    chars.next_if_eq(&'{')?;
    let mut collected = String::new();
    let mut open_groups = 1usize;
    for c in chars.by_ref() {
        if c == '{' {
            open_groups += 1;
        } else if c == '}' {
            open_groups -= 1;
            if open_groups == 0 {
                break;
            }
        }
        collected.push(c);
    }
    Some(collected)
}
/// Extracts the first `\caption[short]{long}` (or `\caption*{...}`) from `input`.
///
/// Commands that merely start with `\caption`, such as `\captionsetup` or
/// `\captionof`, are skipped. The short form is the text up to the first `]`.
/// Returns `None` when no caption with a balanced braced argument is found.
fn extract_caption(input: &str) -> Option<Caption> {
    const COMMAND: &str = "\\caption";
    let mut cursor = 0;
    while let Some(offset) = input[cursor..].find(COMMAND) {
        cursor += offset + COMMAND.len();
        let rest = &input[cursor..];
        // A letter right after the name means this is a different command.
        if rest.starts_with(|c: char| c.is_ascii_alphabetic()) {
            continue;
        }
        // The starred form takes the same arguments.
        let rest = rest.strip_prefix('*').unwrap_or(rest);
        let trimmed = rest.trim_start();
        let (short, rest) = match trimmed
            .strip_prefix('[')
            .and_then(|inner| inner.split_once(']'))
        {
            Some((short, after)) => (Some(short.to_string()), after),
            None => (None, trimmed),
        };
        let rest = rest.trim_start();
        if rest.starts_with('{') {
            if let Some(end) = find_matching_brace(rest) {
                return Some(Caption {
                    short,
                    long: rest[1..end].to_string(),
                });
            }
        }
    }
    None
}
/// Returns the argument of the first `\label{...}` in `input`.
///
/// The argument ends at the first `}`. Returns `None` when there is no
/// `\label{` or it is never closed.
fn extract_label(input: &str) -> Option<String> {
    let (_, after_open) = input.split_once("\\label{")?;
    let (name, _) = after_open.split_once('}')?;
    Some(name.to_string())
}
/// Returns the text between the preamble of the first table environment and
/// its matching `\end{...}`.
///
/// Environments are tried in the order `tabular`, `tabularx`, `longtable`.
/// For `tabularx` both the width argument and the column preamble are
/// skipped, so the preamble no longer leaks into the first cell. When no
/// complete environment is found the whole input is returned.
fn extract_table_content(input: &str) -> String {
    // (begin marker, end marker, number of braced arguments after the marker)
    let environments = [
        ("\\begin{tabular}", "\\end{tabular}", 1),
        ("\\begin{tabularx}", "\\end{tabularx}", 2),
        ("\\begin{longtable}", "\\end{longtable}", 1),
    ];
    for (begin, end, arg_count) in environments {
        if let Some(begin_pos) = input.find(begin) {
            let mut body = &input[begin_pos + begin.len()..];
            for arg_index in 0..arg_count {
                let open = if arg_index == 0 {
                    // The first argument is located leniently, which also
                    // steps over an optional `[pos]` argument.
                    body.find('{')
                } else {
                    // Later arguments must follow directly, optionally after
                    // whitespace and a `[pos]` argument, so that a missing
                    // argument cannot swallow table content.
                    let next = body.trim_start();
                    let next = match next
                        .strip_prefix('[')
                        .and_then(|inner| inner.split_once(']'))
                    {
                        Some((_, after)) => after.trim_start(),
                        None => next,
                    };
                    if next.starts_with('{') {
                        // `next` is a suffix of `body`, so this is its offset.
                        Some(body.len() - next.len())
                    } else {
                        None
                    }
                };
                let close = open
                    .and_then(|start| find_matching_brace(&body[start..]).map(|len| start + len));
                match close {
                    Some(idx) => body = &body[idx + 1..],
                    None => break,
                }
            }
            if let Some(end_pos) = body.find(end) {
                return body[..end_pos].to_string();
            }
        }
    }
    input.to_string()
}
/// Splits the body of a LaTeX table environment into rows and cells.
///
/// * Cells are separated by `&` and rows by `\\`, both only outside braces.
///   An optional `*` or `[length]` directly after `\\` is dropped.
/// * A backslash escapes the next character, so `\&`, `\%`, `\{` and `\}`
///   stay in the cell text and do not act as separators or braces.
/// * `\hline`, `\toprule`, `\midrule`, `\bottomrule` and `\cmidrule` at the
///   start of a line or directly after a row terminator set the bottom
///   border of the current row (if it already has content) or of the
///   previous row. The rest of that line is discarded.
/// * Lines starting with `\endhead` or `\endfirsthead` are ignored.
/// * An unescaped `%` outside braces starts a comment running to the end of
///   the line.
/// * A cell may span several lines; the line break counts as a space.
///
/// Rows whose cells are all blank are dropped. `_num_cols` is currently unused.
fn parse_rows(content: &str, _num_cols: usize) -> Vec<Row> {
    const RULES: [&str; 5] = [
        "\\hline",
        "\\toprule",
        "\\midrule",
        "\\bottomrule",
        "\\cmidrule",
    ];

    let mut rows: Vec<Row> = Vec::new();
    let mut current_row = Row::new();
    let mut current_cell = String::new();
    // Unsigned and saturating, so a stray `}` cannot push the depth below
    // zero and disable `&` splitting for the rest of the table.
    let mut depth: usize = 0;

    for line in content.lines() {
        let mut rest = line.trim();
        if rest.starts_with("\\endhead") || rest.starts_with("\\endfirsthead") {
            continue;
        }
        // A cell continued from the previous line: the newline is whitespace.
        if !current_cell.is_empty() && !rest.is_empty() {
            current_cell.push(' ');
        }

        // Each iteration handles one row segment of the line.
        loop {
            rest = rest.trim_start();
            if RULES.iter().any(|rule| rest.starts_with(*rule)) {
                if !current_row.is_empty() {
                    current_row.has_bottom_border = true;
                } else if let Some(last) = rows.last_mut() {
                    last.has_bottom_border = true;
                }
                break;
            }

            // Byte offset just past a row-terminating `\\`, if one is found.
            let mut row_end = None;
            let mut chars = rest.char_indices();
            while let Some((_, c)) = chars.next() {
                match c {
                    '\\' => match chars.next() {
                        Some((idx, '\\')) if depth == 0 => {
                            row_end = Some(idx + 1);
                            break;
                        }
                        Some((_, escaped)) => {
                            current_cell.push('\\');
                            current_cell.push(escaped);
                        }
                        None => current_cell.push('\\'),
                    },
                    // Only outside braces, so `%` inside e.g. `\url{...}` survives.
                    '%' if depth == 0 => break,
                    '{' => {
                        depth += 1;
                        current_cell.push(c);
                    }
                    '}' => {
                        depth = depth.saturating_sub(1);
                        current_cell.push(c);
                    }
                    '&' if depth == 0 => {
                        current_row.push(parse_cell(current_cell.trim()));
                        current_cell.clear();
                    }
                    _ => current_cell.push(c),
                }
            }

            let end = match row_end {
                Some(end) => end,
                None => break,
            };

            let last_cell = current_cell.trim();
            if !last_cell.is_empty() || !current_row.cells.is_empty() {
                current_row.push(parse_cell(last_cell));
            }
            current_cell.clear();
            let finished = std::mem::take(&mut current_row);
            if !finished.is_empty() {
                rows.push(finished);
            }

            // Drop the `*` / `[length]` modifiers of `\\` before continuing.
            let tail = &rest[end..];
            let tail = tail.strip_prefix('*').unwrap_or(tail);
            rest = match tail
                .strip_prefix('[')
                .and_then(|arg| arg.split_once(']'))
            {
                Some((_, after)) => after,
                None => tail,
            };
        }
    }

    // Flush a final row that was not terminated by `\\`.
    if !current_cell.trim().is_empty() {
        current_row.push(parse_cell(current_cell.trim()));
    }
    if !current_row.is_empty() {
        rows.push(current_row);
    }
    rows
}
/// Builds a [`Cell`] from raw LaTeX cell text.
///
/// Text starting with `\multicolumn{` or `\multirow{` is handed to the
/// matching span parser; anything else becomes a plain cell holding the
/// trimmed text.
fn parse_cell(content: &str) -> Cell {
    match content.trim() {
        text if text.starts_with("\\multicolumn{") => parse_multicolumn(text),
        text if text.starts_with("\\multirow{") => parse_multirow(text),
        text => Cell::new(text.to_string()),
    }
}
/// Parses `\multicolumn{n}{align}{text}` into a cell spanning `n` columns.
///
/// `content` must start with `\multicolumn{`. A count that is not a number
/// falls back to 1. The alignment is the first `l`, `c` or `r` in the second
/// argument, or center when it contains none; it is `None` when the argument
/// is missing or unbalanced. When the count argument is never closed, the
/// whole input becomes the content of a plain cell.
fn parse_multicolumn(content: &str) -> Cell {
    let after_prefix = &content["\\multicolumn{".len()..];
    let (count_text, after_count) = match after_prefix.split_once('}') {
        Some(parts) => parts,
        None => return Cell::new(content.to_string()),
    };
    let colspan = count_text.parse().unwrap_or(1);

    // Second argument: the column specification.
    let after_count = after_count.trim_start();
    let align_close = if after_count.starts_with('{') {
        find_matching_brace(after_count)
    } else {
        None
    };
    let (alignment, after_align) = match align_close {
        Some(close) => {
            let letter = after_count[1..close]
                .chars()
                .find(|c| "lcr".contains(*c))
                .unwrap_or('c');
            (
                Some(Alignment::from_latex_char(letter)),
                &after_count[close + 1..],
            )
        }
        None => (None, after_count),
    };

    // Third argument: the cell text. Without a balanced group the remaining
    // text is used as is.
    let candidate = after_align.trim_start();
    let text_close = if candidate.starts_with('{') {
        find_matching_brace(candidate)
    } else {
        None
    };
    let text = match text_close {
        Some(close) => candidate[1..close].to_string(),
        None => after_align.to_string(),
    };

    let mut cell = Cell::with_span(text, colspan, 1);
    cell.alignment = alignment;
    cell
}
/// Parses `\multirow{n}{width}{text}` into a cell spanning `n` rows.
///
/// `content` must start with `\multirow{`. A count that is not a number
/// falls back to 1, and the width argument is discarded. When the count
/// argument is never closed, the whole input becomes the content of a plain
/// cell.
fn parse_multirow(content: &str) -> Cell {
    let after_prefix = &content["\\multirow{".len()..];
    let (count_text, after_count) = match after_prefix.split_once('}') {
        Some(parts) => parts,
        None => return Cell::new(content.to_string()),
    };
    let rowspan = count_text.parse().unwrap_or(1);

    // Second argument: the width, which is not kept.
    let after_width = if after_count.trim_start().starts_with('{') {
        skip_braced_arg(after_count)
    } else {
        after_count
    };

    // Third argument: the cell text. Without a balanced group the remaining
    // text is used as is.
    let candidate = after_width.trim_start();
    let text_close = if candidate.starts_with('{') {
        find_matching_brace(candidate)
    } else {
        None
    };
    let text = match text_close {
        Some(close) => candidate[1..close].to_string(),
        None => after_width.to_string(),
    };

    Cell::with_span(text, 1, rowspan)
}
/// Parses a Typst `table(...)` / `#table(...)` call into a [`Table`].
///
/// Returns `None` when `input` contains no `table(`. The column count comes
/// from the `columns:` argument and falls back to 2 when it is missing or
/// zero; a zero count previously caused a division-by-zero panic. Cells are
/// distributed row by row into the body: full rows are always kept, a
/// trailing partial row only when it is not blank.
pub fn parse_typst_table(input: &str) -> Option<Table> {
    // "#table(" contains "table(", so one check covers both spellings.
    if !input.contains("table(") {
        return None;
    }
    let num_cols = extract_typst_columns(input)
        .filter(|&count| count > 0)
        .unwrap_or(2);
    let mut table = Table::new(num_cols);

    if let Some(cells) = extract_typst_cells(input) {
        let mut current_row = Row::new();
        for content in cells {
            current_row.push(Cell::new(content));
            if current_row.cells.len() == num_cols {
                table.push_body(std::mem::take(&mut current_row));
            }
        }
        if !current_row.is_empty() {
            table.push_body(current_row);
        }
    }
    Some(table)
}
/// Determines the column count from the first `columns:` argument in `input`.
///
/// Two forms are understood: a plain integer (`columns: 3`) and a
/// parenthesised list (`columns: (auto, 1fr)`), whose top-level items are
/// counted. A trailing comma, as in `(1fr,)`, does not add a column.
/// Returns `None` when there is no `columns:` argument, its value has another
/// form, or the resulting count would be zero.
fn extract_typst_columns(input: &str) -> Option<usize> {
    let start = input.find("columns:")? + "columns:".len();
    let rest = input[start..].trim_start();
    let first = rest.chars().next()?;

    if first.is_ascii_digit() {
        let digits: String = rest.chars().take_while(char::is_ascii_digit).collect();
        return digits.parse::<usize>().ok().filter(|&count| count > 0);
    }
    if first != '(' {
        return None;
    }

    let mut count = 0usize;
    let mut depth = 0usize;
    // Whether the item currently being scanned has any content.
    let mut item_started = false;
    for c in rest.chars() {
        match c {
            '(' => {
                // A nested parenthesis is part of the current item.
                if depth > 0 {
                    item_started = true;
                }
                depth += 1;
            }
            ')' => {
                // `rest` starts with '(', so depth is at least 1 here.
                depth -= 1;
                if depth == 0 {
                    if item_started {
                        count += 1;
                    }
                    break;
                }
            }
            ',' if depth == 1 => {
                if item_started {
                    count += 1;
                }
                item_started = false;
            }
            other if !other.is_whitespace() => item_started = true,
            _ => {}
        }
    }
    // A zero count is useless to callers, which divide by it.
    Some(count).filter(|&n| n > 0)
}
/// Collects the `[...]` content blocks inside the first `table(...)` call.
///
/// The argument list ends at the bracket that balances the opening
/// parenthesis; `(`, `[` and `{` all count as openers and `)`, `]` and `}` as
/// closers. Every top-level `[...]` inside it becomes one trimmed cell, with
/// nested brackets kept verbatim. Returns `None` when there is no `table(`,
/// when the call is never closed, or when it contains no content blocks.
fn extract_typst_cells(input: &str) -> Option<Vec<String>> {
    const OPENER: &str = "table(";
    let args_start = input.find(OPENER)? + OPENER.len();
    let rest = &input[args_start..];

    // Locate the end of the argument list; 0 (an empty list) when unbalanced.
    let mut nesting = 1;
    let args_end = rest
        .char_indices()
        .find_map(|(pos, ch)| {
            match ch {
                '(' | '[' | '{' => nesting += 1,
                ')' | ']' | '}' => {
                    nesting -= 1;
                    if nesting == 0 {
                        return Some(pos);
                    }
                }
                _ => {}
            }
            None
        })
        .unwrap_or(0);
    let args = &rest[..args_end];

    let mut cells = Vec::new();
    let mut stream = args.chars();
    while let Some(ch) = stream.next() {
        if ch != '[' {
            continue;
        }
        // Gather everything up to the bracket that closes this block.
        let mut text = String::new();
        let mut open_brackets = 1;
        for inner in stream.by_ref() {
            if inner == '[' {
                open_brackets += 1;
            } else if inner == ']' {
                open_brackets -= 1;
                if open_brackets == 0 {
                    break;
                }
            }
            text.push(inner);
        }
        cells.push(text.trim().to_string());
    }

    if cells.is_empty() {
        None
    } else {
        Some(cells)
    }
}
/// Renders `table` as a LaTeX `tabular` environment.
///
/// When the table has a caption, the output is wrapped in a centered `table`
/// float and the caption follows the tabular. A label, if any, is written
/// after the caption even when there is no surrounding float. Header, body
/// and footer rows are written in that order, each followed by `\hline` when
/// its bottom border is set.
pub fn table_to_latex(table: &Table) -> String {
    let in_float = table.caption.is_some();
    let mut out = String::new();

    if in_float {
        out += "\\begin{table}[htbp]\n";
        out += "\\centering\n";
    }

    // Build the column preamble, e.g. `|l|c|r|`.
    let mut preamble = String::new();
    for spec in &table.colspecs {
        if spec.has_left_border {
            preamble.push('|');
        }
        preamble.push(spec.alignment.to_latex_char());
        if spec.has_right_border {
            preamble.push('|');
        }
    }
    out += &format!("\\begin{{tabular}}{{{}}}\n", preamble);

    if table.has_top_border {
        out += "\\hline\n";
    }
    let all_rows = table
        .header
        .iter()
        .chain(&table.body)
        .chain(&table.footer);
    for row in all_rows {
        out += &row_to_latex(row);
        if row.has_bottom_border {
            out += "\\hline\n";
        }
    }
    out += "\\end{tabular}\n";

    if let Some(caption) = &table.caption {
        let line = match &caption.short {
            Some(short) => format!("\\caption[{}]{{{}}}\n", short, caption.long),
            None => format!("\\caption{{{}}}\n", caption.long),
        };
        out += &line;
    }
    if let Some(label) = &table.label {
        out += &format!("\\label{{{}}}\n", label);
    }
    if in_float {
        out += "\\end{table}\n";
    }
    out
}
/// Renders one row as a LaTeX line: cells joined by ` & `, terminated by ` \\`
/// and a newline.
fn row_to_latex(row: &Row) -> String {
    let mut line = row
        .cells
        .iter()
        .map(cell_to_latex)
        .collect::<Vec<String>>()
        .join(" & ");
    line.push_str(" \\\\\n");
    line
}
/// Renders one cell as LaTeX.
///
/// A row span becomes `\multirow{n}{*}{...}` and a column span becomes
/// `\multicolumn{n}{align}{...}`, with center as the alignment when the cell
/// has no override. A cell spanning both rows and columns nests the
/// `\multirow` inside the `\multicolumn`, so the row span is no longer lost.
/// Ordinary cells are emitted verbatim.
fn cell_to_latex(cell: &Cell) -> String {
    let inner = if cell.rowspan > 1 {
        format!("\\multirow{{{}}}{{*}}{{{}}}", cell.rowspan, cell.content)
    } else {
        cell.content.clone()
    };
    if cell.colspan > 1 {
        let align = cell.alignment.unwrap_or(Alignment::Center);
        format!(
            "\\multicolumn{{{}}}{{{}}}{{{}}}",
            cell.colspan,
            align.to_latex_char(),
            inner
        )
    } else {
        inner
    }
}
/// Renders `table` as a Typst `table(...)` call.
///
/// When the table has a caption, the call is wrapped in `#figure(...)` with
/// the caption and, if present, a trailing `<label>`. A `stroke` argument is
/// emitted only when the table has a top border or any column has a vertical
/// border. Header and footer rows are wrapped in `table.header(...)` and
/// `table.footer(...)`.
pub fn table_to_typst(table: &Table) -> String {
    let mut out = String::new();
    if table.caption.is_some() {
        out += "#figure(\n";
    }
    out += " table(\n";

    // One pass over the column specs collects widths, alignments and borders.
    let mut widths: Vec<String> = Vec::with_capacity(table.colspecs.len());
    let mut aligns: Vec<&str> = Vec::with_capacity(table.colspecs.len());
    let mut any_vertical_border = false;
    for spec in &table.colspecs {
        let width = match spec.width {
            ColWidth::Auto => "auto".to_string(),
            ColWidth::Fixed(w) => format!("{}%", w * 100.0),
            ColWidth::Percent(p) => format!("{}%", p),
        };
        widths.push(width);
        aligns.push(spec.alignment.to_typst());
        any_vertical_border |= spec.has_left_border || spec.has_right_border;
    }
    out += &format!(" columns: ({}),\n", widths.join(", "));
    out += &format!(" align: ({}),\n", aligns.join(", "));
    if table.has_top_border || any_vertical_border {
        out += " stroke: 0.5pt,\n";
    }

    if !table.header.is_empty() {
        out += " table.header(\n";
        for row in &table.header {
            out += &row_to_typst(row);
        }
        out += " ),\n";
    }
    for row in &table.body {
        out += &row_to_typst(row);
    }
    if !table.footer.is_empty() {
        out += " table.footer(\n";
        for row in &table.footer {
            out += &row_to_typst(row);
        }
        out += " ),\n";
    }
    out += " )";

    if let Some(caption) = &table.caption {
        out += ",\n";
        out += &format!(" caption: [{}]\n", caption.long);
        out.push(')');
        if let Some(label) = &table.label {
            out += &format!(" <{}>", label);
        }
    }
    out.push('\n');
    out
}
/// Renders one row as a line of comma-separated Typst cells with a trailing
/// comma and newline.
fn row_to_typst(row: &Row) -> String {
    let joined = row
        .cells
        .iter()
        .map(cell_to_typst)
        .collect::<Vec<String>>()
        .join(", ");
    format!(" {},\n", joined)
}
/// Renders one cell as Typst.
///
/// Ordinary cells become a bare content block `[...]`. Cells spanning more
/// than one column or row become `table.cell(colspan: n, rowspan: m)[...]`,
/// listing only the spans greater than 1.
fn cell_to_typst(cell: &Cell) -> String {
    let colspan_arg = (cell.colspan > 1).then(|| format!("colspan: {}", cell.colspan));
    let rowspan_arg = (cell.rowspan > 1).then(|| format!("rowspan: {}", cell.rowspan));
    let args: Vec<String> = colspan_arg.into_iter().chain(rowspan_arg).collect();

    if args.is_empty() {
        format!("[{}]", cell.content)
    } else {
        format!("table.cell({})[{}]", args.join(", "), cell.content)
    }
}
// Unit tests for the LaTeX parsing helpers and both serializers.
#[cfg(test)]
mod tests {
use super::*;
// A bordered three-column tabular parses to three columns and a non-empty header.
#[test]
fn test_parse_simple_table() {
let input = r#"
\begin{tabular}{|c|c|c|}
\hline
A & B & C \\
\hline
1 & 2 & 3 \\
4 & 5 & 6 \\
\hline
\end{tabular}
"#;
let table = parse_latex_table(input).unwrap();
assert_eq!(table.num_cols(), 3);
assert!(!table.header.is_empty());
}
// The preamble `|l|c|r|` yields left, center and right columns in order.
#[test]
fn test_colspec_parsing() {
let specs = extract_colspecs("\\begin{tabular}{|l|c|r|}").unwrap();
assert_eq!(specs.len(), 3);
assert_eq!(specs[0].alignment, Alignment::Left);
assert_eq!(specs[1].alignment, Alignment::Center);
assert_eq!(specs[2].alignment, Alignment::Right);
}
// `\multicolumn` carries its span and alignment into the cell.
#[test]
fn test_multicolumn() {
let cell = parse_multicolumn("\\multicolumn{3}{c}{Merged}");
assert_eq!(cell.colspan, 3);
assert_eq!(cell.alignment, Some(Alignment::Center));
}
// Column alignments and cell contents appear in the LaTeX output.
#[test]
fn test_table_to_latex() {
let mut table = Table::new(2);
table.colspecs[0].alignment = Alignment::Left;
table.colspecs[1].alignment = Alignment::Right;
let mut row = Row::new();
row.push(Cell::new("A".to_string()));
row.push(Cell::new("B".to_string()));
table.push_body(row);
let latex = table_to_latex(&table);
assert!(latex.contains("\\begin{tabular}{lr}"));
assert!(latex.contains("A & B"));
}
// The Typst output contains the table call, a columns argument and the cells.
#[test]
fn test_table_to_typst() {
let mut table = Table::new(2);
let mut row = Row::new();
row.push(Cell::new("A".to_string()));
row.push(Cell::new("B".to_string()));
table.push_body(row);
let typst = table_to_typst(&table);
assert!(typst.contains("table("));
assert!(typst.contains("columns:"));
assert!(typst.contains("[A]"));
}
// Both the short and the long caption text are extracted.
#[test]
fn test_caption_extraction() {
let input = r#"\caption[Short]{Long caption}"#;
let caption = extract_caption(input).unwrap();
assert_eq!(caption.short, Some("Short".to_string()));
assert_eq!(caption.long, "Long caption");
}
// A column span is rendered by both serializers.
#[test]
fn test_colspan_cell() {
let cell = Cell::with_span("Merged".to_string(), 3, 1);
let latex = cell_to_latex(&cell);
assert!(latex.contains("\\multicolumn{3}"));
let typst = cell_to_typst(&cell);
assert!(typst.contains("colspan: 3"));
}
// A row span is rendered by both serializers.
#[test]
fn test_rowspan_cell() {
let cell = Cell::with_span("Vertical".to_string(), 1, 2);
let latex = cell_to_latex(&cell);
assert!(latex.contains("\\multirow{2}"));
let typst = cell_to_typst(&cell);
assert!(typst.contains("rowspan: 2"));
}
}