use anyhow::Result;
/// Converts an RTF document into Typst markup.
///
/// The structured parser (`parse_structured`) is tried first. If it fails for
/// any reason, the text recovered by `strip_to_plain_text` is returned instead,
/// with every character that Typst markup treats as syntax backslash-escaped so
/// that text from a malformed (possibly untrusted) document cannot be
/// interpreted as markup or code.
///
/// # Errors
///
/// The current implementation always returns `Ok`; the `Result` is kept so the
/// signature stays stable for callers.
pub fn rtf_to_typst(rtf_bytes: &[u8]) -> Result<String> {
    /// Backslash-escapes the characters that carry meaning in Typst markup.
    fn escape_markup(text: &str) -> String {
        let mut escaped = String::with_capacity(text.len());
        for c in text.chars() {
            if matches!(
                c,
                '*' | '_' | '#' | '@' | '<' | '>' | '$' | '\\' | '`' | '~' | '[' | ']'
            ) {
                escaped.push('\\');
            }
            escaped.push(c);
        }
        escaped
    }

    // Deliberate best-effort: a parse failure is not surfaced to the caller,
    // the plain-text extraction is used instead.
    parse_structured(rtf_bytes).or_else(|_| Ok(escape_markup(&strip_to_plain_text(rtf_bytes))))
}
/// Parses `rtf_bytes` with `rtf_parser_tt` and renders the document body as
/// Typst markup.
///
/// Every non-empty body block is split on `\n`. Each piece is written through
/// `emit_styled_line` using the block's painter, and a paragraph break is
/// inserted before every piece after the first.
///
/// # Errors
///
/// Returns an error when the input is not valid UTF-8, or when the lexer or
/// the parser rejects it.
fn parse_structured(rtf_bytes: &[u8]) -> Result<String> {
    use rtf_parser_tt::lexer::Lexer;
    use rtf_parser_tt::parser::Parser;

    let source =
        std::str::from_utf8(rtf_bytes).map_err(|e| anyhow::anyhow!("rtf: not UTF-8: {e}"))?;
    let tokens = Lexer::scan(source).map_err(|e| anyhow::anyhow!("rtf lex: {e}"))?;
    let mut parser = Parser::new(tokens);
    let document = parser
        .parse()
        .map_err(|e| anyhow::anyhow!("rtf parse: {e}"))?;

    let mut markup = String::new();
    for block in &document.body {
        if block.text.is_empty() {
            continue;
        }
        let mut pieces = block.text.split('\n');
        // The first piece continues whatever is already in the buffer.
        if let Some(first) = pieces.next() {
            emit_styled_line(&mut markup, first, &block.painter);
        }
        // Every later piece follows a newline in the source text.
        for piece in pieces {
            ensure_paragraph_break(&mut markup);
            emit_styled_line(&mut markup, piece, &block.painter);
        }
    }

    // Leave at most two newlines at the very end of the output.
    let trailing_newlines = markup.len() - markup.trim_end_matches('\n').len();
    if trailing_newlines > 2 {
        markup.truncate(markup.len() - (trailing_newlines - 2));
    }
    Ok(markup)
}
/// Appends one line of document text to `out` as Typst markup.
///
/// Trailing `\r` characters are removed first; if nothing is left, `out` is not
/// touched. Otherwise the text is wrapped in Typst's strong (`*`) and/or
/// emphasis (`_`) delimiters according to `painter.bold` / `painter.italic`,
/// and every character that Typst markup treats as syntax is backslash-escaped.
fn emit_styled_line(
    out: &mut String,
    line: &str,
    painter: &rtf_parser_tt::parser::Painter,
) {
    let text = line.trim_end_matches('\r');
    if text.is_empty() {
        return;
    }
    let bold = painter.bold;
    let italic = painter.italic;

    // Typst marks strong text with a single `*`. Markdown's `**` would open and
    // immediately close an empty strong span, leaving the text unstyled.
    if bold {
        out.push('*');
    }
    if italic {
        out.push('_');
    }
    for c in text.chars() {
        // Backtick (raw text), `~` (non-breaking space) and square brackets
        // (content blocks) are markup syntax too, alongside the original set.
        if matches!(
            c,
            '*' | '_' | '#' | '@' | '<' | '>' | '$' | '\\' | '`' | '~' | '[' | ']'
        ) {
            out.push('\\');
        }
        out.push(c);
    }
    // Close in reverse order so the delimiters nest properly.
    if italic {
        out.push('_');
    }
    if bold {
        out.push('*');
    }
}
/// Makes `out` end with a paragraph separator without stacking extra newlines.
///
/// * Already ends with `"\n\n"`: left unchanged.
/// * Empty, or ends with a single `'\n'`: one `'\n'` is appended.
/// * Otherwise: `"\n\n"` is appended.
fn ensure_paragraph_break(out: &mut String) {
    if out.ends_with("\n\n") {
        return;
    }
    if out.is_empty() || out.ends_with('\n') {
        out.push('\n');
    } else {
        out.push_str("\n\n");
    }
}
/// Best-effort extraction of readable text from RTF bytes without a real parser.
///
/// The input is decoded lossily as UTF-8 and scanned once:
///
/// * `{` / `}` track group depth; only text inside at least one group is kept.
/// * A control word is a run of ASCII letters, an optional signed decimal
///   parameter and an optional single delimiting space. All control words are
///   dropped except `\par` (blank line), `\line` (newline) and `\tab` (tab), so
///   neighbouring words are not glued together.
/// * `\\`, `\{` and `\}` produce the literal character.
/// * `\'hh` is decoded as one byte. NOTE(review): this assumes Windows-1252,
///   the usual `\ansi` code page; `\ansicpg` is not honoured.
/// * `\~` and `\_` become a non-breaking space / non-breaking hyphen; a
///   backslash followed by a raw line break counts as `\par`; every other
///   control symbol is dropped.
/// * Raw CR/LF in the source are ignored.
///
/// `\uN` Unicode escapes are not decoded; whatever ANSI fallback follows them
/// is emitted instead. The result is trimmed of surrounding whitespace.
fn strip_to_plain_text(rtf_bytes: &[u8]) -> String {
    // Windows-1252 assignments for bytes 0x80..=0x9F (the only range where it
    // differs from Latin-1). Unassigned slots map to U+FFFD.
    const CP1252_HIGH: [char; 32] = [
        '\u{20AC}', '\u{FFFD}', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}',
        '\u{2021}', '\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '\u{FFFD}',
        '\u{017D}', '\u{FFFD}', '\u{FFFD}', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}',
        '\u{2022}', '\u{2013}', '\u{2014}', '\u{02DC}', '\u{2122}', '\u{0161}', '\u{203A}',
        '\u{0153}', '\u{FFFD}', '\u{017E}', '\u{0178}',
    ];

    let s = String::from_utf8_lossy(rtf_bytes);
    let mut out = String::new();
    let mut chars = s.chars().peekable();
    let mut depth = 0usize;

    while let Some(c) = chars.next() {
        match c {
            '{' => depth += 1,
            // Saturate so stray closing braces in garbage input cannot underflow.
            '}' => depth = depth.saturating_sub(1),
            '\\' => match chars.peek().copied() {
                // Trailing backslash at end of input: nothing to do.
                None => {}
                Some(first) if first.is_ascii_alphabetic() => {
                    // Control word name: letters only.
                    let mut word = String::new();
                    while let Some(letter) =
                        chars.peek().copied().filter(|l| l.is_ascii_alphabetic())
                    {
                        word.push(letter);
                        chars.next();
                    }
                    // A '-' belongs to the parameter only when a digit follows;
                    // otherwise it is ordinary text.
                    let mut ahead = chars.clone();
                    if ahead.next() == Some('-')
                        && matches!(ahead.peek(), Some(d) if d.is_ascii_digit())
                    {
                        chars.next();
                    }
                    while matches!(chars.peek(), Some(d) if d.is_ascii_digit()) {
                        chars.next();
                    }
                    // One space terminates the control word and is not text.
                    if chars.peek() == Some(&' ') {
                        chars.next();
                    }
                    if depth > 0 {
                        match word.as_str() {
                            "par" => out.push_str("\n\n"),
                            "line" => out.push('\n'),
                            "tab" => out.push('\t'),
                            _ => {}
                        }
                    }
                }
                Some('\'') => {
                    chars.next();
                    // Collect up to two hex digits; anything shorter is malformed
                    // and silently dropped.
                    let mut hex = String::with_capacity(2);
                    while hex.len() < 2 {
                        match chars.peek().copied() {
                            Some(d) if d.is_ascii_hexdigit() => {
                                hex.push(d);
                                chars.next();
                            }
                            _ => break,
                        }
                    }
                    if hex.len() == 2 && depth > 0 {
                        if let Ok(byte) = u8::from_str_radix(&hex, 16) {
                            out.push(match byte {
                                0x80..=0x9F => CP1252_HIGH[usize::from(byte - 0x80)],
                                _ => char::from(byte),
                            });
                        }
                    }
                }
                Some(symbol) => {
                    // Control symbol: exactly one non-letter character.
                    chars.next();
                    if depth > 0 {
                        match symbol {
                            '\\' | '{' | '}' => out.push(symbol),
                            '~' => out.push('\u{00A0}'),
                            '_' => out.push('\u{2011}'),
                            '\n' | '\r' => out.push_str("\n\n"),
                            _ => {}
                        }
                    }
                }
            },
            '\n' | '\r' => {}
            _ if depth > 0 => out.push(c),
            _ => {}
        }
    }
    out.trim().to_string()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A document that contains only the RTF header converts to whitespace at most.
    #[test]
    fn empty_rtf() {
        let converted = rtf_to_typst(b"{\\rtf1\\ansi}").unwrap();
        assert!(converted.trim().is_empty());
    }

    /// Plain body text survives the conversion.
    #[test]
    fn plain_paragraph() {
        let converted = rtf_to_typst(b"{\\rtf1\\ansi The quick brown fox.}").unwrap();
        assert!(converted.contains("The quick brown fox"));
    }

    /// The fallback extractor still recovers some text from malformed input.
    #[test]
    fn strip_fallback_handles_garbage() {
        let recovered = strip_to_plain_text(b"\\xxx{\\bogus garbage \\b text \\par more}");
        let found_any = ["garbage", "text"]
            .iter()
            .any(|word| recovered.contains(*word));
        assert!(found_any);
    }
}