use crate::ooxml::error::{OoxmlError, Result};
use quick_xml::events::Event;
use quick_xml::Reader;
/// A text body backed by its raw XML.
///
/// The XML is kept as bytes and parsed on demand by the accessor methods;
/// nothing is interpreted when the value is constructed.
#[derive(Debug, Clone)]
pub struct TextFrame {
    /// Owned copy of the XML this frame was created from.
    xml_bytes: Vec<u8>,
}
impl TextFrame {
pub(crate) fn from_xml(xml_bytes: &[u8]) -> Result<Self> {
Ok(Self {
xml_bytes: xml_bytes.to_vec(),
})
}
pub fn text(&self) -> Result<String> {
let mut reader = Reader::from_reader(&self.xml_bytes[..]);
reader.config_mut().trim_text(true);
let mut text = String::new();
let mut in_text_element = false;
let mut buf = Vec::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) => {
if e.local_name().as_ref() == b"t" {
in_text_element = true;
}
}
Ok(Event::Text(e)) if in_text_element => {
let t = std::str::from_utf8(e.as_ref())
.map_err(|e| OoxmlError::Xml(e.to_string()))?;
if !text.is_empty() && !text.ends_with('\n') {
text.push('\n');
}
text.push_str(t);
}
Ok(Event::End(e)) => {
if e.local_name().as_ref() == b"t" {
in_text_element = false;
}
}
Ok(Event::Eof) => break,
Err(e) => return Err(OoxmlError::Xml(e.to_string())),
_ => {}
}
buf.clear();
}
Ok(text)
}
pub fn paragraphs(&self) -> Result<Vec<Paragraph>> {
let mut reader = Reader::from_reader(&self.xml_bytes[..]);
reader.config_mut().trim_text(true);
let mut paragraphs = Vec::new();
let mut current_para_xml = Vec::new();
let mut in_para = false;
let mut depth = 0;
let mut buf = Vec::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) => {
if e.local_name().as_ref() == b"p" && !in_para {
in_para = true;
depth = 1;
current_para_xml.clear();
current_para_xml.extend_from_slice(b"<a:p>");
} else if in_para {
depth += 1;
current_para_xml.push(b'<');
current_para_xml.extend_from_slice(e.name().as_ref());
for attr in e.attributes().flatten() {
current_para_xml.push(b' ');
current_para_xml.extend_from_slice(attr.key.as_ref());
current_para_xml.extend_from_slice(b"=\"");
current_para_xml.extend_from_slice(&attr.value);
current_para_xml.push(b'"');
}
current_para_xml.push(b'>');
}
}
Ok(Event::End(e)) => {
if in_para {
current_para_xml.extend_from_slice(b"</");
current_para_xml.extend_from_slice(e.name().as_ref());
current_para_xml.push(b'>');
depth -= 1;
if depth == 0 && e.local_name().as_ref() == b"p" {
paragraphs.push(Paragraph::new(current_para_xml.clone()));
in_para = false;
}
}
}
Ok(Event::Text(e)) if in_para => {
current_para_xml.extend_from_slice(e.as_ref());
}
Ok(Event::Empty(e)) if in_para => {
current_para_xml.push(b'<');
current_para_xml.extend_from_slice(e.name().as_ref());
for attr in e.attributes().flatten() {
current_para_xml.push(b' ');
current_para_xml.extend_from_slice(attr.key.as_ref());
current_para_xml.extend_from_slice(b"=\"");
current_para_xml.extend_from_slice(&attr.value);
current_para_xml.push(b'"');
}
current_para_xml.extend_from_slice(b"/>");
}
Ok(Event::Eof) => break,
Err(e) => return Err(OoxmlError::Xml(e.to_string())),
_ => {}
}
buf.clear();
}
Ok(paragraphs)
}
pub fn omml_formulas(&self) -> Result<Vec<String>> {
let mut formulas = Vec::new();
for para in self.paragraphs()? {
if let Ok(text) = para.text() {
if text.contains("oMath") || text.contains("m:oMath") {
formulas.push(text);
}
}
}
Ok(formulas)
}
}
/// A single paragraph backed by its raw XML.
///
/// The XML is kept as bytes and parsed on demand when text is requested.
#[derive(Debug, Clone)]
pub struct Paragraph {
    /// XML of the paragraph element, as passed to [`Paragraph::new`].
    xml_bytes: Vec<u8>,
}
impl Paragraph {
pub fn new(xml_bytes: Vec<u8>) -> Self {
Self { xml_bytes }
}
pub fn text(&self) -> Result<String> {
let mut reader = Reader::from_reader(&self.xml_bytes[..]);
reader.config_mut().trim_text(true);
let mut text = String::new();
let mut in_text_element = false;
let mut buf = Vec::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) => {
if e.local_name().as_ref() == b"t" {
in_text_element = true;
}
}
Ok(Event::Text(e)) if in_text_element => {
let t = std::str::from_utf8(e.as_ref())
.map_err(|e| OoxmlError::Xml(e.to_string()))?;
text.push_str(t);
}
Ok(Event::End(e)) => {
if e.local_name().as_ref() == b"t" {
in_text_element = false;
}
}
Ok(Event::Eof) => break,
Err(e) => return Err(OoxmlError::Xml(e.to_string())),
_ => {}
}
buf.clear();
}
Ok(text)
}
}