#[cfg(not(target_arch = "wasm32"))]
use crate::detect::{detect_format_from_path, FormatType};
#[cfg(not(target_arch = "wasm32"))]
use crate::error::Result;
use crate::model::Metadata;
use crate::Error;
use std::collections::HashMap;
#[cfg(not(target_arch = "wasm32"))]
use std::ops::ControlFlow;
#[cfg(not(target_arch = "wasm32"))]
use std::path::Path;
/// Events delivered to the callback of a streaming parse
/// (see `parse_file_streaming`, whose callback receives `ParseEvent<'_>`).
///
/// The `'doc` lifetime bounds every borrowed payload (`metadata`, parsed
/// sections): a handler that wants to keep such data beyond the callback
/// must copy it out.
///
/// NOTE(review): the order and multiplicity in which the variants are emitted
/// is decided by the format-specific parsers, which are not visible in this
/// file — confirm before relying on it.
pub enum ParseEvent<'doc> {
/// Document-level information.
/// NOTE(review): presumably emitted once, before any section event — confirm.
DocumentStart {
/// Borrowed document metadata.
metadata: &'doc Metadata,
/// Number of sections; presumably the total the parser intends to
/// report — TODO confirm whether failed sections are included.
section_count: usize,
/// String-to-string image lookup table.
/// NOTE(review): the meaning of keys and values is not established
/// here — verify against the parser that fills it.
image_map: HashMap<String, String>,
},
/// A section that was parsed successfully, borrowed for `'doc`.
SectionParsed(&'doc crate::model::Section),
/// A section that could not be parsed.
/// NOTE(review): presumably only reported when
/// `SectionStreamOptions::lenient` is set — confirm.
SectionFailed {
/// Index of the failing section (base not visible here — TODO confirm
/// whether it is zero-based).
index: usize,
/// The error raised while parsing that section.
error: Error,
},
/// Marks the end of the event stream (presumably emitted last — confirm).
DocumentEnd,
/// An embedded resource handed over as owned data.
/// NOTE(review): presumably gated by
/// `SectionStreamOptions::extract_resources` — confirm.
ResourceExtracted {
/// Name identifying the resource.
name: String,
/// Raw bytes of the resource.
data: Vec<u8>,
},
}
/// Knobs passed to a streaming parse alongside the event callback.
///
/// Use [`Default::default`] for strict parsing with resource extraction on.
#[derive(Debug, Clone)]
pub struct SectionStreamOptions {
    /// Leniency switch; off by default.
    /// NOTE(review): presumably lets the parser report a broken section via
    /// `ParseEvent::SectionFailed` and carry on — confirm with the parsers.
    pub lenient: bool,
    /// Resource extraction switch; on by default.
    /// NOTE(review): presumably controls `ParseEvent::ResourceExtracted`
    /// events — confirm with the parsers.
    pub extract_resources: bool,
}

impl Default for SectionStreamOptions {
    /// Returns the defaults: `lenient = false`, `extract_resources = true`.
    fn default() -> Self {
        let lenient = false;
        let extract_resources = true;
        SectionStreamOptions {
            lenient,
            extract_resources,
        }
    }
}
#[cfg(not(target_arch = "wasm32"))]
pub fn parse_file_streaming<F>(
path: impl AsRef<Path>,
opts: SectionStreamOptions,
f: F,
) -> Result<()>
where
F: FnMut(ParseEvent<'_>) -> ControlFlow<()>,
{
let path = path.as_ref();
let format = detect_format_from_path(path)?;
match format {
#[cfg(feature = "pptx")]
FormatType::Pptx => {
let mut parser = crate::pptx::PptxParser::open(path)?;
parser.for_each_section(opts, f)
}
#[cfg(feature = "xlsx")]
FormatType::Xlsx => {
let mut parser = crate::xlsx::XlsxParser::open(path)?;
parser.for_each_section(opts, f)
}
#[cfg(feature = "docx")]
FormatType::Docx => Err(Error::UnsupportedFormat(
"streaming not yet supported for DOCX — use parse_file() instead".into(),
)),
#[allow(unreachable_patterns)]
_ => Err(Error::UnsupportedFormat(format!("{:?}", format))),
}
}