pub mod error;
pub mod model;
pub mod parser;
pub mod reader;
use error::{HwpError, Result};
use model::HwpDocument;
use parser::{FileHeader, parse_body_text};
use reader::CfbReader;
/// Extracts the text content of an HWP document held in memory.
///
/// The container is opened with [`CfbReader`], its `FileHeader` stream is
/// parsed, and then numbered section streams are read one after another
/// (`…/Section0`, `…/Section1`, …) until the first index that does not exist.
/// Sections live under `ViewText/` when the header reports a distribution
/// document and under `BodyText/` otherwise. Every parsed section is collected
/// into an [`HwpDocument`], whose `extract_text()` result is returned.
///
/// # Errors
///
/// * Any error from opening the container, reading a stream, parsing the
///   file header, or parsing a section body is propagated unchanged.
/// * [`HwpError::UnsupportedVersion`] if the header reports the document as
///   encrypted.
/// * [`HwpError::InvalidFormat`] if no sections were collected.
pub fn extract_hwp_text(bytes: &[u8]) -> Result<String> {
    let mut cfb = CfbReader::from_bytes(bytes)?;

    let header_data = cfb.read_stream("FileHeader")?;
    let header = FileHeader::parse(header_data)?;

    if header.is_encrypted() {
        return Err(HwpError::UnsupportedVersion(
            "Password-encrypted HWP documents are not supported".to_string(),
        ));
    }

    // Distribution documents keep their sections in a different storage.
    let stream_prefix = if header.is_distribute() {
        "ViewText/Section"
    } else {
        "BodyText/Section"
    };

    let mut doc = HwpDocument::default();

    // Sections are numbered contiguously from zero; the first gap ends the scan.
    for section_idx in 0u32.. {
        let section_name = format!("{stream_prefix}{section_idx}");
        if !cfb.stream_exists(&section_name) {
            break;
        }

        let section_data = cfb.read_stream(&section_name)?;
        let sections = parse_body_text(section_data, header.is_compressed())?;
        doc.sections.extend(sections);
    }

    if doc.sections.is_empty() {
        return Err(HwpError::InvalidFormat(
            "No BodyText sections found in HWP document".to_string(),
        ));
    }

    Ok(doc.extract_text())
}