use crate::error::{Error, Result};
use cfb::CompoundFile;
use flate2::read::DeflateDecoder;
use std::cell::RefCell;
use std::io::{Cursor, Read, Seek};
use std::path::Path;
/// Read-only view over an HWP5 document stored as a compound file (CFB).
///
/// The compound file is parsed from an in-memory byte buffer. It lives inside
/// a [`RefCell`] so that methods taking `&self` can still obtain the mutable
/// access that opening a stream requires.
pub struct Hwp5Container {
/// Compound file parsed over an owned, in-memory copy of the document bytes.
cfb: RefCell<CompoundFile<Cursor<Vec<u8>>>>,
}
impl Hwp5Container {
/// Opens the container stored in the file at `path`.
///
/// The file is read into memory in full and then parsed by
/// [`Hwp5Container::from_bytes`].
///
/// # Errors
///
/// Fails if the file cannot be read or if `from_bytes` rejects its contents.
pub fn open(path: impl AsRef<Path>) -> Result<Self> {
    Self::from_bytes(std::fs::read(path)?)
}
/// Builds a container from everything that remains in `reader`.
///
/// Reading starts at the reader's current position (no seek is performed)
/// and continues to end-of-stream; the collected bytes are then parsed by
/// [`Hwp5Container::from_bytes`].
///
/// # Errors
///
/// Fails if reading from `reader` fails or if `from_bytes` rejects the bytes.
pub fn from_reader<R: Read + Seek>(mut reader: R) -> Result<Self> {
    let mut contents: Vec<u8> = Vec::new();
    reader.read_to_end(&mut contents)?;
    Self::from_bytes(contents)
}
/// Parses `data` as a compound file and wraps it in a container.
///
/// Ownership of the buffer moves into the container; it backs every later
/// stream read.
///
/// # Errors
///
/// Fails if `CompoundFile::open` cannot parse the bytes.
pub fn from_bytes(data: Vec<u8>) -> Result<Self> {
    let compound = CompoundFile::open(Cursor::new(data))?;
    let cfb = RefCell::new(compound);
    Ok(Self { cfb })
}
/// Reads the `FileHeader` stream and parses it with `FileHeader::parse`.
///
/// # Errors
///
/// Fails if the stream cannot be read or if parsing the header fails.
pub fn read_file_header(&self) -> Result<super::FileHeader> {
    self.read_stream_raw("FileHeader")
        .and_then(|header_bytes| super::FileHeader::parse(&header_bytes))
}
pub fn read_stream_raw(&self, name: &str) -> Result<Vec<u8>> {
let mut cfb = self.cfb.borrow_mut();
let mut stream = cfb
.open_stream(name)
.map_err(|_| Error::MissingComponent(name.to_string()))?;
let mut data = Vec::new();
stream.read_to_end(&mut data)?;
Ok(data)
}
/// Reads the stream `name`, inflating it when `compressed` is `true`.
///
/// With `compressed == false` the stored bytes are returned untouched.
///
/// # Errors
///
/// Fails if the stream cannot be read or, for compressed streams, if
/// decompression fails.
pub fn read_stream_decompressed(&self, name: &str, compressed: bool) -> Result<Vec<u8>> {
    let stored = self.read_stream_raw(name)?;
    if !compressed {
        return Ok(stored);
    }
    decompress_stream(&stored)
}
/// Lists the body-text section streams in index order.
///
/// Probes `BodyText/Section0`, `BodyText/Section1`, ... and stops at the
/// first index for which no stream exists. Existence is checked with
/// `CompoundFile::is_stream`, so section contents are not read into memory
/// just to enumerate them.
///
/// # Errors
///
/// Returns `Error::MissingComponent("BodyText")` when not even
/// `BodyText/Section0` exists.
pub fn list_bodytext_sections(&self) -> Result<Vec<String>> {
    // Only `&self` queries are made here, so a shared borrow is enough.
    let cfb = self.cfb.borrow();
    let sections: Vec<String> = (0usize..)
        .map(|index| format!("BodyText/Section{}", index))
        .take_while(|name| cfb.is_stream(name))
        .collect();
    if sections.is_empty() {
        return Err(Error::MissingComponent("BodyText".into()));
    }
    Ok(sections)
}
/// Lists the names of the streams directly inside the `/BinData` storage,
/// sorted ascending.
///
/// Returns an empty list when the document has no `/BinData` storage.
/// Entries that are not streams are skipped.
///
/// # Errors
///
/// Returns `Error::MissingComponent` (message prefixed with `BinData: `) if
/// the storage exists but cannot be enumerated.
pub fn list_bindata(&self) -> Result<Vec<String>> {
    // Every call below takes `&self` on the compound file, so a shared
    // borrow is used rather than an exclusive one.
    let cfb = self.cfb.borrow();
    if !cfb.is_storage("/BinData") {
        return Ok(Vec::new());
    }
    let mut resources: Vec<String> = cfb
        .read_storage("/BinData")
        .map_err(|e| Error::MissingComponent(format!("BinData: {}", e)))?
        .into_iter()
        .filter(|entry| entry.is_stream())
        .map(|entry| entry.name().to_string())
        .collect();
    resources.sort();
    Ok(resources)
}
/// Reads the stream `BinData/<name>`, inflating it when `compressed` is
/// `true`.
///
/// # Errors
///
/// Fails under the same conditions as
/// [`Hwp5Container::read_stream_decompressed`].
pub fn read_bindata(&self, name: &str, compressed: bool) -> Result<Vec<u8>> {
    let stream_path = format!("BinData/{name}");
    self.read_stream_decompressed(&stream_path, compressed)
}
/// Reports whether a stream named `name` exists in the container.
///
/// The check asks the compound file whether the path names a stream; the
/// stream's contents are not read. A path that names a storage, or nothing
/// at all, yields `false`.
pub fn stream_exists(&self, name: &str) -> bool {
    self.cfb.borrow().is_stream(name)
}
/// Reads the `PrvText` stream and decodes it as UTF-16LE text.
///
/// # Errors
///
/// Fails if the stream cannot be read or if its bytes are not valid
/// UTF-16LE.
pub fn read_preview_text(&self) -> Result<String> {
    self.read_stream_raw("PrvText")
        .and_then(|preview_bytes| decode_utf16le(&preview_bytes))
}
}
fn decompress_stream(data: &[u8]) -> Result<Vec<u8>> {
let mut decoder = DeflateDecoder::new(data);
let mut output = Vec::new();
decoder
.read_to_end(&mut output)
.map_err(|e| Error::Decompression(e.to_string()))?;
Ok(output)
}
fn decode_utf16le(data: &[u8]) -> Result<String> {
if !data.len().is_multiple_of(2) {
return Err(Error::Encoding("Invalid UTF-16LE data length".into()));
}
let u16_iter = data
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]));
String::from_utf16(&u16_iter.collect::<Vec<_>>()).map_err(|e| Error::Encoding(e.to_string()))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII text round-trips from its little-endian UTF-16 byte pairs.
    #[test]
    fn test_decode_utf16le() {
        let bytes = [0x48, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F, 0x00];
        let decoded = decode_utf16le(&bytes).unwrap();
        assert_eq!(decoded, "Hello");
    }

    /// Hangul syllables (U+C548, U+B155) decode from their little-endian bytes.
    #[test]
    fn test_decode_utf16le_korean() {
        let bytes = [0x48, 0xC5, 0x55, 0xB1];
        let decoded = decode_utf16le(&bytes).unwrap();
        assert_eq!(decoded, "안녕");
    }
}