use crate::error::Error;
use std::io::Read;
/// Minimum number of bytes [`FileHeader::parse`] requires in its input;
/// anything shorter is rejected before any field is decoded.
pub const FILE_HEADER_LEN: usize = 256;
/// Byte string that must appear at the very start of the header for
/// [`FileHeader::parse`] to accept it.
pub const HWP_SIGNATURE: &[u8] = b"HWP Document File";
/// Decoded header fields, as produced by [`FileHeader::parse`].
///
/// The three flags are individual bits of the little-endian `u32` stored at
/// byte offsets 36..40 of the header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FileHeader {
/// Version decoded from byte offsets 32..36 of the header.
pub version: Version,
/// Bit 0 of the property word. When set, `Container` inflates the
/// `/DocInfo` and section streams after reading them.
pub compressed: bool,
/// Bit 1 of the property word. `Container::open` refuses to open a file
/// whose header has this flag set.
pub encrypted: bool,
/// Bit 2 of the property word. When set, `Container` reads sections from
/// the `/ViewText` storage instead of `/BodyText`.
pub distributed: bool,
}
/// Four-component version number carried in the header.
///
/// `FileHeader::parse` reads the components from four consecutive bytes
/// stored in the order revision, build, minor, major.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Version {
/// Taken from header byte offset 35.
pub major: u8,
/// Taken from header byte offset 34.
pub minor: u8,
/// Taken from header byte offset 33.
pub build: u8,
/// Taken from header byte offset 32.
pub revision: u8,
}
impl FileHeader {
pub fn parse(bytes: &[u8]) -> Result<Self, Error> {
if bytes.len() < FILE_HEADER_LEN {
return Err(Error::InvalidHeader(format!(
"expected {} bytes, got {}",
FILE_HEADER_LEN,
bytes.len()
)));
}
if !bytes[..HWP_SIGNATURE.len()].eq(HWP_SIGNATURE) {
return Err(Error::InvalidHeader("signature mismatch".into()));
}
let v = &bytes[32..36];
let version = Version {
revision: v[0],
build: v[1],
minor: v[2],
major: v[3],
};
let props = u32::from_le_bytes(bytes[36..40].try_into().unwrap());
Ok(FileHeader {
version,
compressed: props & 0b0001 != 0,
encrypted: props & 0b0010 != 0,
distributed: props & 0b0100 != 0,
})
}
}
pub fn read_all<R: Read>(mut r: R) -> Result<Vec<u8>, Error> {
let mut buf = Vec::new();
r.read_to_end(&mut buf).map_err(Error::Io)?;
Ok(buf)
}
use cfb::CompoundFile;
use flate2::read::DeflateDecoder;
use std::fs::File;
use std::path::Path;
/// An opened compound file together with the [`FileHeader`] parsed from it.
pub struct Container {
// Handle to the underlying compound file; every stream read goes through it.
cfb: CompoundFile<File>,
/// Header parsed from the `/FileHeader` stream by `Container::open`.
pub header: FileHeader,
// `Container::open` initialises this to `None` and nothing in the code
// visible here reads it, hence the lint allowance.
#[allow(dead_code)]
pub(crate) aes_key: Option<[u8; 16]>,
}
impl Container {
pub fn open(path: &Path) -> Result<Self, Error> {
if !path.exists() {
return Err(Error::NotFound(path.to_path_buf()));
}
let cfb =
CompoundFile::open(File::open(path)?).map_err(|e| Error::Container(e.to_string()))?;
let mut this = Self {
cfb,
header: FileHeader {
version: Version {
major: 0,
minor: 0,
build: 0,
revision: 0,
},
compressed: false,
encrypted: false,
distributed: false,
},
aes_key: None,
};
this.header = this.read_file_header()?;
if this.header.encrypted {
return Err(Error::Container(
"password-encrypted HWP not supported".into(),
));
}
Ok(this)
}
fn read_file_header(&mut self) -> Result<FileHeader, Error> {
let mut s = self
.cfb
.open_stream("/FileHeader")
.map_err(|_| Error::MissingStream("FileHeader".into()))?;
let bytes = read_all(&mut s)?;
FileHeader::parse(&bytes)
}
pub fn stream_size(&mut self, stream_name: &str) -> Option<u64> {
use std::io::{Seek, SeekFrom};
if !self.cfb.exists(stream_name) {
return None;
}
let mut s = self.cfb.open_stream(stream_name).ok()?;
s.seek(SeekFrom::End(0)).ok()
}
pub fn section_names(&self) -> Vec<String> {
let storage = if self.header.distributed {
"/ViewText"
} else {
"/BodyText"
};
let mut out: Vec<(u32, String)> = self
.cfb
.read_storage(storage)
.into_iter()
.flatten()
.filter_map(|entry| {
let name = entry.name().to_string();
let idx: u32 = name.strip_prefix("Section")?.parse().ok()?;
Some((idx, name))
})
.collect();
out.sort_by_key(|(i, _)| *i);
out.into_iter().map(|(_, n)| n).collect()
}
pub fn read_raw_stream(&mut self, path: &str) -> Result<Vec<u8>, Error> {
let mut s = self
.cfb
.open_stream(path)
.map_err(|_| Error::MissingStream(path.into()))?;
let raw = read_all(&mut s)?;
let decompress =
self.header.compressed && (path == "/DocInfo" || path.starts_with("/BodyText/"));
if decompress {
let mut dec = DeflateDecoder::new(&raw[..]);
let mut out = Vec::new();
dec.read_to_end(&mut out).map_err(|e| Error::Decompress {
stream: path.into(),
source: e,
})?;
Ok(out)
} else {
Ok(raw)
}
}
pub fn read_section(&mut self, name: &str) -> Result<Vec<u8>, Error> {
if self.header.distributed {
self.read_distributed_section(name)
} else {
self.read_raw_stream(&format!("/BodyText/{}", name))
}
}
fn read_distributed_section(&mut self, name: &str) -> Result<Vec<u8>, Error> {
let stream_path = format!("/ViewText/{}", name);
let mut s = self
.cfb
.open_stream(&stream_path)
.map_err(|_| Error::MissingStream(stream_path.clone()))?;
let raw = read_all(&mut s)?;
if raw.len() < 260 {
return Err(Error::Container(format!(
"distributed {} too short: {} bytes (need >= 260)",
stream_path,
raw.len()
)));
}
let word = u32::from_le_bytes(raw[0..4].try_into().unwrap());
let tag_id = (word & 0x3FF) as u16;
if tag_id != crate::record::HWPTAG_DISTRIBUTE_DOC_DATA {
return Err(Error::Container(format!(
"distributed {} missing DISTRIBUTE_DOC_DATA header (tag_id=0x{:03X})",
stream_path, tag_id
)));
}
let payload = &raw[4..260];
let key = crate::dist::derive_aes_key(payload)?;
let encrypted_body = &raw[260..];
let decrypted = crate::dist::aes128_ecb_decrypt(&key, encrypted_body);
if self.header.compressed {
let mut dec = DeflateDecoder::new(&decrypted[..]);
let mut out = Vec::new();
dec.read_to_end(&mut out).map_err(|e| Error::Decompress {
stream: stream_path,
source: e,
})?;
Ok(out)
} else {
Ok(decrypted)
}
}
}
#[cfg(test)]
mod open_tests {
    use super::*;

    /// A path with no file behind it must be reported as `Error::NotFound`.
    #[test]
    fn open_missing_file_returns_notfound() {
        let missing = Path::new("/tmp/does-not-exist-hwp.hwp");
        let outcome = Container::open(missing);
        assert!(matches!(outcome, Err(Error::NotFound(_))));
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal header buffer: the signature, version 5.0.3.0 and the
    /// given raw property word; every other byte is zero.
    fn make_header_with_props(props: u32) -> Vec<u8> {
        let mut buf = vec![0u8; FILE_HEADER_LEN];
        buf[..HWP_SIGNATURE.len()].copy_from_slice(HWP_SIGNATURE);
        // Stored as [revision, build, minor, major].
        buf[32..36].copy_from_slice(&[0, 3, 0, 5]);
        buf[36..40].copy_from_slice(&props.to_le_bytes());
        buf
    }

    /// Convenience wrapper that sets only the `compressed` bit.
    fn make_header(compressed: bool) -> Vec<u8> {
        make_header_with_props(u32::from(compressed))
    }

    #[test]
    fn parses_valid_header_compressed() {
        let hdr = FileHeader::parse(&make_header(true)).unwrap();
        assert_eq!(
            hdr.version,
            Version {
                major: 5,
                minor: 0,
                build: 3,
                revision: 0
            }
        );
        assert!(hdr.compressed);
        assert!(!hdr.encrypted);
    }

    #[test]
    fn parses_valid_header_uncompressed() {
        let hdr = FileHeader::parse(&make_header(false)).unwrap();
        assert!(!hdr.compressed);
        assert!(!hdr.encrypted);
        assert!(!hdr.distributed);
    }

    #[test]
    fn decodes_each_property_flag_independently() {
        // Walk every combination of the three low bits.
        for props in 0u32..8 {
            let hdr = FileHeader::parse(&make_header_with_props(props)).unwrap();
            assert_eq!(hdr.compressed, props & 0b001 != 0, "props={:#05b}", props);
            assert_eq!(hdr.encrypted, props & 0b010 != 0, "props={:#05b}", props);
            assert_eq!(hdr.distributed, props & 0b100 != 0, "props={:#05b}", props);
        }
    }

    #[test]
    fn ignores_property_bits_above_the_three_flags() {
        let hdr = FileHeader::parse(&make_header_with_props(0xFFFF_FFF8)).unwrap();
        assert!(!hdr.compressed);
        assert!(!hdr.encrypted);
        assert!(!hdr.distributed);
    }

    #[test]
    fn rejects_bad_signature() {
        let mut buf = make_header(true);
        buf[0] = b'X';
        let err = FileHeader::parse(&buf).unwrap_err();
        assert!(matches!(err, Error::InvalidHeader(_)));
    }

    #[test]
    fn rejects_short_input() {
        let buf = vec![0u8; 10];
        assert!(FileHeader::parse(&buf).is_err());
    }

    #[test]
    fn rejects_input_one_byte_short() {
        let mut buf = make_header(true);
        buf.truncate(FILE_HEADER_LEN - 1);
        let err = FileHeader::parse(&buf).unwrap_err();
        assert!(matches!(err, Error::InvalidHeader(_)));
    }

    #[test]
    fn accepts_trailing_bytes_after_header() {
        let mut buf = make_header(true);
        buf.extend_from_slice(&[0xAA; 16]);
        let hdr = FileHeader::parse(&buf).unwrap();
        assert_eq!(hdr, FileHeader::parse(&make_header(true)).unwrap());
    }
}