mod body;
mod header;
pub use body::BodyParser;
pub use header::{Hwp3Header, Hwp3Version};
use crate::error::Result;
use crate::model::Document;
use encoding_rs::EUC_KR;
use std::io::{Read, Seek, SeekFrom};
use std::path::Path;
/// Parser for an HWP3 document read from an underlying source `R`.
///
/// Pairs the source with the [`Hwp3Header`] obtained from it, so the header
/// can be inspected before (or without) parsing the document body.
pub struct Hwp3Parser<R> {
/// Source the document is read from; handed to the body parser in `parse`.
reader: R,
/// Header parsed from `reader` when the parser is built via `from_reader`.
header: Hwp3Header,
}
impl Hwp3Parser<std::io::BufReader<std::fs::File>> {
    /// Opens the file at `path` and builds a parser over a buffered reader for it.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened, or if
    /// `from_reader` fails on the opened file.
    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
        let buffered = std::io::BufReader::new(std::fs::File::open(path)?);
        Self::from_reader(buffered)
    }
}
impl<R: Read + Seek> Hwp3Parser<R> {
    /// Builds a parser from any readable, seekable source.
    ///
    /// The source is first repositioned to offset 0 and the header is parsed
    /// immediately, so the returned parser always carries a header.
    ///
    /// # Errors
    ///
    /// Returns an error if seeking fails or if `header::parse_header` fails.
    pub fn from_reader(mut reader: R) -> Result<Self> {
        // Start from the beginning regardless of where the caller left the cursor.
        reader.seek(SeekFrom::Start(0))?;
        let parsed_header = header::parse_header(&mut reader)?;
        Ok(Self {
            reader,
            header: parsed_header,
        })
    }

    /// Parses the document body into a freshly created [`Document`].
    ///
    /// A [`BodyParser`] is created from the stored header and run against the
    /// stored reader.
    ///
    /// # Errors
    ///
    /// Returns an error if the body parser fails.
    pub fn parse(&mut self) -> Result<Document> {
        let mut doc = Document::new();
        BodyParser::new(&self.header).parse(&mut self.reader, &mut doc)?;
        Ok(doc)
    }

    /// Returns the header that was parsed when this parser was constructed.
    pub fn header(&self) -> &Hwp3Header {
        &self.header
    }
}
/// Decodes `data` as EUC-KR text and returns it as an owned `String`.
///
/// Decoding is lossy: malformed byte sequences are replaced with U+FFFD and
/// the "had errors" flag reported by the decoder is intentionally ignored.
/// The bytes are always interpreted as EUC-KR; no byte-order-mark detection
/// is performed.
pub fn decode_euckr(data: &[u8]) -> String {
    // `Encoding::decode` sniffs for a BOM and, when one is found, decodes with
    // the BOM's encoding (UTF-8 / UTF-16) instead of EUC-KR. The input here is
    // known to be EUC-KR, and a leading pair such as `EF BB` can be ordinary
    // EUC-KR text, so BOM sniffing must be bypassed to avoid corrupting it.
    let (text, _had_errors) = EUC_KR.decode_without_bom_handling(data);
    text.into_owned()
}
/// Decodes a NUL-terminated EUC-KR byte buffer.
///
/// Only the bytes before the first `0x00` are decoded; anything after the
/// terminator is ignored. If the buffer contains no `0x00`, the whole slice
/// is decoded.
pub fn decode_euckr_cstr(data: &[u8]) -> String {
    // `split` always yields at least one piece, and the first piece is exactly
    // the prefix before the first NUL (or the whole slice when there is none).
    let text_bytes = data.split(|&byte| byte == 0).next().unwrap_or(data);
    decode_euckr(text_bytes)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two-syllable Hangul word encoded as EUC-KR decodes to the expected text.
    #[test]
    fn test_decode_euckr() {
        let euckr_bytes: [u8; 4] = [0xC7, 0xD1, 0xB1, 0xDB];
        assert_eq!(decode_euckr(&euckr_bytes), "한글");
    }

    /// Bytes after the first NUL terminator are ignored.
    #[test]
    fn test_decode_euckr_cstr() {
        let padded: [u8; 7] = [0xC7, 0xD1, 0xB1, 0xDB, 0x00, 0xFF, 0xFF];
        assert_eq!(decode_euckr_cstr(&padded), "한글");
    }

    /// Plain ASCII passes through unchanged.
    #[test]
    fn test_decode_ascii() {
        assert_eq!(decode_euckr(b"Hello World"), "Hello World");
    }
}