use crate::error::Error;
/// Format generation of an MDict file, as classified by `parse_header`.
///
/// `parse_header` picks `V3` when the header's `GeneratedByEngineVersion`
/// attribute parses to a value of at least 3.0, and `V2` in every other case
/// (including when the attribute is missing).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MdictVersion {
/// Engine version below 3.0, or no version attribute present.
V2,
/// Engine version 3.0 or above.
V3,
}
/// Metadata extracted from the header section of an MDict file by
/// `parse_header`.
#[derive(Debug)]
pub struct MdictHeader {
/// Format generation derived from the `GeneratedByEngineVersion` attribute.
pub version: MdictVersion,
/// Value of the `Encoding` attribute; `"UTF-8"` when the attribute is absent.
pub encoding: String,
/// Value of the `Format` attribute; `"Html"` when the attribute is absent.
pub format: String,
/// Value of the `Title` attribute; empty when the attribute is absent.
pub title: String,
/// Value of the `Description` attribute; empty when the attribute is absent.
pub description: String,
/// Value derived from the `Encrypted` attribute; `0` when the attribute is
/// absent. NOTE(review): the meaning of individual values is not defined in
/// this file — confirm against the code that consumes it.
pub encrypted: u8,
/// `true` only when the `KeyCaseSensitive` attribute equals `yes`
/// (compared ignoring ASCII case); `false` otherwise or when absent.
pub key_case_sensitive: bool,
/// Byte offset into the parsed input just past the 4-byte length prefix, the
/// header text and the 4 bytes that follow it (`4 + header_len + 4`).
/// NOTE(review): presumably where the keyword section begins — confirm
/// against the reader that uses this offset.
pub keyword_sect_start: usize,
/// UTF-8 bytes of the `UUID` attribute's text, or `None` when the attribute
/// is absent.
pub uuid: Option<Vec<u8>>,
}
pub fn parse_header(data: &[u8]) -> crate::Result<MdictHeader> {
if data.len() < 8 {
return Err(Error::InvalidFormat("file too small".into()));
}
let header_len = u32::from_be_bytes([data[0], data[1], data[2], data[3]]) as usize;
if data.len() < 4 + header_len + 4 {
return Err(Error::InvalidFormat("file truncated in header".into()));
}
let header_bytes = &data[4..4 + header_len];
let header_str = decode_utf16le(header_bytes)?;
let keyword_sect_start = 4 + header_len + 4;
let mut version_raw = 2.0f32;
let mut encoding = "UTF-8".to_string();
let mut format = "Html".to_string();
let mut title = String::new();
let mut description = String::new();
let mut encrypted = 0u8;
let mut key_case_sensitive = false;
let mut uuid: Option<Vec<u8>> = None;
for (key, val) in parse_xml_attrs(&header_str) {
match key.as_str() {
"GeneratedByEngineVersion" => {
version_raw = val.parse().map_err(|e| {
Error::InvalidFormat(format!(
"invalid engine version '{}': {}", val, e
))
})?;
}
"Encoding" => encoding = val,
"Format" => format = val,
"Title" => title = val,
"Description" => description = val,
"Encrypted" => {
encrypted = val.parse().map_err(|e| {
Error::InvalidFormat(format!(
"invalid encrypted field '{}': {}", val, e
))
})?;
}
"KeyCaseSensitive" => {
key_case_sensitive = val.eq_ignore_ascii_case("yes");
}
"UUID" => uuid = Some(val.into_bytes()),
_ => {}
}
}
let version = if version_raw >= 3.0 {
MdictVersion::V3
} else {
MdictVersion::V2
};
Ok(MdictHeader {
version,
encoding,
format,
title,
description,
encrypted,
key_case_sensitive,
keyword_sect_start,
uuid,
})
}
pub(crate) fn decode_utf16le(data: &[u8]) -> crate::Result<String> {
if data.len() % 2 != 0 {
return Err(Error::InvalidFormat("odd byte count for UTF-16LE".into()));
}
let u16s: Vec<u16> = data
.chunks_exact(2)
.map(|c| u16::from_le_bytes([c[0], c[1]]))
.collect();
String::from_utf16(&u16s)
.map_err(|e| Error::InvalidFormat(format!("invalid UTF-16LE: {}", e)))
}
/// Extracts `name="value"` / `name='value'` pairs from an XML-like tag.
///
/// This is a lenient scanner, not an XML parser. It walks the text from one
/// `=` to the next:
///
/// - The attribute name is the token immediately before the `=`, delimited by
///   whitespace, `<` or `/`. Whitespace is allowed on both sides of the `=`.
/// - The value must be enclosed in double or single quotes and ends at the
///   next occurrence of the same quote character. No entity or escape
///   processing is applied to it.
/// - A pair whose name is empty is dropped.
/// - Scanning stops at the first `=` that is not followed by a quoted value,
///   or at an unterminated quote; pairs collected up to that point are
///   returned.
///
/// Pairs are returned in document order; duplicates are preserved.
pub(crate) fn parse_xml_attrs(xml: &str) -> Vec<(String, String)> {
    let mut attrs = Vec::new();
    let mut remaining = xml;

    while let Some(eq_pos) = remaining.find('=') {
        // Drop whitespace sitting between the name and `=` first; otherwise
        // the "last token" below would be the empty string after that
        // whitespace and `Title = "x"` would lose its name.
        let name = remaining[..eq_pos]
            .trim_end()
            .rsplit(|c: char| c.is_whitespace() || c == '<' || c == '/')
            .next()
            .unwrap_or("");

        let after_eq = remaining[eq_pos + 1..].trim_start();
        let quote = match after_eq.chars().next() {
            Some(c) if c == '"' || c == '\'' => c,
            // Unquoted value or end of input: nothing more can be scanned.
            _ => break,
        };

        let after_open = &after_eq[quote.len_utf8()..];
        let close = match after_open.find(quote) {
            Some(idx) => idx,
            // Unterminated quote.
            None => break,
        };

        if !name.is_empty() {
            attrs.push((name.to_string(), after_open[..close].to_string()));
        }
        remaining = &after_open[close + 1..];
    }

    attrs
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned `(name, value)` pair to compare against parser output.
    fn pair(name: &str, value: &str) -> (String, String) {
        (name.to_owned(), value.to_owned())
    }

    // ---- decode_utf16le ----

    #[test]
    fn decodes_ascii() {
        // 'H' and 'i', each as one little-endian code unit.
        let decoded = decode_utf16le(&[0x48, 0x00, 0x69, 0x00]).unwrap();
        assert_eq!(decoded, "Hi");
    }

    #[test]
    fn decodes_cjk() {
        // U+4F60 stored low byte first.
        let decoded = decode_utf16le(&[0x60, 0x4F]).unwrap();
        assert_eq!(decoded, "你");
    }

    #[test]
    fn empty_input() {
        let decoded = decode_utf16le(&[]).unwrap();
        assert_eq!(decoded, "");
    }

    #[test]
    fn odd_byte_count_is_error() {
        let outcome = decode_utf16le(&[0x00]);
        assert!(outcome.is_err());
    }

    // ---- parse_xml_attrs ----

    #[test]
    fn single_attr_double_quotes() {
        let parsed = parse_xml_attrs(r#"<Dict Title="Test">"#);
        assert_eq!(parsed, vec![pair("Title", "Test")]);
    }

    #[test]
    fn single_attr_single_quotes() {
        let parsed = parse_xml_attrs("<Dict Title='Test'>");
        assert_eq!(parsed, vec![pair("Title", "Test")]);
    }

    #[test]
    fn multiple_attrs() {
        let parsed = parse_xml_attrs(
            r#"<Dict GeneratedByEngineVersion="2.0" Encoding="UTF-8" Format="Html">"#,
        );
        let expected = vec![
            pair("GeneratedByEngineVersion", "2.0"),
            pair("Encoding", "UTF-8"),
            pair("Format", "Html"),
        ];
        assert_eq!(parsed, expected);
    }

    #[test]
    fn empty_value() {
        let parsed = parse_xml_attrs(r#"<Dict Title="">"#);
        assert_eq!(parsed, vec![pair("Title", "")]);
    }

    #[test]
    fn no_attrs() {
        assert!(parse_xml_attrs("<Dict>").is_empty());
    }

    #[test]
    fn empty_string() {
        assert!(parse_xml_attrs("").is_empty());
    }

    #[test]
    fn value_with_spaces() {
        let parsed = parse_xml_attrs(r#"<Dict Title="My Cool Dict">"#);
        assert_eq!(parsed, vec![pair("Title", "My Cool Dict")]);
    }

    #[test]
    fn newlines_between_attrs() {
        let parsed = parse_xml_attrs("<Dict\nTitle=\"Test\"\nEncoding=\"UTF-8\">");
        // Only the attribute names are checked here.
        let names: Vec<&str> = parsed.iter().map(|(name, _)| name.as_str()).collect();
        assert_eq!(names, vec!["Title", "Encoding"]);
    }

    // ---- parse_header ----

    #[test]
    fn too_small_is_invalid_format() {
        // Four bytes is below the 8-byte minimum.
        let outcome = parse_header(&[0; 4]);
        assert!(matches!(outcome, Err(crate::error::Error::InvalidFormat(_))));
    }

    #[test]
    fn truncated_header_is_invalid_format() {
        // Declares a 200-byte header inside a 20-byte buffer.
        let mut data = vec![0; 20];
        data[3] = 200;
        let outcome = parse_header(&data);
        assert!(matches!(outcome, Err(crate::error::Error::InvalidFormat(_))));
    }
}