use std::collections::HashMap;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, Cursor};
use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
use salsa20::Salsa20;
use salsa20::cipher::{KeyIvInit, StreamCipher};
use ripemd::{Ripemd128, Digest};
use adler::Adler32;
/// Errors reported while opening or decoding a dictionary file.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// An underlying file operation failed.
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
/// The file layout does not match what the reader expects (e.g. header too
/// short, key block count mismatch, missing key block marker).
#[error("Invalid file format: {0}")]
InvalidFormat(String),
/// A block uses compression type tag `01 00 00 00`, which this reader does
/// not decompress.
#[error("Unsupported compression type")]
UnsupportedCompression,
/// The Salsa20 cipher could not be set up (wrong key length or init error).
#[error("Encryption error: {0}")]
Encryption(String),
/// The key header is flagged as encrypted but no passcode was supplied.
#[error("Invalid passcode")]
InvalidPasscode,
/// An Adler-32 checksum of the file header or key header did not match.
#[error("Checksum mismatch")]
ChecksumMismatch,
/// Text encoding failure. Not constructed anywhere in the visible code.
#[error("Encoding error: {0}")]
Encoding(String),
/// A block could not be parsed (bad marker, size or checksum mismatch, ...).
#[error("Parse error: {0}")]
Parse(String),
}
/// Shorthand for results whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// Registration credentials used to decrypt an encrypted key header.
#[derive(Debug, Clone)]
pub struct Passcode {
/// Registration code bytes; decrypted with a key derived from `userid`.
pub regcode: Vec<u8>,
/// User identifier; treated as an e-mail address when the header's
/// `RegisterBy` attribute is `EMail`, otherwise as a device id.
pub userid: String,
}
/// Decodes the four XML character entities (`lt`, `gt`, `quot`, `amp`) in
/// `text` and returns the resulting bytes.
///
/// The input is scanned once from left to right, so the output of one
/// replacement is never re-examined: an escaped ampersand followed by `lt;`
/// yields the literal entity text, not `<`. Unknown or unterminated entities
/// are copied through unchanged.
///
/// The ampersand in each entity literal is written as `\x26` so the literals
/// survive tooling that unescapes HTML entities inside source text.
fn unescape_entities(text: &[u8]) -> Vec<u8> {
    let mut result = Vec::with_capacity(text.len());
    let mut rest = text;
    while let Some((&byte, tail)) = rest.split_first() {
        // (length of the entity in bytes, byte it stands for)
        let entity = if byte != 0x26 {
            None
        } else if rest.starts_with(b"\x26lt;") {
            Some((4, b'<'))
        } else if rest.starts_with(b"\x26gt;") {
            Some((4, b'>'))
        } else if rest.starts_with(b"\x26quot;") {
            Some((6, b'"'))
        } else if rest.starts_with(b"\x26amp;") {
            Some((5, 0x26))
        } else {
            None
        };
        match entity {
            Some((len, ch)) => {
                result.push(ch);
                rest = &rest[len..];
            }
            None => {
                result.push(byte);
                rest = tail;
            }
        }
    }
    result
}
/// Applies the MDict "fast decrypt" transform to `data` using `key`.
///
/// For every input byte the two nibbles are swapped and the result is XORed
/// with the previous *ciphertext* byte (seeded with `0x36`), the low 8 bits of
/// the byte index, and the key byte at `index % key.len()`.
///
/// The previous implementation was a plain repeating-key XOR, which does not
/// match the transform used by MDict files.
///
/// An empty `key` returns `data` unchanged (this also avoids a modulo by zero).
fn fast_decrypt(data: &[u8], key: &[u8]) -> Vec<u8> {
    if key.is_empty() {
        return data.to_vec();
    }
    let mut previous = 0x36u8;
    data.iter()
        .zip(key.iter().cycle())
        .enumerate()
        .map(|(index, (&byte, &key_byte))| {
            // `index as u8` deliberately keeps only the low 8 bits.
            let plain = byte.rotate_left(4) ^ previous ^ (index as u8) ^ key_byte;
            previous = byte;
            plain
        })
        .collect()
}
fn mdx_decrypt(comp_block: &[u8]) -> Result<Vec<u8>> {
let key = b"\x95\x36\x00\x00";
Ok(fast_decrypt(comp_block, key))
}
fn salsa_decrypt(ciphertext: &[u8], key: &[u8]) -> Result<Vec<u8>> {
if key.len() != 32 {
return Err(Error::Encryption("Salsa20 key must be 32 bytes".to_string()));
}
let mut cipher = Salsa20::new_from_slices(key, &[0u8; 8])
.map_err(|e| Error::Encryption(format!("Salsa20 init error: {}", e)))?;
let mut result = ciphertext.to_vec();
cipher.apply_keystream(&mut result);
Ok(result)
}
/// Derives the key-header decryption key from a registration code and a
/// device id.
///
/// The RIPEMD-128 digest of `deviceid || regcode` is written twice into a
/// 32-byte Salsa20 key, which is then used to transform `regcode`.
///
/// NOTE(review): other MDict readers appear to hash only the device id —
/// confirm that including `regcode` in the digest is intended.
fn decrypt_regcode_by_deviceid(regcode: &[u8], deviceid: &[u8]) -> Result<Vec<u8>> {
    let mut ripemd = Ripemd128::new();
    for part in [deviceid, regcode] {
        ripemd.update(part);
    }
    let digest = ripemd.finalize();

    // The 16-byte digest fills both halves of the 32-byte key.
    let mut salsa_key = [0u8; 32];
    for half in salsa_key.chunks_exact_mut(16) {
        half.copy_from_slice(&digest);
    }
    salsa_decrypt(regcode, &salsa_key)
}
/// Derives the key-header decryption key from a registration code and an
/// e-mail address.
///
/// The RIPEMD-128 digest of `email || regcode` is written twice into a
/// 32-byte Salsa20 key, which is then used to transform `regcode`.
///
/// NOTE(review): `email` is hashed exactly as passed in; other MDict readers
/// appear to hash the UTF-16LE form of the address only — confirm.
fn decrypt_regcode_by_email(regcode: &[u8], email: &[u8]) -> Result<Vec<u8>> {
    let mut ripemd = Ripemd128::new();
    for part in [email, regcode] {
        ripemd.update(part);
    }
    let digest = ripemd.finalize();

    // The 16-byte digest fills both halves of the 32-byte key.
    let mut salsa_key = [0u8; 32];
    for half in salsa_key.chunks_exact_mut(16) {
        half.copy_from_slice(&digest);
    }
    salsa_decrypt(regcode, &salsa_key)
}
/// Shared reader state for a dictionary file: the parsed header, the key
/// index, and the offsets needed to locate the record section.
#[derive(Debug)]
pub struct MDict {
// Path of the file; it is re-opened by every read step.
fname: String,
// Text encoding name; taken from the header's `Encoding` attribute when the
// caller did not supply one.
encoding: String,
// Optional credentials for files whose key header is encrypted.
passcode: Option<Passcode>,
// Attribute name -> value pairs parsed from the file header.
header: HashMap<String, String>,
// One `(record offset, key bytes)` pair per entry, in file order.
key_list: Vec<(u64, Vec<u8>)>,
// Entry count as read from the key header.
num_entries: usize,
// Engine version from the header; values >= 2.0 select the 8-byte layout.
version: f32,
// Encryption flags from the header: bit 0 = key header, bit 1 = key block info.
encrypt: u8,
// Width in bytes (4 or 8) of the big-endian integers in the file.
number_width: usize,
// File offset just past the header checksum, where the key section starts.
key_block_offset: u64,
// File offset just past the key blocks, where the record section starts.
record_block_offset: u64,
// Stylesheet entries: first line of each 3-line group -> (second, third).
stylesheet: HashMap<String, (String, String)>,
}
impl MDict {
/// Opens `fname`, parses its header and loads the key index.
///
/// `encoding` overrides the encoding named in the header when it is `Some`
/// and non-empty. `passcode` is only consulted for files whose key header is
/// encrypted.
///
/// If the regular key-section parse fails for any reason, a marker-scanning
/// fallback (`read_keys_brutal`) is attempted after printing a notice to
/// standard output.
///
/// # Errors
/// Propagates header errors and, when the fallback also fails, its error.
pub fn new(fname: &str, encoding: Option<String>, passcode: Option<Passcode>) -> Result<Self> {
    let mut dict = Self {
        fname: fname.to_string(),
        encoding: encoding.unwrap_or_default(),
        passcode,
        header: HashMap::new(),
        key_list: Vec::new(),
        num_entries: 0,
        version: 0.0,
        encrypt: 0,
        number_width: 4,
        key_block_offset: 0,
        record_block_offset: 0,
        stylesheet: HashMap::new(),
    };

    let header = dict.read_header()?;
    dict.header = header;

    let keys = dict.read_keys().or_else(|_| {
        println!("Try Brutal Force on Encrypted Key Blocks");
        dict.read_keys_brutal()
    })?;
    dict.key_list = keys;

    Ok(dict)
}
/// Number of entries as recorded in `num_entries`.
pub fn len(&self) -> usize {
    self.num_entries
}

/// Returns `true` when the recorded entry count is zero.
pub fn is_empty(&self) -> bool {
    self.len() == 0
}

/// Iterates over the raw key bytes of every entry, in index order.
pub fn keys(&self) -> impl Iterator<Item = &[u8]> {
    self.key_list.iter().map(|entry| &entry.1[..])
}

/// Attribute map parsed from the file header.
pub fn header(&self) -> &HashMap<String, String> {
    &self.header
}
/// Reads one big-endian unsigned integer from `reader`: 4 bytes wide when
/// `number_width` is 4, otherwise 8 bytes wide.
fn read_number<R: Read>(&self, reader: &mut R) -> Result<u64> {
    let value = match self.number_width {
        4 => u64::from(reader.read_u32::<BigEndian>()?),
        _ => reader.read_u64::<BigEndian>()?,
    };
    Ok(value)
}
/// Extracts `name="value"` attribute pairs from the header text.
///
/// A name starts at any alphabetic character and continues over
/// alphanumerics and `_`. Whitespace and `=` after the name are skipped, and
/// the next character must be `"` for a value to be recorded; the value runs
/// to the following `"` (or end of input) and has its XML entities decoded.
/// Anything that does not fit this shape is skipped.
fn parse_header(header: &[u8]) -> Result<HashMap<String, String>> {
    let text = String::from_utf8_lossy(header);
    let mut stream = text.chars().peekable();
    let mut attributes = HashMap::new();

    while let Some(first) = stream.next() {
        if !first.is_alphabetic() {
            continue;
        }

        let mut name = String::new();
        name.push(first);
        while let Some(c) = stream.next_if(|c| c.is_alphanumeric() || *c == '_') {
            name.push(c);
        }

        // Skip the separator between name and opening quote.
        while stream.next_if(|c| c.is_whitespace() || *c == '=').is_some() {}

        // The character after the separator is consumed whether or not it is
        // a quote; only a quote starts a value.
        if stream.next() != Some('"') {
            continue;
        }

        // `take_while` also consumes the closing quote.
        let raw_value: String = stream.by_ref().take_while(|&c| c != '"').collect();
        let unescaped = unescape_entities(raw_value.as_bytes());
        attributes.insert(name, String::from_utf8_lossy(&unescaped).into_owned());
    }

    Ok(attributes)
}
/// Reads and validates the file header, returning its attribute map.
///
/// Layout consumed from the start of the file: a big-endian `u32` length, the
/// header bytes, and a little-endian Adler-32 of those bytes. The header text
/// (minus its last two bytes) is decoded as UTF-16LE, falling back to lossy
/// UTF-8, and parsed into attributes.
///
/// Side effects on `self`: sets `key_block_offset`, `encrypt`, `version`,
/// `number_width`, fills `stylesheet`, and sets `encoding` when none was
/// supplied by the caller.
///
/// # Errors
/// [`Error::Io`] on read failure, [`Error::ChecksumMismatch`] on a bad
/// checksum, [`Error::InvalidFormat`] when the header is under two bytes.
fn read_header(&mut self) -> Result<HashMap<String, String>> {
    let mut file = File::open(&self.fname)?;

    let declared_len = file.read_u32::<BigEndian>()? as usize;
    let mut raw_header = vec![0u8; declared_len];
    file.read_exact(&mut raw_header)?;

    let stored_checksum = file.read_u32::<LittleEndian>()?;
    let mut adler = Adler32::new();
    adler.write_slice(&raw_header);
    if adler.checksum() != stored_checksum {
        return Err(Error::ChecksumMismatch);
    }
    self.key_block_offset = file.stream_position()?;

    if raw_header.len() < 2 {
        return Err(Error::InvalidFormat("Header too short".to_string()));
    }
    // The trailing two bytes are dropped before decoding.
    let body = &raw_header[..raw_header.len() - 2];
    let header_text = if body.len() % 2 == 0 {
        let units: Vec<u16> = body
            .chunks_exact(2)
            .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
            .collect();
        match String::from_utf16(&units) {
            Ok(text) => text,
            Err(_) => String::from_utf8_lossy(body).into_owned(),
        }
    } else {
        String::from_utf8_lossy(body).into_owned()
    };
    let header_tag = Self::parse_header(header_text.as_bytes())?;

    // A caller-supplied encoding wins; otherwise use the header's, widening
    // GBK / GB2312 to GB18030.
    if self.encoding.is_empty() {
        if let Some(declared) = header_tag.get("Encoding") {
            self.encoding = match declared.as_str() {
                "GBK" | "GB2312" => "GB18030".to_string(),
                other => other.to_string(),
            };
        }
    }

    self.encrypt = match header_tag.get("Encrypted").map(String::as_str) {
        None | Some("No") => 0,
        Some("Yes") => 1,
        Some(flags) => flags.parse().unwrap_or(0),
    };

    // Stylesheet lines come in groups of three; an incomplete trailing group
    // is ignored.
    if let Some(sheet) = header_tag.get("StyleSheet") {
        let lines: Vec<&str> = sheet.lines().collect();
        for group in lines.chunks_exact(3) {
            self.stylesheet.insert(
                group[0].to_string(),
                (group[1].to_string(), group[2].to_string()),
            );
        }
    }

    // `RequiredEngineVersion` takes precedence over `GeneratedByEngineVersion`;
    // a missing or unparsable value counts as 1.0.
    self.version = header_tag
        .get("RequiredEngineVersion")
        .or_else(|| header_tag.get("GeneratedByEngineVersion"))
        .map_or(1.0, |text| text.parse().unwrap_or(1.0));
    self.number_width = if self.version < 2.0 { 4 } else { 8 };

    Ok(header_tag)
}
fn read_keys(&mut self) -> Result<Vec<(u64, Vec<u8>)>> {
let mut f = File::open(&self.fname)?;
f.seek(SeekFrom::Start(self.key_block_offset))?;
let num_bytes = if self.version >= 2.0 {
8 * 5 } else {
4 * 4 };
let mut block = vec![0u8; num_bytes];
f.read_exact(&mut block)?;
if self.encrypt & 1 != 0 {
if let Some(ref passcode) = self.passcode {
let userid = passcode.userid.as_bytes();
let regcode = &passcode.regcode;
let encrypted_key = if let Some(register_by) = self.header.get("RegisterBy") {
if register_by == "EMail" {
decrypt_regcode_by_email(regcode, userid)?
} else {
decrypt_regcode_by_deviceid(regcode, userid)?
}
} else {
decrypt_regcode_by_deviceid(regcode, userid)?
};
block = salsa_decrypt(&block, &encrypted_key)?;
} else {
return Err(Error::InvalidPasscode);
}
}
let mut cursor = Cursor::new(&block);
let num_key_blocks = self.read_number(&mut cursor)?;
self.num_entries = self.read_number(&mut cursor)? as usize;
let _key_block_info_decomp_size = if self.version >= 2.0 {
self.read_number(&mut cursor)?
} else {
0
};
let key_block_info_size = self.read_number(&mut cursor)? as usize;
let key_block_size = self.read_number(&mut cursor)? as usize;
if self.version >= 2.0 {
let adler32_expected = f.read_u32::<BigEndian>()?;
let mut hasher = Adler32::new();
hasher.write_slice(&block);
let adler32_actual = hasher.checksum();
if adler32_expected != adler32_actual {
return Err(Error::ChecksumMismatch);
}
}
let mut key_block_info = vec![0u8; key_block_info_size];
f.read_exact(&mut key_block_info)?;
let key_block_info_list = self.decode_key_block_info(&key_block_info)?;
if num_key_blocks as usize != key_block_info_list.len() {
return Err(Error::InvalidFormat("Key block count mismatch".to_string()));
}
let mut key_block_compressed = vec![0u8; key_block_size];
f.read_exact(&mut key_block_compressed)?;
let key_list = self.decode_key_block(&key_block_compressed, &key_block_info_list)?;
self.record_block_offset = f.stream_position()?;
Ok(key_list)
}
fn read_keys_brutal(&mut self) -> Result<Vec<(u64, Vec<u8>)>> {
let mut f = File::open(&self.fname)?;
f.seek(SeekFrom::Start(self.key_block_offset))?;
let (num_bytes, key_block_type) = if self.version >= 2.0 {
(8 * 5 + 4, b"\x02\x00\x00\x00".to_vec())
} else {
(4 * 4, b"\x01\x00\x00\x00".to_vec())
};
let mut _block = vec![0u8; num_bytes];
f.read_exact(&mut _block)?;
let mut key_block_info = vec![0u8; 8];
f.read_exact(&mut key_block_info)?;
if self.version >= 2.0 {
if &key_block_info[..4] != b"\x02\x00\x00\x00" {
return Err(Error::InvalidFormat("Invalid key block marker".to_string()));
}
}
loop {
let fpos = f.stream_position()?;
let mut buffer = vec![0u8; 1024];
let bytes_read = f.read(&mut buffer)?;
if bytes_read == 0 {
return Err(Error::InvalidFormat("Key block type marker not found".to_string()));
}
buffer.truncate(bytes_read);
if let Some(index) = buffer.windows(key_block_type.len())
.position(|window| window == key_block_type) {
key_block_info.extend_from_slice(&buffer[..index]);
f.seek(SeekFrom::Start(fpos + index as u64))?;
break;
} else {
key_block_info.extend_from_slice(&buffer);
}
}
let key_block_info_list = self.decode_key_block_info(&key_block_info)?;
let key_block_size: usize = key_block_info_list.iter().map(|(size, _)| *size as usize).sum();
let mut key_block_compressed = vec![0u8; key_block_size];
f.read_exact(&mut key_block_compressed)?;
let key_list = self.decode_key_block(&key_block_compressed, &key_block_info_list)?;
self.record_block_offset = f.stream_position()?;
Ok(key_list)
}
fn decode_key_block_info(&self, key_block_info_compressed: &[u8]) -> Result<Vec<(u64, u64)>> {
let key_block_info = if self.version >= 2.0 {
if key_block_info_compressed.len() < 4 || &key_block_info_compressed[0..4] != b"\x02\x00\x00\x00" {
return Err(Error::Parse("Invalid key block info header".to_string()));
}
let mut data = key_block_info_compressed;
let decrypted_data;
if self.encrypt & 0x02 != 0 {
decrypted_data = mdx_decrypt(data)?;
data = &decrypted_data;
}
let mut decoder = flate2::read::ZlibDecoder::new(&data[8..]);
let mut key_block_info = Vec::new();
std::io::Read::read_to_end(&mut decoder, &mut key_block_info)
.map_err(|e| Error::Io(e))?;
let adler32_expected = u32::from_be_bytes([
data[4], data[5], data[6], data[7]
]);
let mut hasher = Adler32::new();
hasher.write_slice(&key_block_info);
let adler32_actual = hasher.checksum();
if adler32_expected != adler32_actual {
return Err(Error::Parse("Adler32 checksum mismatch".to_string()));
}
key_block_info
} else {
key_block_info_compressed.to_vec()
};
let mut key_block_info_list = Vec::new();
let mut _num_entries = 0u64;
let mut i = 0;
let (byte_format_width, text_term) = if self.version >= 2.0 {
(2, 1) } else {
(1, 0) };
while i < key_block_info.len() {
if i + self.number_width > key_block_info.len() {
break;
}
let entries = self.read_number_from_bytes(&key_block_info[i..i + self.number_width])?;
_num_entries += entries;
i += self.number_width;
if i + byte_format_width > key_block_info.len() {
break;
}
let text_head_size = if byte_format_width == 2 {
u16::from_be_bytes([key_block_info[i], key_block_info[i + 1]]) as usize
} else {
key_block_info[i] as usize
};
i += byte_format_width;
let head_skip = if self.encoding != "UTF-16" {
text_head_size + text_term
} else {
(text_head_size + text_term) * 2
};
i += head_skip;
if i + byte_format_width > key_block_info.len() {
break;
}
let text_tail_size = if byte_format_width == 2 {
u16::from_be_bytes([key_block_info[i], key_block_info[i + 1]]) as usize
} else {
key_block_info[i] as usize
};
i += byte_format_width;
let tail_skip = if self.encoding != "UTF-16" {
text_tail_size + text_term
} else {
(text_tail_size + text_term) * 2
};
i += tail_skip;
if i + 2 * self.number_width > key_block_info.len() {
break;
}
let key_block_compressed_size = self.read_number_from_bytes(&key_block_info[i..i + self.number_width])?;
i += self.number_width;
let key_block_decompressed_size = self.read_number_from_bytes(&key_block_info[i..i + self.number_width])?;
i += self.number_width;
key_block_info_list.push((key_block_compressed_size, key_block_decompressed_size));
}
Ok(key_block_info_list)
}
fn decode_key_block(&self, key_block_compressed: &[u8], key_block_info_list: &[(u64, u64)]) -> Result<Vec<(u64, Vec<u8>)>> {
let mut key_list = Vec::new();
let mut i = 0;
for &(compressed_size, _decompressed_size) in key_block_info_list {
let start = i;
let end = i + compressed_size as usize;
if end > key_block_compressed.len() {
break;
}
let key_block_type = &key_block_compressed[start..start + 4];
let adler32_expected = u32::from_be_bytes([
key_block_compressed[start + 4],
key_block_compressed[start + 5],
key_block_compressed[start + 6],
key_block_compressed[start + 7],
]);
let key_block = if key_block_type == b"\x00\x00\x00\x00" {
key_block_compressed[start + 8..end].to_vec()
} else if key_block_type == b"\x01\x00\x00\x00" {
return Err(Error::UnsupportedCompression);
} else if key_block_type == b"\x02\x00\x00\x00" {
let mut decoder = flate2::read::ZlibDecoder::new(&key_block_compressed[start + 8..end]);
let mut decompressed = Vec::new();
std::io::Read::read_to_end(&mut decoder, &mut decompressed)
.map_err(|e| Error::Io(e))?;
decompressed
} else {
return Err(Error::Parse("Unknown compression type".to_string()));
};
let mut block_keys = self.split_key_block(&key_block)?;
key_list.append(&mut block_keys);
let mut hasher = Adler32::new();
hasher.write_slice(&key_block);
let adler32_actual = hasher.checksum();
if adler32_expected != adler32_actual {
return Err(Error::Parse("Key block adler32 checksum mismatch".to_string()));
}
i += compressed_size as usize;
}
Ok(key_list)
}
fn read_number_from_bytes(&self, bytes: &[u8]) -> Result<u64> {
if self.number_width == 4 {
if bytes.len() < 4 {
return Err(Error::Parse("Not enough bytes for u32".to_string()));
}
Ok(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as u64)
} else {
if bytes.len() < 8 {
return Err(Error::Parse("Not enough bytes for u64".to_string()));
}
Ok(u64::from_be_bytes([
bytes[0], bytes[1], bytes[2], bytes[3],
bytes[4], bytes[5], bytes[6], bytes[7],
]))
}
}
/// Splits one decoded key block into `(record offset, key bytes)` pairs.
///
/// Each entry is a `number_width`-byte big-endian number followed by the key
/// text, terminated by a zero code unit (two zero bytes for UTF-16, one
/// otherwise). The terminator is searched for in steps of one code unit from
/// the start of the text; if none is found the text runs to the end of the
/// block. Parsing stops when fewer than `number_width` bytes remain.
///
/// Key text is re-encoded to UTF-8 and trimmed, except for encoding `"UTF-8"`,
/// whose bytes are returned as-is.
///
/// NOTE(review): the `"UTF-8"` branch is the only one that does not trim
/// whitespace — confirm whether that asymmetry is intended.
fn split_key_block(&self, key_block: &[u8]) -> Result<Vec<(u64, Vec<u8>)>> {
    let unit = if self.encoding == "UTF-16" { 2 } else { 1 };
    let total = key_block.len();

    let mut entries = Vec::new();
    let mut cursor = 0;
    while cursor < total {
        let text_start = cursor + self.number_width;
        if text_start > total {
            break;
        }
        let key_id = self.read_number_from_bytes(&key_block[cursor..text_start])?;

        // Index of the first all-zero code unit, or the end of the block.
        let text_end = key_block[text_start..]
            .chunks_exact(unit)
            .position(|code_unit| code_unit.iter().all(|&b| b == 0))
            .map_or(total, |n| text_start + n * unit);
        let raw = &key_block[text_start..text_end];

        let key_text = match self.encoding.as_str() {
            "UTF-16" => {
                let (decoded, _, _) = encoding_rs::UTF_16LE.decode(raw);
                decoded.trim().as_bytes().to_vec()
            }
            "UTF-8" => raw.to_vec(),
            "GB18030" | "GBK" | "GB2312" => {
                let (decoded, _, _) = encoding_rs::GB18030.decode(raw);
                decoded.trim().as_bytes().to_vec()
            }
            _ => {
                let (decoded, _, _) = encoding_rs::UTF_8.decode(raw);
                decoded.trim().as_bytes().to_vec()
            }
        };

        entries.push((key_id, key_text));
        cursor = text_end + unit;
    }
    Ok(entries)
}
}
/// Reader for text dictionaries: wraps an [`MDict`] and decodes record text.
#[derive(Debug)]
pub struct Mdx {
// Shared header / key index state.
mdict: MDict,
// When true (and the header defined a stylesheet), style tags in record
// text are expanded by `substitute_stylesheet`.
substyle: bool,
}
impl Mdx {
/// Opens the dictionary at `fname`.
///
/// `encoding` and `passcode` are forwarded to [`MDict::new`]; `substyle`
/// controls whether stylesheet tags in records are expanded.
pub fn new(fname: &str, encoding: Option<String>, substyle: bool, passcode: Option<Passcode>) -> Result<Self> {
    MDict::new(fname, encoding, passcode).map(|mdict| Self { mdict, substyle })
}
/// Decodes every record and returns `(key bytes, record bytes)` pairs.
/// The file is re-read on each call.
pub fn items(&self) -> Result<Vec<(Vec<u8>, Vec<u8>)>> {
self.decode_record_block()
}
fn decode_record_block(&self) -> Result<Vec<(Vec<u8>, Vec<u8>)>> {
let mut file = std::fs::File::open(&self.mdict.fname)?;
file.seek(std::io::SeekFrom::Start(self.mdict.record_block_offset))?;
let num_record_blocks = self.mdict.read_number(&mut file)?;
let num_entries = self.mdict.read_number(&mut file)?;
let actual_entries = self.mdict.key_list.len() as u64;
if num_entries != actual_entries {
eprintln!("Warning: Declared entries ({}) != actual keys ({}) - using actual key count",
num_entries, actual_entries);
}
let record_block_info_size = self.mdict.read_number(&mut file)?;
let record_block_size = self.mdict.read_number(&mut file)?;
let mut record_block_info_list = Vec::new();
let mut size_counter = 0;
for _ in 0..num_record_blocks {
let compressed_size = self.mdict.read_number(&mut file)?;
let decompressed_size = self.mdict.read_number(&mut file)?;
record_block_info_list.push((compressed_size, decompressed_size));
size_counter += self.mdict.number_width * 2;
}
if size_counter != record_block_info_size as usize {
return Err(Error::Parse("Record block info size mismatch".to_string()));
}
let mut result = Vec::new();
let mut offset = 0u64;
let mut i = 0;
let mut size_counter = 0u64;
for (compressed_size, decompressed_size) in record_block_info_list {
let mut record_block_compressed = vec![0u8; compressed_size as usize];
std::io::Read::read_exact(&mut file, &mut record_block_compressed)?;
let record_block_type = &record_block_compressed[0..4];
let adler32_expected = u32::from_be_bytes([
record_block_compressed[4],
record_block_compressed[5],
record_block_compressed[6],
record_block_compressed[7],
]);
let record_block = if record_block_type == b"\x00\x00\x00\x00" {
record_block_compressed[8..].to_vec()
} else if record_block_type == b"\x01\x00\x00\x00" {
return Err(Error::UnsupportedCompression);
} else if record_block_type == b"\x02\x00\x00\x00" {
let mut decoder = flate2::read::ZlibDecoder::new(&record_block_compressed[8..]);
let mut decompressed = Vec::new();
std::io::Read::read_to_end(&mut decoder, &mut decompressed)?;
decompressed
} else {
return Err(Error::Parse("Unknown compression type".to_string()));
};
let mut hasher = Adler32::new();
hasher.write_slice(&record_block);
let adler32_actual = hasher.checksum();
if adler32_expected != adler32_actual {
return Err(Error::Parse("Record block adler32 checksum mismatch".to_string()));
}
if record_block.len() != decompressed_size as usize {
return Err(Error::Parse("Decompressed size mismatch".to_string()));
}
while i < self.mdict.key_list.len() {
let (record_start, key_text) = &self.mdict.key_list[i];
if *record_start >= offset + record_block.len() as u64 {
break;
}
let record_end = if i < self.mdict.key_list.len() - 1 {
self.mdict.key_list[i + 1].0
} else {
record_block.len() as u64 + offset
};
i += 1;
let start_idx = (*record_start - offset) as usize;
let end_idx = (record_end - offset) as usize;
if start_idx < record_block.len() && end_idx <= record_block.len() {
let record = record_block[start_idx..end_idx].to_vec();
let record_text = if self.mdict.encoding == "UTF-16" {
let (decoded, _, _) = encoding_rs::UTF_16LE.decode(&record);
decoded.trim_end_matches('\0').trim().as_bytes().to_vec()
} else {
match self.mdict.encoding.as_str() {
"UTF-8" => {
let text = String::from_utf8_lossy(&record);
text.trim_end_matches('\0').trim().as_bytes().to_vec()
}
"GB18030" | "GBK" | "GB2312" => {
let (decoded, _, _) = encoding_rs::GB18030.decode(&record);
decoded.trim_end_matches('\0').trim().as_bytes().to_vec()
}
_ => {
let (decoded, _, _) = encoding_rs::UTF_8.decode(&record);
decoded.trim_end_matches('\0').trim().as_bytes().to_vec()
}
}
};
let final_record = if self.substyle && !self.mdict.stylesheet.is_empty() {
self.substitute_stylesheet(&record_text)?
} else {
record_text
};
result.push((key_text.clone(), final_record));
}
}
offset += record_block.len() as u64;
size_counter += compressed_size;
}
if size_counter != record_block_size {
return Err(Error::Parse("Record block size mismatch".to_string()));
}
Ok(result)
}
fn substitute_stylesheet(&self, txt: &[u8]) -> Result<Vec<u8>> {
let text = String::from_utf8_lossy(txt);
let re_split = regex::Regex::new(r"`\d+`").map_err(|e| Error::Parse(format!("Regex error: {}", e)))?;
let re_find = regex::Regex::new(r"`\d+`").map_err(|e| Error::Parse(format!("Regex error: {}", e)))?;
let txt_list: Vec<&str> = re_split.split(&text).collect();
let txt_tags: Vec<&str> = re_find.find_iter(&text).map(|m| m.as_str()).collect();
let mut txt_styled = txt_list[0].to_string();
for (j, p) in txt_list[1..].iter().enumerate() {
if j < txt_tags.len() {
let tag_num = &txt_tags[j][1..txt_tags[j].len()-1]; if let Some((style_begin, style_end)) = self.mdict.stylesheet.get(tag_num) {
if p.ends_with('\n') {
txt_styled.push_str(style_begin);
txt_styled.push_str(p.trim_end());
txt_styled.push_str(style_end);
txt_styled.push_str("\r\n");
} else {
txt_styled.push_str(style_begin);
txt_styled.push_str(p);
txt_styled.push_str(style_end);
}
} else {
txt_styled.push_str(p);
}
} else {
txt_styled.push_str(p);
}
}
Ok(txt_styled.into_bytes())
}
/// Entry count reported by the wrapped [`MDict`].
pub fn len(&self) -> usize {
self.mdict.len()
}
/// Returns `true` when the wrapped [`MDict`] reports no entries.
pub fn is_empty(&self) -> bool {
self.mdict.is_empty()
}
/// Iterates over the raw key bytes of the wrapped [`MDict`].
pub fn keys(&self) -> impl Iterator<Item = &[u8]> {
self.mdict.keys()
}
/// Header attribute map of the wrapped [`MDict`].
pub fn header(&self) -> &HashMap<String, String> {
self.mdict.header()
}
}
/// Reader for resource archives: wraps an [`MDict`] opened with UTF-16 keys
/// and returns record data as raw bytes.
#[derive(Debug)]
pub struct Mdd {
// Shared header / key index state.
mdict: MDict,
}
impl Mdd {
/// Opens the archive at `fname`, always using `"UTF-16"` as the key encoding.
/// `passcode` is forwarded to [`MDict::new`].
pub fn new(fname: &str, passcode: Option<Passcode>) -> Result<Self> {
    let key_encoding = String::from("UTF-16");
    MDict::new(fname, Some(key_encoding), passcode).map(|mdict| Self { mdict })
}
/// Decodes every record and returns `(key bytes, raw record bytes)` pairs.
/// The file is re-read on each call.
pub fn items(&self) -> Result<Vec<(Vec<u8>, Vec<u8>)>> {
self.decode_record_block()
}
fn decode_record_block(&self) -> Result<Vec<(Vec<u8>, Vec<u8>)>> {
let mut file = std::fs::File::open(&self.mdict.fname)?;
file.seek(std::io::SeekFrom::Start(self.mdict.record_block_offset))?;
let num_record_blocks = self.mdict.read_number(&mut file)?;
let num_entries = self.mdict.read_number(&mut file)?;
if num_entries != self.mdict.num_entries as u64 {
return Err(Error::Parse("Number of entries mismatch".to_string()));
}
let record_block_info_size = self.mdict.read_number(&mut file)?;
let record_block_size = self.mdict.read_number(&mut file)?;
let mut record_block_info_list = Vec::new();
let mut size_counter = 0;
for _ in 0..num_record_blocks {
let compressed_size = self.mdict.read_number(&mut file)?;
let decompressed_size = self.mdict.read_number(&mut file)?;
record_block_info_list.push((compressed_size, decompressed_size));
size_counter += self.mdict.number_width * 2;
}
if size_counter != record_block_info_size as usize {
return Err(Error::Parse("Record block info size mismatch".to_string()));
}
let mut result = Vec::new();
let mut offset = 0u64;
let mut i = 0;
let mut size_counter = 0u64;
for (compressed_size, decompressed_size) in record_block_info_list {
let mut record_block_compressed = vec![0u8; compressed_size as usize];
std::io::Read::read_exact(&mut file, &mut record_block_compressed)?;
let record_block_type = &record_block_compressed[0..4];
let adler32_expected = u32::from_be_bytes([
record_block_compressed[4],
record_block_compressed[5],
record_block_compressed[6],
record_block_compressed[7],
]);
let record_block = if record_block_type == b"\x00\x00\x00\x00" {
record_block_compressed[8..].to_vec()
} else if record_block_type == b"\x01\x00\x00\x00" {
return Err(Error::UnsupportedCompression);
} else if record_block_type == b"\x02\x00\x00\x00" {
let mut decoder = flate2::read::ZlibDecoder::new(&record_block_compressed[8..]);
let mut decompressed = Vec::new();
std::io::Read::read_to_end(&mut decoder, &mut decompressed)?;
decompressed
} else {
return Err(Error::Parse("Unknown compression type".to_string()));
};
let mut hasher = Adler32::new();
hasher.write_slice(&record_block);
let adler32_actual = hasher.checksum();
if adler32_expected != adler32_actual {
return Err(Error::Parse("Record block adler32 checksum mismatch".to_string()));
}
if record_block.len() != decompressed_size as usize {
return Err(Error::Parse("Decompressed size mismatch".to_string()));
}
while i < self.mdict.key_list.len() {
let (record_start, key_text) = &self.mdict.key_list[i];
if *record_start >= offset + record_block.len() as u64 {
break;
}
let record_end = if i < self.mdict.key_list.len() - 1 {
self.mdict.key_list[i + 1].0
} else {
record_block.len() as u64 + offset
};
i += 1;
let start_idx = (*record_start - offset) as usize;
let end_idx = (record_end - offset) as usize;
if start_idx < record_block.len() && end_idx <= record_block.len() {
let data = record_block[start_idx..end_idx].to_vec();
result.push((key_text.clone(), data));
}
}
offset += record_block.len() as u64;
size_counter += compressed_size;
}
if size_counter != record_block_size {
return Err(Error::Parse("Record block size mismatch".to_string()));
}
Ok(result)
}
/// Entry count reported by the wrapped [`MDict`].
pub fn len(&self) -> usize {
self.mdict.len()
}
/// Returns `true` when the wrapped [`MDict`] reports no entries.
pub fn is_empty(&self) -> bool {
self.mdict.is_empty()
}
/// Iterates over the raw key bytes of the wrapped [`MDict`].
pub fn keys(&self) -> impl Iterator<Item = &[u8]> {
self.mdict.keys()
}
/// Header attribute map of the wrapped [`MDict`].
pub fn header(&self) -> &HashMap<String, String> {
self.mdict.header()
}
}