1use std::{
2 collections::HashMap,
3 io::{self, Read},
4 str,
5};
6
7use adler32::adler32;
8
9use compress::zlib;
10use encoding::{all::UTF_16LE, label::encoding_from_whatwg_label, Encoding};
11use nom::{
12 bytes::complete::{take, take_till},
13 combinator::map,
14 multi::{count, length_data, many0},
15 number::complete::{be_u16, be_u32, be_u64, be_u8, le_u32},
16 sequence::tuple,
17 IResult, Slice,
18};
19use regex::Regex;
20use ripemd::{Digest, Ripemd128};
21use salsa20::{cipher::KeyIvInit, Salsa20};
22
23use crate::mdict::Mdx;
24
/// The key entries recovered from one decoded key block.
///
/// `decode_blocks` produces one `KeyBlock` per `BlockEntryInfo` it is given.
#[derive(Debug)]
pub(crate) struct KeyBlock {
    /// Entries in the order they were parsed out of the block.
    pub(crate) entries: Vec<KeyEntry>,
}
29
/// A single key parsed from a key block: a number followed by its text.
#[derive(Debug)]
pub struct KeyEntry {
    /// Big-endian integer stored in front of the key text (32-bit in v1
    /// blocks, 64-bit in v2 blocks).
    /// NOTE(review): presumably the offset of the matching record — confirm
    /// against the code that consumes it.
    pub offset: usize,
    /// Key text: the bytes up to the first zero byte, decoded with the
    /// dictionary's text encoding.
    pub text: String,
}
35
/// Dictionary-level attributes extracted by `parse_header`.
#[derive(Debug)]
pub struct Header {
    /// Format generation, derived from the `GeneratedByEngineVersion` attribute.
    version: Version,
    /// Value of the `Encrypted` attribute: `"Yes"` maps to 1, a numeric value is
    /// used as-is, anything else (or a missing attribute) maps to 0.
    encrypted: u8,
    /// Value of the `Encoding` attribute, `"UTF-8"` when the attribute is missing.
    encoding: String,
}
42
/// Fixed-size header in front of the key block section, as read by
/// `parse_key_block_header_v1` / `parse_key_block_header_v2`.
#[derive(Debug)]
struct KeyBlockHeader {
    /// First integer of the header.
    /// NOTE(review): presumably the number of key blocks — confirm.
    block_num: usize,
    /// Second integer of the header.
    /// NOTE(review): presumably the total number of key entries — confirm.
    entry_num: usize,
    /// Third integer of a v2 header; v1 headers have no such field and the v1
    /// parser stores `block_info_size` here instead.
    decompressed_size: usize,
    /// Number of bytes `parse` hands to `parse_key_block_infos`.
    block_info_size: usize,
    /// Number of bytes `parse` hands to `parse_key_blocks`.
    key_block_size: usize,
}
51
/// Size information for one stored block (key block or record block).
#[derive(Debug)]
pub(crate) struct BlockEntryInfo {
    /// Size of the stored block in bytes; `block_parser` treats the first
    /// 8 bytes as a prefix and the remaining `compressed_size - 8` as payload.
    pub(crate) compressed_size: usize,
    /// Size passed to the LZO decompressor as the expected output length.
    pub(crate) decompressed_size: usize,
}
57
/// Format generation of a dictionary, chosen from the first digit of the
/// header's `GeneratedByEngineVersion` attribute.
#[derive(Debug)]
enum Version {
    /// First digit `1`.
    V1,
    /// First digit `2`.
    V2,
    /// First digit `3`; recognised by the header parser but rejected with a
    /// panic by every section parser in this file.
    V3,
}
64
65fn parse_header(input: &[u8]) -> IResult<&[u8], Header> {
66 let (input, (info, chksum)) = tuple((length_data(be_u32), le_u32))(input)?;
67
68 assert_eq!(adler32(info).unwrap(), chksum);
69
70 let info = UTF_16LE
71 .decode(info, encoding::DecoderTrap::Strict)
72 .unwrap();
73 let attrs = parse_key_value(info.as_str());
74
75 let version = attrs
76 .get("GeneratedByEngineVersion")
77 .unwrap()
78 .trim()
79 .slice(0..1)
80 .parse::<u8>()
81 .unwrap();
82
83 let version = match version {
84 1 => Version::V1,
85 2 => Version::V2,
86 3 => Version::V3,
87 _ => panic!("unsupported version"),
88 };
89
90 let encrypted = attrs
91 .get("Encrypted")
92 .and_then(|x| match x == "Yes" {
93 true => Some(1_u8),
94 false => x.as_str().parse().ok(),
95 })
96 .unwrap_or(0);
97
98 let encoding = attrs
99 .get("Encoding")
100 .unwrap_or(&"UTF-8".to_string())
101 .to_string();
102
103 Ok((
104 input,
105 Header {
106 version,
107 encrypted,
108 encoding,
109 },
110 ))
111}
112
113fn parse_key_value(s: &str) -> HashMap<String, String> {
114 let re = Regex::new(r#"(\w+)="((.|\r\n|[\r\n])*?)""#).unwrap();
115 let mut attrs = HashMap::new();
116 for cap in re.captures_iter(s) {
117 attrs.insert(cap[1].to_string(), cap[2].to_string());
118 }
119 attrs
120}
121
122fn parse_key_block_header_v2(input: &[u8]) -> IResult<&[u8], KeyBlockHeader> {
123 let (input, block_info_buf) = take(40_usize)(input)?;
124 let (input, chksum) = be_u32(input)?;
125 assert_eq!(adler32(block_info_buf).unwrap(), chksum);
126
127 let (_, res) = map(
128 tuple((be_u64, be_u64, be_u64, be_u64, be_u64)),
129 |(block_num, entry_num, decompressed_size, block_info_size, key_block_size)| {
130 KeyBlockHeader {
131 block_num: block_num as usize,
132 entry_num: entry_num as usize,
133 decompressed_size: decompressed_size as usize,
134 block_info_size: block_info_size as usize,
135 key_block_size: key_block_size as usize,
136 }
137 },
138 )(block_info_buf)?;
139 Ok((input, res))
140}
141
142fn parse_key_block_header_v1(input: &[u8]) -> IResult<&[u8], KeyBlockHeader> {
143 let (input, block_info_buf) = take(16_usize)(input)?;
144
145 let (_, res) = map(
146 tuple((be_u32, be_u32, be_u32, be_u32)),
147 |(block_num, entry_num, block_info_size, key_block_size)| KeyBlockHeader {
148 block_num: block_num as usize,
149 entry_num: entry_num as usize,
150 decompressed_size: block_info_size as usize,
151 block_info_size: block_info_size as usize,
152 key_block_size: key_block_size as usize,
153 },
154 )(block_info_buf)?;
155 Ok((input, res))
156}
157
158fn parse_key_block_header<'a>(
159 input: &'a [u8],
160 header: &'a Header,
161) -> IResult<&'a [u8], KeyBlockHeader> {
162 match header.version {
163 Version::V2 => parse_key_block_header_v2(input),
164 Version::V1 => parse_key_block_header_v1(input),
165 _ => panic!("unsupported version"),
166 }
167}
168
169fn parse_key_block_infos<'a>(
170 input: &'a [u8],
171 size: usize,
172 dict_header: &'a Header,
173) -> IResult<&'a [u8], Vec<BlockEntryInfo>> {
174 match &dict_header.version {
175 Version::V1 => parse_key_block_infos_v1(input, size),
176 Version::V2 => parse_key_block_infos_v2(input, size, dict_header),
177 _ => panic!("unsupported version"),
178 }
179}
180
181fn parse_key_block_infos_v1<'a>(
182 input: &'a [u8],
183 size: usize,
184) -> IResult<&'a [u8], Vec<BlockEntryInfo>> {
185 let (input, block_info) = take(size)(input)?;
186 let entry_infos = decode_key_block_info_v1(&block_info[..]);
187 Ok((input, entry_infos))
188}
189fn parse_key_block_infos_v2<'a>(
190 input: &'a [u8],
191 size: usize,
192 dict_header: &'a Header,
193) -> IResult<&'a [u8], Vec<BlockEntryInfo>> {
194 let (input, block_info) = take(size)(input)?;
195
196 assert_eq!(block_info.slice(0..4), b"\x02\x00\x00\x00");
197 let mut key_block_info = vec![];
198
199 if dict_header.encrypted == 2 {
201 let mut md = Ripemd128::new();
202 let mut v = Vec::from(block_info.slice(4..8));
203 let value: u32 = 0x3695;
204 v.extend_from_slice(&value.to_le_bytes());
205 md.update(v);
206 let key = md.finalize();
207 let mut d = Vec::from(&block_info[0..8]);
208 let decrypte = fast_decrypt(&block_info[8..], key.as_slice());
209 d.extend(decrypte);
210 zlib::Decoder::new(&d[8..])
211 .read_to_end(&mut key_block_info)
212 .unwrap();
213 }
214
215 let entry_infos = decode_key_block_info_v2(&key_block_info[..]);
216 Ok((input, entry_infos))
217}
218
219fn text_len_parser_v2(input: &[u8]) -> IResult<&[u8], u16> {
220 let (input, len) = be_u16(input)?;
221 Ok((input, len + 1))
222}
223
/// Reads the one-byte length prefix used by `decode_key_block_info_v1` for
/// the two length-prefixed byte strings of each table row.
fn text_len_parser_v1(input: &[u8]) -> IResult<&[u8], u8> {
    be_u8(input)
}
227
228fn decode_key_block_info_v1(input: &[u8]) -> Vec<BlockEntryInfo> {
229 let mut info_parser = many0(map(
230 tuple((
231 be_u32,
232 length_data(text_len_parser_v1),
233 length_data(text_len_parser_v1),
234 be_u32,
235 be_u32,
236 )),
237 |(_, _, _, compressed_size, decompressed_size)| BlockEntryInfo {
238 compressed_size: compressed_size as usize,
239 decompressed_size: decompressed_size as usize,
240 },
241 ));
242 let (remain, res) = info_parser(input).unwrap();
243 assert_eq!(remain.len(), 0);
244 res
245}
246
247fn decode_key_block_info_v2(input: &[u8]) -> Vec<BlockEntryInfo> {
248 let mut info_parser = many0(map(
249 tuple((
250 be_u64,
251 length_data(text_len_parser_v2),
252 length_data(text_len_parser_v2),
253 be_u64,
254 be_u64,
255 )),
256 |(_, _, _, compressed_size, decompressed_size)| BlockEntryInfo {
257 compressed_size: compressed_size as usize,
259 decompressed_size: decompressed_size as usize,
260 },
261 ));
262 let (remain, res) = info_parser(input).unwrap();
263 assert_eq!(remain.len(), 0);
264 res
265}
266
267fn parse_key_blocks<'a>(
268 input: &'a [u8],
269 size: usize,
270 header: &Header,
271 block_infos: &'a Vec<BlockEntryInfo>,
272) -> IResult<&'a [u8], Vec<KeyBlock>> {
273 let (input, buf) = take(size)(input)?;
274
275 let blocks = match &header.version {
276 Version::V1 => decode_blocks(buf, block_infos, &header),
277 Version::V2 => decode_blocks(buf, block_infos, &header),
278 Version::V3 => panic!("unsupported version"),
279 };
280
281 Ok((input, blocks))
282}
283
284fn decode_blocks(buf: &[u8], entry_infos: &Vec<BlockEntryInfo>, header: &Header) -> Vec<KeyBlock> {
285 let mut buf = buf;
286
287 let mut res = vec![];
288 for info in entry_infos.iter() {
289 let (remain, decompressed) =
290 block_parser(info.compressed_size, info.decompressed_size)(buf).unwrap();
291 let (_, entries) = match &header.version {
292 Version::V1 => parse_block_items_v1(&decompressed[..], &header.encoding).unwrap(),
293 Version::V2 => parse_block_items_v2(&decompressed[..], &header.encoding).unwrap(),
294 _ => panic!("unsupported version"),
295 };
296
297 buf = remain;
298 res.push(KeyBlock { entries });
299 }
300
301 res
302}
303
304fn parse_block_items_v1<'a>(
305 input: &'a [u8],
306 encoding: &'a str,
307) -> IResult<&'a [u8], Vec<KeyEntry>> {
308 let (remain, sep) = many0(map(
309 tuple((be_u32, take_till(|x| x == 0), take(1_usize))),
310 |(offset, buf, _)| {
311 let decoder = encoding_from_whatwg_label(encoding).unwrap();
312 let text = decoder.decode(buf, encoding::DecoderTrap::Ignore).unwrap();
313 KeyEntry {
314 offset: offset as usize,
315 text,
316 }
317 },
318 ))(input)?;
319
320 assert_eq!(remain.len(), 0);
321
322 Ok((remain, sep))
323}
324fn parse_block_items_v2<'a>(
325 input: &'a [u8],
326 encoding: &'a str,
327) -> IResult<&'a [u8], Vec<KeyEntry>> {
328 let (remain, sep) = many0(map(
329 tuple((be_u64, take_till(|x| x == 0), take(1_usize))),
330 |(offset, buf, _)| {
331 let decoder = encoding_from_whatwg_label(encoding).unwrap();
332 let text = decoder.decode(buf, encoding::DecoderTrap::Ignore).unwrap();
333 KeyEntry {
334 offset: offset as usize,
335 text,
336 }
337 },
338 ))(input)?;
339
340 assert_eq!(remain.len(), 0);
341
342 Ok((remain, sep))
343}
344
345fn block_parser_v1<'a>(size: usize) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec<u8>> {
346 map(
347 tuple((le_u32, take(4_usize), take(size - 8))),
348 |(enc, chksum, encrypted)| {
349 let enc_method = (enc >> 4) & 0xf;
350 let enc_size = (enc >> 8) & 0xff;
351 let comp_method = enc & 0xf;
352
353 let mut md = Ripemd128::new();
354 md.update(chksum);
355 let key = md.finalize();
356
357 let data: Vec<u8> = match enc_method {
358 0 => Vec::from(encrypted),
359 1 => fast_decrypt(encrypted, key.as_slice()),
360 2 => {
361 let mut decrypt = vec![];
362 let mut cipher = Salsa20::new(key.as_slice().into(), &[0; 8].into());
363
364 decrypt
365 }
366 _ => panic!("unknown enc method: {}", enc_method),
367 };
368
369 let decompressed = match comp_method {
370 0 => data,
371 2 => {
372 let mut v = vec![];
373 zlib::Decoder::new(&data[..]).read_to_end(&mut v).unwrap();
374 v
375 }
376 _ => panic!("unknown compression method: {}", comp_method),
377 };
378
379 decompressed
380 },
381 )
382}
383fn block_parser<'a>(
384 comp_size: usize,
385 decomp_size: usize,
386) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec<u8>> {
387 map(
388 tuple((le_u32, take(4_usize), take(comp_size - 8))),
389 move |(enc, chksum, encrypted)| {
390 let enc_method = (enc >> 4) & 0xf;
391 let enc_size = (enc >> 8) & 0xff;
392 let comp_method = enc & 0xf;
393
394 let mut md = Ripemd128::new();
395 md.update(chksum);
396 let key = md.finalize();
397
398 let data: Vec<u8> = match enc_method {
399 0 => Vec::from(encrypted),
400 1 => fast_decrypt(encrypted, key.as_slice()),
401 2 => {
402 let mut decrypt = vec![];
403 let mut cipher = Salsa20::new(key.as_slice().into(), &[0; 8].into());
404
405 decrypt
406 }
407 _ => panic!("unknown enc method: {}", enc_method),
408 };
409
410 let decompressed = match comp_method {
411 0 => data,
412 1 => {
413 let mut comp: Vec<u8> = vec![0xf0];
414 comp.extend_from_slice(&data[..]);
415 let lzo = minilzo_rs::LZO::init().unwrap();
416 lzo.decompress(&data[..], decomp_size).unwrap()
417 }
418 2 => {
419 let mut v = vec![];
420 zlib::Decoder::new(&data[..]).read_to_end(&mut v).unwrap();
421 v
422 }
423 _ => panic!("unknown compression method: {}", comp_method),
424 };
425
426 decompressed
427 },
428 )
429}
430
431fn parse_record_blocks<'a>(input: &'a [u8], header: &'a Header) -> IResult<&'a [u8], Vec<BlockEntryInfo>> {
432 match &header.version {
433 Version::V1 => parse_record_blocks_v1(input),
434 Version::V2 => parse_record_blocks_v2(input),
435 _ => panic!("unsupported version"),
436 }
437}
438
439fn parse_record_blocks_v1(input: &[u8]) -> IResult<&[u8], Vec<BlockEntryInfo>> {
440 let (input, records) = be_u32(input)?;
441 let (input, entries) = be_u32(input)?;
442 let (input, record_info_size) = be_u32(input)?;
443 let (input, record_buf_size) = be_u32(input)?;
444
445 assert_eq!(records * 8, record_info_size);
446
447 count(
448 map(
449 tuple((be_u32, be_u32)),
450 |(compressed_size, decompressed_size)| BlockEntryInfo {
451 compressed_size: compressed_size as usize,
452 decompressed_size: decompressed_size as usize,
453 },
454 ),
455 records as usize,
456 )(input)
457}
458fn parse_record_blocks_v2(input: &[u8]) -> IResult<&[u8], Vec<BlockEntryInfo>> {
459 let (input, records) = be_u64(input)?;
460 let (input, entries) = be_u64(input)?;
461 let (input, record_info_size) = be_u64(input)?;
462 let (input, record_buf_size) = be_u64(input)?;
463
464 assert_eq!(records * 16, record_info_size);
465
466 count(
467 map(
468 tuple((be_u64, be_u64)),
469 |(compressed_size, decompressed_size)| BlockEntryInfo {
470 compressed_size: compressed_size as usize,
471 decompressed_size: decompressed_size as usize,
472 },
473 ),
474 records as usize,
475 )(input)
476}
477
/// Decrypts `encrypted` with the repeating `key` and returns the plain bytes.
///
/// Each output byte is the input byte with its nibbles swapped, XORed with
/// the previous *input* byte (`0x36` for the first one), the low 8 bits of
/// the byte's index, and the key byte at `index % key.len()`.
///
/// # Panics
///
/// Panics when `key` is empty and `encrypted` is not.
fn fast_decrypt(encrypted: &[u8], key: &[u8]) -> Vec<u8> {
    let mut previous: u8 = 0x36;
    encrypted
        .iter()
        .enumerate()
        .map(|(idx, &cipher)| {
            // Rotating by four bits swaps the two nibbles.
            let plain = cipher.rotate_left(4) ^ previous ^ (idx as u8) ^ key[idx % key.len()];
            previous = cipher;
            plain
        })
        .collect()
}
489
490pub(crate) fn record_block_parser<'a>(
491 size: usize,
492 decomp_size:usize
493) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec<u8>> {
494 map(
495 tuple((le_u32, take(4_usize), take(size - 8))),
496 move |(enc, chksum, encrypted)| {
497 let enc_method = (enc >> 4) & 0xf;
498 let enc_size = (enc >> 8) & 0xff;
499 let comp_method = enc & 0xf;
500
501 let mut md = Ripemd128::new();
502 md.update(chksum);
503 let key = md.finalize();
504
505 let data: Vec<u8> = match enc_method {
506 0 => Vec::from(encrypted),
507 1 => fast_decrypt(encrypted, key.as_slice()),
508 2 => {
509 let mut decrypt = vec![];
510 let mut cipher = Salsa20::new(key.as_slice().into(), &[0; 8].into());
511
512 decrypt
513 }
514 _ => panic!("unknown enc method: {}", enc_method),
515 };
516
517 let decompressed = match comp_method {
518 0 => data,
519 1 => {
520 let lzo = minilzo_rs::LZO::init().unwrap();
521 lzo.decompress(&data[..], decomp_size).unwrap()
522 },
523 2 => {
524 let mut v = vec![];
525 zlib::Decoder::new(&data[..]).read_to_end(&mut v).unwrap();
526 v
527 }
528 _ => panic!("unknown compression method: {}", comp_method),
529 };
530
531 decompressed
532 },
533 )
534}
535
536pub fn parse(data: &[u8]) -> Mdx {
537 let (input, header) = parse_header(data).unwrap();
538 let (input, key_block_header) = parse_key_block_header(input, &header).unwrap();
539 let (input, key_block_infos) =
540 parse_key_block_infos(input, key_block_header.block_info_size, &header).unwrap();
541 let (input, key_blocks) = parse_key_blocks(
542 input,
543 key_block_header.key_block_size,
544 &header,
545 &key_block_infos,
546 )
547 .unwrap();
548 let (input, record_blocks) = parse_record_blocks(input, &header).unwrap();
549 Mdx {
550 key_blocks,
551 records_info: record_blocks,
552 records: Vec::from(input),
553 encoding: header.encoding,
554 encrypted: header.encrypted,
555 }
556}