// mdict_parser/mdict.rs

1use encoding::label::encoding_from_whatwg_label;
2use nom::{bytes::complete::take_till, IResult};
3use std::str;
4
5use super::parser::{record_block_parser, BlockEntryInfo, KeyBlock, KeyEntry};
6
7pub struct Mdx {
8    pub(crate) key_blocks: Vec<KeyBlock>,
9    pub(crate) records_info: Vec<BlockEntryInfo>,
10    pub(crate) records: Vec<u8>,
11    pub encoding: String,
12    pub encrypted: u8,
13}
14
/// Location of a single record inside the record section.
#[derive(Debug)]
struct RecordOffset {
    /// Byte offset into the raw record buffer at which the containing block starts.
    buf_offset: usize,
    /// Byte offset of the record inside the decompressed block.
    block_offset: usize,
    /// Compressed size of the containing block, in bytes.
    record_size: usize,
    /// Decompressed size of the containing block, in bytes.
    decomp_size: usize,
}
22
/// One dictionary entry: a borrowed key together with its owned, decoded definition.
#[derive(Debug)]
pub struct Record<'a> {
    /// The entry's key text, borrowed from the dictionary it came from.
    pub key: &'a str,
    /// The decoded definition text for `key`.
    pub definition: String,
}
28
29impl Mdx {
30    pub fn items(&self) -> impl Iterator<Item = Record> {
31        self.key_blocks
32            .iter()
33            .flat_map(|block| &block.entries)
34            .map(|entry| {
35                let def = self.find_definition(entry);
36                Record {
37                    key: &entry.text,
38                    definition: def,
39                }
40                // (entry.text.as_str(), def)
41            })
42    }
43
44    pub fn keys(&self) -> impl Iterator<Item = &KeyEntry> {
45        self.key_blocks.iter().flat_map(|block| &block.entries)
46    }
47
48    fn record_offset(&self, entry: &KeyEntry) -> Option<RecordOffset> {
49        let mut block_offset = 0;
50        let mut buf_offset = 0;
51        for i in &self.records_info {
52            if entry.offset <= block_offset + i.decompressed_size {
53                // return Some((item_offset, block_offset, i));
54                return Some(RecordOffset {
55                    buf_offset,
56                    block_offset: entry.offset - block_offset,
57                    record_size: i.compressed_size,
58                    decomp_size: i.decompressed_size
59                });
60            } else {
61                block_offset += i.decompressed_size;
62                buf_offset += i.compressed_size;
63            }
64        }
65        None
66    }
67
68    fn find_definition(&self, entry: &KeyEntry) -> String {
69        if let Some(offset) = self.record_offset(entry) {
70            let buf = &self.records[offset.buf_offset..];
71            let (_, decompressed) = record_block_parser(offset.record_size, offset.decomp_size)(buf).unwrap();
72            let result: IResult<&[u8], &[u8]> =
73                take_till(|x| x == 0)(&decompressed[offset.block_offset..]);
74            let (_, buf) = result.unwrap();
75            let decoder = encoding_from_whatwg_label(self.encoding.as_str()).unwrap();
76            let text = decoder.decode(buf,encoding::DecoderTrap::Strict).unwrap();
77            text
78        } else {
79            "".to_string()
80        }
81    }
82}