1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
use jpreprocess_core::{error::JPreprocessErrorKind, word_entry::WordEntry, JPreprocessResult};
use lindera_tokenizer::token::Token;

use super::{
    serializer::{jpreprocess::JPreprocessSerializer, lindera::LinderaSerializer},
    DictionaryFetcher, DictionarySerializer, DictionaryStore,
};

/// Default [`DictionaryFetcher`] of JPreprocess.
///
/// Stores the serialization mode of the system dictionary and, optionally,
/// of the user dictionary. When a word is fetched, the token is routed to
/// whichever dictionary its word id refers to, and the raw bytes are decoded
/// with the serializer matching that dictionary's mode.
pub struct DefaultFetcher {
    // Mode used to decode entries read from the system dictionary.
    system: WordDictionaryMode,
    // Mode used to decode entries read from the user dictionary.
    // `None` means no user dictionary mode was configured; fetching a
    // user-dictionary word then results in an error.
    user: Option<WordDictionaryMode>,
}

impl DefaultFetcher {
    pub fn new(system: WordDictionaryMode, user: Option<WordDictionaryMode>) -> Self {
        Self { system, user }
    }

    pub fn from_dictionaries<System, User>(system: &System, user: Option<&User>) -> Self
    where
        System: for<'a> DictionaryStore<'a>,
        User: for<'a> DictionaryStore<'a>,
    {
        Self {
            system: WordDictionaryMode::from_metadata(system.identifier()),
            user: user.map(|user| WordDictionaryMode::from_metadata(user.identifier())),
        }
    }
}

impl DictionaryFetcher for DefaultFetcher {
    /// Resolves `token` to its [`WordEntry`].
    ///
    /// * Unknown words yield [`WordEntry::default`].
    /// * System words are read from `token.dictionary` and decoded with the
    ///   system dictionary mode.
    /// * All other words are treated as user-dictionary words: they are read
    ///   from `token.user_dictionary` and decoded with the user dictionary mode.
    ///
    /// # Errors
    ///
    /// Returns [`JPreprocessErrorKind::WordNotFoundError`] when the word belongs
    /// to the user dictionary but either no user dictionary mode is set on this
    /// fetcher or the token carries no user dictionary. Failures while reading
    /// the bytes or deserializing them are propagated unchanged.
    fn get_word(&self, token: &Token) -> JPreprocessResult<WordEntry> {
        if token.word_id.is_unknown() {
            Ok(WordEntry::default())
        } else if token.word_id.is_system() {
            self.system
                .into_serializer()
                .deserialize(token.dictionary.get_bytes(token.word_id.0)?)
        } else if let Some(user_mode) = self.user {
            // Build the error lazily: this branch runs for every user-dictionary
            // word, and the error is only needed when the dictionary is missing.
            let user_dictionary = token.user_dictionary.ok_or_else(|| {
                JPreprocessErrorKind::WordNotFoundError.with_error(anyhow::anyhow!(
                    "The word is flagged as UserDictionary, but Lindera UserDictionary is empty."
                ))
            })?;
            user_mode
                .into_serializer()
                .deserialize(user_dictionary.get_bytes(token.word_id.0)?)
        } else {
            Err(
                JPreprocessErrorKind::WordNotFoundError.with_error(anyhow::anyhow!(
                    "The word is flagged as UserDictionary, but UserDictionary mode is not set."
                )),
            )
        }
    }
}

/// Dictionary serialization/deserialization mode.
///
/// Selects which serializer is used to decode the raw bytes of a word entry.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WordDictionaryMode {
    /// Entries are handled by the Lindera serializer. This is the fallback
    /// when the dictionary metadata is absent or not tagged as JPreprocess.
    Lindera,
    /// Entries are handled by the current JPreprocess serializer.
    JPreprocess,
    /// Entries are handled by the legacy JPreprocess serializer, used for
    /// dictionaries tagged with JPreprocess v0.3.0 through v0.5.1.
    JPreprocessLegacyV051,
}

impl WordDictionaryMode {
    pub fn into_serializer(self) -> Box<dyn DictionarySerializer + Send + Sync> {
        match self {
            Self::Lindera => Box::new(LinderaSerializer),
            Self::JPreprocess => Box::new(JPreprocessSerializer),
            Self::JPreprocessLegacyV051 => {
                Box::new(crate::serializer::jpreprocess::legacy_0_5_1::JPreprocessSerializer)
            }
        }
    }

    pub fn from_metadata(metadata: Option<String>) -> Self {
        if let Some(metadata) = metadata {
            let segments: Vec<&str> = metadata.split(' ').collect();
            match *segments.as_slice() {
                ["JPreprocess", "v0.1.0" | "v0.1.1" | "v0.2.0"] => {
                    panic!(concat!(
                        "Incompatible Dictionary! ",
                        "Dictionaries built with JPreprocess versions before v0.3.0 ",
                        "are not compatible with this version of JPreprocess."
                    ))
                }
                ["JPreprocess", "v0.3.0" | "v0.4.0" | "v0.5.0" | "v0.5.1"] => {
                    return Self::JPreprocessLegacyV051
                }
                ["JPreprocess", ..] => return Self::JPreprocess,
                _ => (),
            }
        }
        Self::Lindera
    }
}