// lipilekhika 1.1.2
//
// A transliteration library for Indian Brahmic scripts
use alloc::string::String;
use alloc::vec::Vec;
// indexmap is built with `default-features = false` for no_std; without `std` there is no
// default hasher, so `indexmap::IndexMap<K, V>` needs a third type param. foldhash matches
// hashbrown's default hasher in this crate.
use foldhash::fast::RandomState as IndexMapHasher;
use hashbrown::HashMap;
use indexmap::IndexMap as IndexMapImpl;
use serde::{Deserialize, Serialize};

/// File-local shorthand for `indexmap::IndexMap` with the hasher parameter fixed to
/// `IndexMapHasher` (foldhash's `fast::RandomState`), so the rest of the file can
/// simply write `IndexMap<K, V>`.
type IndexMap<K, V> = IndexMapImpl<K, V, IndexMapHasher>;

/*
Several types in this file come in a `...Json` version and a non-Json version.
The reasons are:
- Deserializing `json` and `bincode` has some differences
- The Json types are used for reading from *.json files and the non-Json types for bincode serialize/deserialize
- The non-Json types are also the final structures used at runtime; fields added at runtime are marked `serde(skip)`
*/

/// This will be used both for transliteration and typing.
///
/// Common struct for both `text_to_krama_map` and `typing_text_to_krama_map`;
/// it is the value half of each `(String, TextToKramaMap)` entry in those lists.
///
/// Every field is optional, and unknown fields are rejected when deserializing.
#[derive(Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct TextToKramaMap {
    /// NOTE(review): presumably the strings that may follow this entry's key — confirm.
    pub next: Option<Vec<String>>,
    /// NOTE(review): presumably indices into `krama_text_arr` — confirm.
    pub krama: Option<Vec<i16>>,
    /// NOTE(review): presumably an index into the script's `list` — confirm.
    pub fallback_list_ref: Option<i16>,
    /// only in `typing_text_to_krama_map`
    pub custom_back_ref: Option<i16>,
}

/// JSON-side form of a `list` entry.
///
/// Internally tagged: the `type` field of the JSON object selects the variant
/// (`"anya"`, `"vyanjana"`, `"mAtrA"` or `"svara"`). Converted into [`List`]
/// through `From<ListJson>`.
#[derive(Debug, Deserialize, Clone, PartialEq)]
#[serde(tag = "type")]
pub enum ListJson {
    /// Selected by `"type": "anya"`.
    #[serde(rename = "anya")]
    Anya { krama_ref: Vec<i16> },
    /// Selected by `"type": "vyanjana"`.
    #[serde(rename = "vyanjana")]
    Vyanjana { krama_ref: Vec<i16> },
    /// Selected by `"type": "mAtrA"`.
    #[serde(rename = "mAtrA")]
    Matra { krama_ref: Vec<i16> },
    /// Selected by `"type": "svara"`; the only variant carrying a second reference list.
    #[serde(rename = "svara")]
    Svara {
        krama_ref: Vec<i16>,
        /// Read from the JSON key `mAtrA_krama_ref`.
        #[serde(rename = "mAtrA_krama_ref")]
        matra_krama_ref: Vec<i16>,
    },
}

/// Non-Json counterpart of [`ListJson`]: same variants and fields, but without
/// the serde tag/rename attributes and additionally `Serialize`.
///
/// NOTE(review): variant order is presumably significant for the bincode
/// encoding — confirm before reordering.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub enum List {
    /// Counterpart of `ListJson::Anya`.
    Anya {
        krama_ref: Vec<i16>,
    },
    /// Counterpart of `ListJson::Vyanjana`.
    Vyanjana {
        krama_ref: Vec<i16>,
    },
    /// Counterpart of `ListJson::Matra`.
    Matra {
        krama_ref: Vec<i16>,
    },
    /// Counterpart of `ListJson::Svara`; the only variant with a second reference list.
    Svara {
        krama_ref: Vec<i16>,
        matra_krama_ref: Vec<i16>,
    },
}

impl From<ListJson> for List {
    fn from(value: ListJson) -> Self {
        match value {
            ListJson::Anya { krama_ref } => List::Anya { krama_ref },
            ListJson::Vyanjana { krama_ref } => List::Vyanjana { krama_ref },
            ListJson::Matra { krama_ref } => List::Matra { krama_ref },
            ListJson::Svara {
                krama_ref,
                matra_krama_ref,
            } => List::Svara {
                krama_ref,
                matra_krama_ref,
            },
        }
    }
}

/// JSON-side form of the attributes shared by every script kind.
///
/// Flattened into both variants of `ScriptDataJson` and converted into
/// `CommonScriptAttr` through `From<CommonScriptAttrJson>`.
///
/// NOTE(review): serde documents `deny_unknown_fields` as unsupported in
/// combination with `flatten`, and this struct is used as a flattened field —
/// confirm the attribute behaves as intended here.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CommonScriptAttrJson {
    #[allow(dead_code)]
    pub script_name: String,
    #[allow(dead_code)]
    pub script_id: u8,
    /// Ordered `(text, optional i16)` pairs.
    /// NOTE(review): the meaning of the `i16` is not visible in this file — confirm.
    pub krama_text_arr: Vec<(String, Option<i16>)>,
    /// NOTE(review): presumably indices into `krama_text_arr` — confirm.
    pub krama_text_arr_index: Vec<usize>,
    /// Ordered `(text, mapping)` pairs used for transliteration.
    pub text_to_krama_map: Vec<(String, TextToKramaMap)>,
    /// Ordered `(text, mapping)` pairs used for typing.
    pub typing_text_to_krama_map: Vec<(String, TextToKramaMap)>,
    /// Ordered `(text, optional i16, optional i16)` triples.
    /// NOTE(review): the meaning of the two numbers is not visible in this file — confirm.
    pub custom_script_chars_arr: Vec<(String, Option<i16>, Option<i16>)>,
    /// List entries in their JSON-tagged form.
    pub list: Vec<ListJson>,
}

/// Non-Json counterpart of `CommonScriptAttrJson`.
///
/// Carries the same data fields (with `list` holding `List` instead of
/// `ListJson`) plus four lookup maps that are excluded from serialization via
/// `serde(skip)`. `From<CommonScriptAttrJson>` leaves those maps empty.
/// NOTE(review): they are presumably populated elsewhere after loading — confirm.
#[derive(Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct CommonScriptAttr {
    #[allow(dead_code)]
    pub script_name: String,
    #[allow(dead_code)]
    pub script_id: u8,
    /// Ordered `(text, optional i16)` pairs.
    pub krama_text_arr: Vec<(String, Option<i16>)>,
    /// NOTE(review): presumably indices into `krama_text_arr` — confirm.
    pub krama_text_arr_index: Vec<usize>,
    /// Ordered `(text, mapping)` pairs used for transliteration.
    pub text_to_krama_map: Vec<(String, TextToKramaMap)>,
    /// Ordered `(text, mapping)` pairs used for typing.
    pub typing_text_to_krama_map: Vec<(String, TextToKramaMap)>,
    /// Ordered `(text, optional i16, optional i16)` triples.
    pub custom_script_chars_arr: Vec<(String, Option<i16>, Option<i16>)>,
    /// List entries in their runtime form.
    pub list: Vec<List>,

    // Runtime-only lookup tables: each maps a string key to an index so that a
    // lookup by key does not need a linear scan. Never serialized.
    // NOTE(review): each map presumably indexes the similarly named vector above — confirm.
    #[serde(skip)]
    #[allow(dead_code)]
    pub krama_text_lookup: HashMap<String, usize>,
    #[serde(skip)]
    #[allow(dead_code)]
    pub text_to_krama_lookup: HashMap<String, usize>,
    #[serde(skip)]
    #[allow(dead_code)]
    pub typing_text_to_krama_lookup: HashMap<String, usize>,
    #[serde(skip)]
    #[allow(dead_code)]
    pub custom_script_chars_lookup: HashMap<String, usize>,
}

impl From<CommonScriptAttrJson> for CommonScriptAttr {
    fn from(value: CommonScriptAttrJson) -> Self {
        Self {
            script_name: value.script_name,
            script_id: value.script_id,
            krama_text_arr: value.krama_text_arr,
            krama_text_arr_index: value.krama_text_arr_index,
            text_to_krama_map: value.text_to_krama_map,
            typing_text_to_krama_map: value.typing_text_to_krama_map,
            custom_script_chars_arr: value.custom_script_chars_arr,
            list: value.list.into_iter().map(Into::into).collect(),
            krama_text_lookup: HashMap::new(),
            text_to_krama_lookup: HashMap::new(),
            typing_text_to_krama_lookup: HashMap::new(),
            custom_script_chars_lookup: HashMap::new(),
        }
    }
}

/// JSON representation (matches the on-disk `*.json` files).
///
/// `#[serde(flatten)]` provides a convenient shape for JSON but is not supported
/// by `bincode`, so we parse into `ScriptDataJson` and convert into `ScriptData`.
///
/// Internally tagged: the `script_type` field of the JSON object selects the
/// variant (`"brahmic"` or `"other"`).
#[derive(Debug, Deserialize)]
#[serde(tag = "script_type")]
pub enum ScriptDataJson {
    /// Selected by `"script_type": "brahmic"`.
    #[serde(rename = "brahmic")]
    Brahmic {
        /// Shared attributes, read from the same JSON object as the fields below.
        #[serde(flatten)]
        common_script_attr: CommonScriptAttrJson,
        #[allow(dead_code)]
        schwa_property: bool,
        halant: String,
        /// Optional; may be absent or null in the JSON.
        nuqta: Option<String>,
    },
    /// Selected by `"script_type": "other"`.
    #[serde(rename = "other")]
    Other {
        /// Shared attributes, read from the same JSON object as the field below.
        #[serde(flatten)]
        common_script_attr: CommonScriptAttrJson,
        #[allow(dead_code)]
        schwa_character: String,
    },
}

/// Non-Json counterpart of `ScriptDataJson`.
///
/// Holds the same variants and fields, but the shared attributes are a regular
/// nested `CommonScriptAttr` field instead of being flattened, and the type is
/// also `Serialize`.
#[derive(Debug, Deserialize, Serialize)]
pub enum ScriptData {
    /// Counterpart of `ScriptDataJson::Brahmic`.
    Brahmic {
        common_script_attr: CommonScriptAttr,
        #[allow(dead_code)]
        schwa_property: bool,
        halant: String,
        nuqta: Option<String>,
    },
    /// Counterpart of `ScriptDataJson::Other`.
    Other {
        common_script_attr: CommonScriptAttr,
        #[allow(dead_code)]
        schwa_character: String,
    },
}

impl From<ScriptDataJson> for ScriptData {
    fn from(value: ScriptDataJson) -> Self {
        match value {
            ScriptDataJson::Brahmic {
                common_script_attr,
                schwa_property,
                halant,
                nuqta,
            } => ScriptData::Brahmic {
                common_script_attr: common_script_attr.into(),
                schwa_property,
                halant,
                nuqta,
            },
            ScriptDataJson::Other {
                common_script_attr,
                schwa_character,
            } => ScriptData::Other {
                common_script_attr: common_script_attr.into(),
                schwa_character,
            },
        }
    }
}

/// Script-type selector used by the `from_script_type` / `to_script_type`
/// fields of the custom option structs.
///
/// With `rename_all = "lowercase"` the variants are spelled `"brahmic"`,
/// `"other"` and `"all"` in self-describing formats such as JSON.
#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum CustomOptionScriptTypeEnum {
    Brahmic,
    Other,
    All,
}

/// Value type of the `check_in` fields on custom options and rules.
///
/// With `rename_all = "lowercase"` the variants are spelled `"input"` and
/// `"output"` in self-describing formats such as JSON.
/// NOTE(review): presumably selects whether a rule is matched against the
/// input or the output side — confirm.
#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum CheckInEnum {
    Input,
    Output,
}

/// JSON-side form of a custom-option rule.
///
/// Internally tagged: the `type` field of the JSON object selects the variant
/// (`"replace_prev_krama_keys"` or `"direct_replace"`). Converted into `Rule`
/// through `From<RuleJson>`.
#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
pub enum RuleJson {
    /// Selected by `"type": "replace_prev_krama_keys"`.
    #[serde(rename = "replace_prev_krama_keys")]
    ReplacePrevKramaKeys {
        /// Optional; `None` when the key is missing.
        #[serde(default)]
        use_replace: Option<bool>,
        prev: Vec<i16>,
        following: Vec<i16>,
        replace_with: Vec<i16>,
        /// Optional per-rule `check_in` value.
        check_in: Option<CheckInEnum>,
    },
    /// Selected by `"type": "direct_replace"`.
    #[serde(rename = "direct_replace")]
    DirectReplace {
        /// Optional; `None` when the key is missing.
        #[serde(default)]
        use_replace: Option<bool>,
        to_replace: Vec<Vec<i16>>,
        replace_with: Vec<i16>,
        replace_text: Option<String>,
        /// Optional per-rule `check_in` value.
        check_in: Option<CheckInEnum>,
    },
}

/// Non-Json counterpart of `RuleJson`: same variants and fields, without the
/// serde tag/rename/default attributes, and additionally `Serialize` and `Clone`.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub enum Rule {
    /// Counterpart of `RuleJson::ReplacePrevKramaKeys`.
    ReplacePrevKramaKeys {
        use_replace: Option<bool>,
        prev: Vec<i16>,
        following: Vec<i16>,
        replace_with: Vec<i16>,
        check_in: Option<CheckInEnum>,
    },
    /// Counterpart of `RuleJson::DirectReplace`.
    DirectReplace {
        use_replace: Option<bool>,
        to_replace: Vec<Vec<i16>>,
        replace_with: Vec<i16>,
        replace_text: Option<String>,
        check_in: Option<CheckInEnum>,
    },
}

impl From<RuleJson> for Rule {
    fn from(value: RuleJson) -> Self {
        match value {
            RuleJson::ReplacePrevKramaKeys {
                use_replace,
                prev,
                following,
                replace_with,
                check_in,
            } => Rule::ReplacePrevKramaKeys {
                use_replace,
                prev,
                following,
                replace_with,
                check_in,
            },
            RuleJson::DirectReplace {
                use_replace,
                to_replace,
                replace_with,
                replace_text,
                check_in,
            } => Rule::DirectReplace {
                use_replace,
                to_replace,
                replace_with,
                replace_text,
                check_in,
            },
        }
    }
}

/// JSON-side form of one custom option entry.
///
/// Unknown fields are rejected when deserializing. Converted into
/// `CustomOptions` through `From<CustomOptionsJson>`.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CustomOptionsJson {
    /// NOTE(review): presumably restricts the option to these source scripts — confirm.
    pub from_script_name: Option<Vec<String>>,
    /// NOTE(review): presumably restricts the option to this source script type — confirm.
    pub from_script_type: Option<CustomOptionScriptTypeEnum>,
    /// NOTE(review): presumably restricts the option to these target scripts — confirm.
    pub to_script_name: Option<Vec<String>>,
    /// NOTE(review): presumably restricts the option to this target script type — confirm.
    pub to_script_type: Option<CustomOptionScriptTypeEnum>,
    /// Required option-level `check_in` value; individual rules may carry their own.
    #[allow(dead_code)]
    pub check_in: CheckInEnum,
    /// Rules in their JSON-tagged form.
    pub rules: Vec<RuleJson>,
}

impl From<CustomOptionsJson> for CustomOptions {
    fn from(value: CustomOptionsJson) -> Self {
        Self {
            from_script_name: value.from_script_name,
            from_script_type: value.from_script_type,
            to_script_name: value.to_script_name,
            to_script_type: value.to_script_type,
            check_in: value.check_in,
            rules: value.rules.into_iter().map(Into::into).collect(),
        }
    }
}

/// Non-Json counterpart of `CustomOptionsJson`: same fields, with `rules`
/// holding `Rule` instead of `RuleJson`, and additionally `Serialize` and `Clone`.
#[derive(Debug, Deserialize, Serialize, Clone)]
#[serde(deny_unknown_fields)]
pub struct CustomOptions {
    /// NOTE(review): presumably restricts the option to these source scripts — confirm.
    pub from_script_name: Option<Vec<String>>,
    /// NOTE(review): presumably restricts the option to this source script type — confirm.
    pub from_script_type: Option<CustomOptionScriptTypeEnum>,
    /// NOTE(review): presumably restricts the option to these target scripts — confirm.
    pub to_script_name: Option<Vec<String>>,
    /// NOTE(review): presumably restricts the option to this target script type — confirm.
    pub to_script_type: Option<CustomOptionScriptTypeEnum>,
    /// Required option-level `check_in` value; individual rules may carry their own.
    #[allow(dead_code)]
    pub check_in: CheckInEnum,
    /// Rules in their runtime form.
    pub rules: Vec<Rule>,
}

/// Insertion-ordered map from a string key to the JSON-side custom option.
/// NOTE(review): the key is presumably the option's name — confirm.
#[allow(dead_code)]
pub type CustomOptionMapJson = IndexMap<String, CustomOptionsJson>;
/// Insertion-ordered map from a string key to the runtime custom option.
pub type CustomOptionMap = IndexMap<String, CustomOptions>;

/// JSON-side form of the script/language listing.
///
/// Unknown fields are rejected when deserializing. Converted into
/// `ScriptListData` through `From<ScriptListDataJson>`, which keeps only the
/// keys of `scripts` and `langs` and drops their `u8` values.
#[derive(Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct ScriptListDataJson {
    // IndexMap used to preserve order of keys
    /// Script name mapped to a `u8`.
    /// NOTE(review): the number is presumably the script id — confirm.
    pub scripts: IndexMap<String, u8>,
    /// Language name mapped to a `u8`.
    /// NOTE(review): the number is presumably the language id — confirm.
    pub langs: IndexMap<String, u8>,
    /// all langs are mapped to a script
    pub lang_script_map: IndexMap<String, String>,
    /// contains aliases which map to script
    pub script_alternates_map: IndexMap<String, String>,
}

/// Non-Json counterpart of `ScriptListDataJson`.
///
/// `scripts` and `langs` are plain name lists (the `u8` values of the JSON
/// form are not kept); the two string-to-string maps are carried over as-is.
///
/// Derives `Debug` like every other public type in this file.
#[derive(Debug, Serialize, Deserialize)]
pub struct ScriptListData {
    /// Script names, in the key order of `ScriptListDataJson::scripts`.
    pub scripts: Vec<String>,
    /// Language names, in the key order of `ScriptListDataJson::langs`.
    pub langs: Vec<String>,
    /// Every language mapped to a script.
    pub lang_script_map: IndexMap<String, String>,
    /// Aliases mapped to a script.
    pub script_alternates_map: IndexMap<String, String>,
}

impl From<ScriptListDataJson> for ScriptListData {
    fn from(value: ScriptListDataJson) -> Self {
        Self {
            scripts: value.scripts.iter().map(|k| k.0.clone()).collect(),
            langs: value.langs.iter().map(|k| k.0.clone()).collect(),
            lang_script_map: value.lang_script_map.clone(),
            script_alternates_map: value.script_alternates_map.clone(),
        }
    }
}