use crate::script_data::{CheckInEnum, CustomOptionScriptTypeEnum, List, Rule, ScriptData};
use crate::scripts::ScriptListEnum;
use crate::transliterate::helpers::{
self, InputTextCursor, PrevContextBuilder, PrevContextItem, ResultStringBuilder,
is_script_tamil_ext, is_ta_ext_superscript_tail, is_vedic_svara_tail,
};
use std::borrow::{Borrow, Cow};
use std::collections::HashMap;
/// Returns `true` when `s` is made up of exactly one character and that
/// character is `c`.
#[inline]
fn char_eq_str(c: char, s: &str) -> bool {
  let mut remaining = s.chars();
  // First char must be `c`, and there must be nothing after it.
  remaining.next() == Some(c) && remaining.next().is_none()
}
/// Bundles the read-only configuration and the mutable working state that the
/// helper methods on this type need during a single transliteration pass.
///
/// `R` is the element type of the custom-rule slice; the methods on this type
/// require `R: Borrow<Rule>`.
struct TransliterateCtx<'a, R> {
/// Script the input text is written in.
from_script: &'a ScriptListEnum,
/// Script the output is produced in.
to_script: &'a ScriptListEnum,
/// Script data belonging to `from_script`.
from_script_data: &'a ScriptData,
/// Script data belonging to `to_script`.
to_script_data: &'a ScriptData,
/// Active custom options, looked up by option key.
trans_options: &'a HashMap<String, bool>,
/// Custom rules consulted after each emitted match.
custom_rules: &'a [R],
/// Cursor over the input text.
cursor: &'a mut InputTextCursor<'a>,
/// Builder that accumulates the output.
result: &'a mut ResultStringBuilder,
/// Rolling window of the most recently processed items (text + list type).
prev_context: &'a mut PrevContextBuilder<'a>,
/// Whether the caller wants `prev_context` to be maintained at all.
prev_context_in_use: bool,
/// Halant text of whichever side is Brahmic, when one side is Brahmic and the other is not.
brahmic_halant: Option<&'a str>,
/// Nuqta text of whichever side is Brahmic, when that script defines one.
brahmic_nuqta: Option<&'a str>,
/// True when running in typing mode.
typing_mode: bool,
/// True when inherent vowels should be kept (no implicit halant insertion).
include_inherent_vowels: bool,
}
impl<'a, R> TransliterateCtx<'a, R>
where
  R: Borrow<Rule>,
{
  /// Reconciles the output with the previously seen items before `item` is
  /// recorded in the context window.
  ///
  /// Depending on the direction of the conversion this may:
  /// * Brahmic -> Other: emit the target's `schwa_character` when the previous
  ///   item was a consonant (optionally followed by a nuqta) and `item` is
  ///   neither a halant, a nuqta nor a matra.
  /// * Other -> Brahmic: emit the matra linked to a vowel that follows a
  ///   consonant, or emit a halant after a consonant (followed by U+200D for
  ///   Sinhala when `all_to_sinhala:use_conjunct_enabling_halant` is enabled).
  ///
  /// Afterwards the context window is cleared and/or `item` is pushed
  /// according to `typing_mode`, `next` and `last_extra_call`.
  ///
  /// # Parameters
  /// * `item` - `(text, list type)` of the item being processed, or `None`.
  /// * `next` - possible continuations of the matched text; only consulted in
  ///   typing mode, where an empty/absent list allows the context to be cleared.
  /// * `last_extra_call` - `Some(true)` for the final flush call after the
  ///   input is exhausted; defaults to `false`.
  ///
  /// # Returns
  /// `true` when this call already wrote the output for `item` (the linked
  /// matra case), so the caller must not emit the item's own pieces again.
  fn prev_context_cleanup(
    &mut self,
    item: Option<PrevContextItem<'a>>,
    next: Option<&[String]>,
    last_extra_call: Option<bool>,
  ) -> bool {
    let last_extra_call = last_extra_call.unwrap_or_default();
    let mut result_str_concat_status = false;
    let brahmic_halant = self.brahmic_halant;
    let brahmic_nuqta = self.brahmic_nuqta;
    let item_text = item.as_ref().and_then(|(t, _)| t.as_deref());
    let item_type = item.as_ref().and_then(|(_, t)| t.as_deref());

    // A completed "consonant (+ nuqta) + matra" cluster followed by nothing or
    // by an "anya" item needs no further look-behind: drop the window.
    if ((brahmic_nuqta.is_some()
      && self
        .prev_context
        .type_at(-3)
        .is_some_and(|k| k.is_vyanjana())
      && self.prev_context.text_at(-2) == brahmic_nuqta
      && self.prev_context.type_at(-1).is_some_and(|k| k.is_matra()))
      || (self
        .prev_context
        .type_at(-2)
        .is_some_and(|k| k.is_vyanjana())
        && self.prev_context.type_at(-1).is_some_and(|k| k.is_matra())))
      && (item.is_none() || item_type.is_some_and(|k| k.is_anya()))
    {
      self.prev_context.clear();
    }

    if matches!(self.from_script_data, ScriptData::Brahmic { .. })
      && matches!(self.to_script_data, ScriptData::Other { .. })
    {
      // For Tamil-Extended input only the first char of the item is compared
      // with the first char of the halant.
      let ta_ext_case = if is_script_tamil_ext(self.from_script) {
        item_text.and_then(|k| k.chars().next())
          != self.brahmic_halant.and_then(|k| k.chars().next())
      } else {
        true
      };
      // Previous item is a consonant, or a consonant followed by a nuqta.
      // The nuqta alternative is only meaningful when the script has a nuqta,
      // so it is guarded by `brahmic_nuqta` (as in the sibling checks of this
      // function), not by `brahmic_halant`.
      let vyanjana_case = (self.brahmic_nuqta.is_none() || item_text != self.brahmic_nuqta)
        && (self
          .prev_context
          .type_at(-1)
          .is_some_and(|k| k.is_vyanjana())
          || (self.brahmic_nuqta.is_some()
            && self
              .prev_context
              .type_at(-2)
              .is_some_and(|k| k.is_vyanjana())
            && self.prev_context.text_at(-1) == brahmic_nuqta));
      let to_anya_or_null = (!item_type.is_some_and(|k| k.is_matra())
        && item_text != brahmic_halant)
        || item_type.is_some_and(|k| k.is_anya())
        || item_type.is_none();
      if item_text != self.brahmic_halant
        && ta_ext_case
        && vyanjana_case
        && to_anya_or_null
        && let ScriptData::Other {
          schwa_character, ..
        } = self.to_script_data
      {
        self.result.emit(schwa_character);
      }
    } else if matches!(self.from_script_data, ScriptData::Other { .. })
      && matches!(self.to_script_data, ScriptData::Brahmic { .. })
    {
      if self
        .prev_context
        .type_at(-1)
        .is_some_and(|k| k.is_vyanjana())
        && (item_type.is_some_and(|k| k.is_matra()) || item_type.is_some_and(|k| k.is_svara()))
      {
        // A vowel after a consonant is written as its matra form. For a svara
        // the matra comes from `matra_krama_ref`; otherwise the item text is
        // used as-is.
        let linked_matra: &str = item_type
          .and_then(|item_type| match item_type {
            List::Svara {
              matra_krama_ref, ..
            } => Some(
              self
                .to_script_data
                .krama_text_or_empty(*matra_krama_ref.first().unwrap_or(&-1)),
            ),
            _ => None,
          })
          .unwrap_or_else(|| item_text.unwrap_or(""));
        if let ScriptData::Brahmic { halant, .. } = self.to_script_data {
          self.result.emit_pieces_with_reorder(
            &[linked_matra],
            halant,
            is_script_tamil_ext(self.to_script)
              && is_ta_ext_superscript_tail(self.result.last_char()),
          );
          result_str_concat_status = true;
        }
      } else if !self.include_inherent_vowels
        && (self
          .prev_context
          .type_at(-1)
          .is_some_and(|k| k.is_vyanjana()))
        && !(item_text == brahmic_halant || item_type.is_some_and(|k| k.is_matra()))
      {
        // Consonant not followed by a vowel sign: close it with a halant.
        if let (
          Some(brahmic_halant),
          ScriptData::Brahmic {
            halant: to_halant, ..
          },
        ) = (brahmic_halant, self.to_script_data)
        {
          let should_reorder = is_script_tamil_ext(self.to_script)
            && is_ta_ext_superscript_tail(self.result.last_char());
          self
            .result
            .emit_pieces_with_reorder(&[brahmic_halant], to_halant, should_reorder);
          if *self.to_script == ScriptListEnum::Sinhala
            && *self
              .trans_options
              .get("all_to_sinhala:use_conjunct_enabling_halant")
              .unwrap_or(&false)
            && let Some(last_piece) = self.result.last_piece()
          {
            // Append a zero-width joiner to the piece that was just written.
            self.result.rewrite_at(-1, &{
              let mut s = String::with_capacity(last_piece.len() + 3);
              s.push_str(last_piece);
              s.push('\u{200D}');
              s
            });
          }
        }
      } else if self.include_inherent_vowels
        && item.is_some()
        && item_type.is_some_and(|k| k.is_vyanjana())
        && (self
          .prev_context
          .type_at(-1)
          .is_some_and(|k| k.is_vyanjana())
          || (brahmic_nuqta.is_some()
            && self
              .prev_context
              .type_at(-2)
              .is_some_and(|k| k.is_vyanjana())
            && self.prev_context.text_at(-1) == brahmic_nuqta))
        && let (
          Some(brahmic_halant),
          ScriptData::Brahmic {
            halant: to_halant, ..
          },
        ) = (brahmic_halant, self.to_script_data)
      {
        // With inherent vowels kept, a halant is only inserted between two
        // adjacent consonants.
        let should_reorder = is_script_tamil_ext(self.to_script)
          && is_ta_ext_superscript_tail(self.result.last_char());
        self
          .result
          .emit_pieces_with_reorder(&[brahmic_halant], to_halant, should_reorder);
        if *self.to_script == ScriptListEnum::Sinhala
          && *self
            .trans_options
            .get("all_to_sinhala:use_conjunct_enabling_halant")
            .unwrap_or(&false)
          && let Some(last_piece) = self.result.last_piece()
        {
          // Append a zero-width joiner to the piece that was just written.
          self.result.rewrite_at(-1, &{
            let mut s = String::with_capacity(last_piece.len() + 3);
            s.push_str(last_piece);
            s.push('\u{200D}');
            s
          });
        }
      }
    }

    // Typing mode: once a match has no possible continuation the window can be
    // reset, unless the item is a consonant (which may still need a halant or
    // matra) or the output ends in a Tamil-Extended superscript.
    let mut to_clear_context = false;
    if self.typing_mode
      && next.map(|n| n.is_empty()).unwrap_or(true)
      && !last_extra_call
      && !(is_script_tamil_ext(self.to_script)
        && is_ta_ext_superscript_tail(self.result.last_char()))
    {
      to_clear_context = true;
      if item_type.is_some_and(|k| k.is_vyanjana()) {
        to_clear_context = false;
      }
      if to_clear_context {
        self.prev_context.clear();
      }
    }
    if ((!self.typing_mode) || (!last_extra_call && !to_clear_context))
      && let Some(item) = item
    {
      self.prev_context.push(item);
    }
    result_str_concat_status
  }

  /// Applies the non-`use_replace` custom rules to the tail of the output
  /// after a match has been emitted.
  ///
  /// Rules whose `use_replace` is `Some(true)` are skipped here.
  ///
  /// # Parameters
  /// * `text_index` - cursor position after the match was consumed.
  /// * `delta` - signed offset added to `text_index` to obtain the index that
  ///   input-side look-behind starts from (callers pass the negated match
  ///   length).
  fn apply_custom_trans_rules(&mut self, text_index: isize, delta: isize) {
    let current_text_index = text_index + delta;
    for rule_ref in self.custom_rules.iter() {
      let rule = rule_ref.borrow();
      match rule {
        Rule::DirectReplace { use_replace, .. }
        | Rule::ReplacePrevKramaKeys { use_replace, .. } => {
          if use_replace == &Some(true) {
            continue;
          }
        }
      }
      match rule {
        Rule::ReplacePrevKramaKeys {
          prev,
          following,
          replace_with,
          check_in,
          ..
        } => {
          if prev.iter().any(|&n| n < 0) {
            continue;
          }
          // Anything other than an explicit `Output` is treated as an
          // input-side check.
          let is_check_in_input = !matches!(check_in, Some(CheckInEnum::Output));
          if is_check_in_input {
            if current_text_index < 0 || text_index < 0 {
              continue;
            }
            let prev_match = self.from_script_data.match_prev_krama_sequence(
              |i| self.cursor.peek_at_str(i as usize),
              current_text_index,
              prev,
            );
            if prev_match.matched
              && let Some(next_ch) = self.cursor.peek_at(text_index as usize)
            {
              let mut buf = [0u8; 4];
              let next_ch_str = next_ch.encode_utf8(&mut buf);
              if let Some(next_idx) = self.from_script_data.krama_index_of_text(next_ch_str) {
                let next_i16 = next_idx as i16;
                if following.contains(&next_i16) {
                  let pieces = self.to_script_data.replace_with_pieces(replace_with);
                  self
                    .result
                    .rewrite_tail_pieces(prev_match.matched_len, &pieces);
                }
              }
            }
          } else {
            let Some(last_piece) = self.result.last_piece() else {
              continue;
            };
            if let Some(following_idx) = self.to_script_data.krama_index_of_text(last_piece) {
              if !following.contains(&(following_idx as i16)) {
                continue;
              }
              let prev_match =
                self
                  .to_script_data
                  .match_prev_krama_sequence(|i| self.result.peek_at(i), -2, prev);
              if prev_match.matched {
                // The last piece is the "following" item: keep it after the
                // replacement of the pieces that precede it.
                let last_piece_owned = last_piece.to_owned();
                let mut pieces = self.to_script_data.replace_with_pieces(replace_with);
                pieces.push(last_piece_owned.as_str());
                self
                  .result
                  .rewrite_tail_pieces(prev_match.matched_len + 1, &pieces);
              }
            }
          }
        }
        Rule::DirectReplace {
          to_replace,
          replace_with,
          replace_text,
          check_in,
          ..
        } => {
          let lookup_data = if matches!(check_in, Some(CheckInEnum::Output)) {
            self.to_script_data
          } else {
            self.from_script_data
          };
          // Only the first matching group is applied.
          for search_group in to_replace.iter() {
            if search_group.iter().any(|&n| n < 0) {
              continue;
            }
            let matched =
              lookup_data.match_prev_krama_sequence(|i| self.result.peek_at(i), -1, search_group);
            if !matched.matched {
              continue;
            }
            if let Some(replace_text) = replace_text {
              self
                .result
                .rewrite_tail_pieces(matched.matched_len, std::slice::from_ref(replace_text));
            } else {
              let pieces = lookup_data.replace_with_pieces(replace_with);
              self
                .result
                .rewrite_tail_pieces(matched.matched_len, &pieces);
            }
            break;
          }
        }
      }
    }
  }
}
fn custom_option_script_type_of(
script_data: &ScriptData,
) -> crate::script_data::CustomOptionScriptTypeEnum {
match script_data {
ScriptData::Brahmic { .. } => crate::script_data::CustomOptionScriptTypeEnum::Brahmic,
ScriptData::Other { .. } => crate::script_data::CustomOptionScriptTypeEnum::Other,
}
}
/// Returns `true` when an option declared for `expected` applies to a script
/// of type `actual`; `All` applies to every script type.
fn custom_option_script_type_matches(
  expected: CustomOptionScriptTypeEnum,
  actual: CustomOptionScriptTypeEnum,
) -> bool {
  match expected {
    CustomOptionScriptTypeEnum::All => true,
    specific => specific == actual,
  }
}
/// Filters the caller-supplied options down to those that are known and that
/// apply to this `from` / `to` script pair.
///
/// An option applies on a given side when its declared script type matches
/// that side's script type, or when its declared script-name list contains
/// that side's script name. Both sides must apply. The returned map carries
/// the caller's `bool` value for every retained key; `None` input yields an
/// empty map.
pub fn get_active_custom_options(
  from_script_data: &ScriptData,
  to_script_data: &ScriptData,
  input_options: Option<&HashMap<String, bool>>,
) -> HashMap<String, bool> {
  let Some(requested) = input_options else {
    return HashMap::new();
  };
  let source_name = &from_script_data.script_name;
  let target_name = &to_script_data.script_name;
  let known_options = crate::script_data::get_custom_options_map();
  let mut selected: HashMap<String, bool> = HashMap::with_capacity(requested.len());
  let source_kind = custom_option_script_type_of(from_script_data);
  let target_kind = custom_option_script_type_of(to_script_data);

  for (key, enabled) in requested.iter() {
    // Unknown option keys are ignored.
    let Some(info) = known_options.get(key) else {
      continue;
    };

    let source_ok = info
      .from_script_type
      .is_some_and(|kind| custom_option_script_type_matches(kind, source_kind))
      || info
        .from_script_name
        .as_ref()
        .is_some_and(|names| names.iter().any(|name| name == source_name));
    if !source_ok {
      continue;
    }

    let target_ok = info
      .to_script_type
      .is_some_and(|kind| custom_option_script_type_matches(kind, target_kind))
      || info
        .to_script_name
        .as_ref()
        .is_some_and(|names| names.iter().any(|name| name == target_name));
    if !target_ok {
      continue;
    }

    selected.insert(key.clone(), *enabled);
  }
  selected
}
/// Result of resolving caller options for one script pair.
#[derive(Debug, Clone)]
pub struct ResolvedTransliterationRules {
/// Options that are known and applicable to the script pair, with the caller's on/off value.
pub trans_options: HashMap<String, bool>,
/// Rules contributed by every option in `trans_options` whose value is `true`.
pub custom_rules: Vec<&'static Rule>,
}
/// Resolves the caller's options for a script pair into the set of active
/// options and the flat list of custom rules they enable.
///
/// Only options whose value is `true` contribute rules. Rules are gathered in
/// ascending option-key order so that the rule order, and therefore the order
/// in which rules are applied, is the same on every run; iterating the
/// `HashMap` directly would yield an arbitrary order.
///
/// # Parameters
/// * `from_script_data` / `to_script_data` - data of the source and target scripts.
/// * `transliteration_input_options` - caller-supplied option flags, if any.
///
/// # Returns
/// The applicable options together with the rules of the enabled ones.
pub fn resolve_transliteration_rules(
  from_script_data: &ScriptData,
  to_script_data: &ScriptData,
  transliteration_input_options: Option<&HashMap<String, bool>>,
) -> ResolvedTransliterationRules {
  let trans_options = get_active_custom_options(
    from_script_data,
    to_script_data,
    transliteration_input_options,
  );
  let custom_options_map = crate::script_data::get_custom_options_map();

  // Fix the iteration order: HashMap iteration order is unspecified.
  let mut enabled_keys: Vec<&String> = trans_options
    .iter()
    .filter(|(_, enabled)| **enabled)
    .map(|(key, _)| key)
    .collect();
  enabled_keys.sort_unstable();

  let mut custom_rules: Vec<&'static Rule> = Vec::new();
  for key in enabled_keys {
    if let Some(opt) = custom_options_map.get(key) {
      custom_rules.extend(opt.rules.iter());
    }
  }
  ResolvedTransliterationRules {
    trans_options,
    custom_rules,
  }
}
impl Rule {
  /// Returns `true` when this rule is a whole-text replace rule
  /// (`use_replace == Some(true)`) whose `check_in` is exactly `allowed`.
  fn check_should_use_replace(&self, allowed: CheckInEnum) -> bool {
    let (use_replace, check_in) = match self {
      Rule::DirectReplace {
        use_replace,
        check_in,
        ..
      }
      | Rule::ReplacePrevKramaKeys {
        use_replace,
        check_in,
        ..
      } => (use_replace, check_in),
    };
    matches!(use_replace, Some(true)) && *check_in == Some(allowed)
  }
}
/// Builds the replacement string of `rule` by concatenating the text of every
/// non-negative krama index in its `replace_with` list, looked up in
/// `script_data`. Negative indices contribute nothing.
fn get_rule_replace_text(rule: &Rule, script_data: &ScriptData) -> String {
  let krama_indices = match rule {
    Rule::DirectReplace { replace_with, .. } | Rule::ReplacePrevKramaKeys { replace_with, .. } => {
      replace_with
    }
  };
  let mut assembled = String::new();
  for &index in krama_indices.iter() {
    if index >= 0 {
      assembled.push_str(script_data.krama_text_or_empty(index));
    }
  }
  assembled
}
/// Applies every whole-text replace rule (`use_replace == Some(true)`) whose
/// `check_in` equals `allowed_input_rule_type` to `text`, in rule order.
///
/// * `ReplacePrevKramaKeys`: for each `following` entry with non-empty text,
///   every occurrence of `prev-text + following-text` becomes
///   `replacement-text + following-text`.
/// * `DirectReplace`: every occurrence of each `to_replace` group's text
///   becomes `replace_text` if set, otherwise the text built from
///   `replace_with`.
///
/// Returns the input borrowed when nothing was replaced.
fn apply_custom_replace_rules<'a, R: Borrow<Rule>>(
  text: &'a str,
  script_data: &ScriptData,
  rules: &[R],
  allowed_input_rule_type: CheckInEnum,
) -> Cow<'a, str> {
  let mut current: Cow<'a, str> = Cow::Borrowed(text);
  if rules.is_empty() {
    return current;
  }
  for entry in rules {
    let rule: &Rule = entry.borrow();
    if !rule.check_should_use_replace(allowed_input_rule_type) {
      continue;
    }
    match rule {
      Rule::ReplacePrevKramaKeys {
        prev, following, ..
      } => {
        let mut prefix = String::new();
        for &krama_index in prev.iter() {
          prefix.push_str(script_data.krama_text_or_empty(krama_index));
        }
        let replacement_head = get_rule_replace_text(rule, script_data);
        for &follow_index in following.iter() {
          let follow_text = script_data.krama_text_or_empty(follow_index);
          if follow_text.is_empty() {
            continue;
          }
          let needle = [prefix.as_str(), follow_text].concat();
          if !current.contains(needle.as_str()) {
            continue;
          }
          // The following item is kept; only what precedes it is swapped.
          let substitute = [replacement_head.as_str(), follow_text].concat();
          current = Cow::Owned(current.replace(needle.as_str(), substitute.as_str()));
        }
      }
      Rule::DirectReplace {
        to_replace,
        replace_text,
        ..
      } => {
        // Explicit replacement text wins over the krama-derived one.
        let substitute: Cow<str> = match replace_text.as_deref() {
          Some(explicit) => Cow::Borrowed(explicit),
          None => Cow::Owned(get_rule_replace_text(rule, script_data)),
        };
        for group in to_replace.iter() {
          let mut needle = String::new();
          for &krama_index in group.iter() {
            needle.push_str(script_data.krama_text_or_empty(krama_index));
          }
          if needle.is_empty() || !current.contains(needle.as_str()) {
            continue;
          }
          current = Cow::Owned(current.replace(needle.as_str(), &*substitute));
        }
      }
    }
  }
  current
}
// Default for `TransliterationFnOptions::use_native_numerals`.
const DEFAULT_USE_NATIVE_NUMERALS_MODE: bool = true;
// Default for `TransliterationFnOptions::include_inherent_vowel`.
const DEFAULT_INCLUDE_INHERENT_VOWEL_MODE: bool = false;
/// Returns `true` for the whitespace and punctuation characters that are
/// copied to the output unchanged.
#[inline]
fn is_skip_char(c: char) -> bool {
  const SKIP_CHARS: [char; 10] = [' ', '\n', '\r', '\t', ',', '~', '!', '@', '?', '%'];
  SKIP_CHARS.contains(&c)
}
// Size passed to `PrevContextBuilder::new` for the look-behind context window.
const MAX_CONTEXT_LENGTH: u8 = 3;
/// Behaviour switches for a single transliteration call.
#[derive(Debug, Clone, Copy)]
pub struct TransliterationFnOptions {
/// Enables typing mode; `transliterate_text_core` panics if this is set and the input script is not `Normal`.
pub typing_mode: bool,
/// When `false`, ASCII digits in the input are copied to the output unchanged.
pub use_native_numerals: bool,
/// Forwarded to the context as `include_inherent_vowels`; controls when a halant is inserted after a consonant.
pub include_inherent_vowel: bool,
}
impl Default for TransliterationFnOptions {
fn default() -> Self {
Self {
typing_mode: false,
use_native_numerals: DEFAULT_USE_NATIVE_NUMERALS_MODE,
include_inherent_vowel: DEFAULT_INCLUDE_INHERENT_VOWEL_MODE,
}
}
}
/// Result of a transliteration call.
#[derive(Debug, Clone)]
pub struct TransliterationOutput {
/// The transliterated text, after output-side replace rules were applied.
pub output: String,
/// Number of items left in the look-behind context when processing finished.
#[allow(dead_code)]
pub context_length: usize,
}
/// Returns `true` when `s` is exactly one ASCII digit (`'0'..='9'`).
#[inline]
#[allow(dead_code)]
fn is_single_ascii_digit(s: &str) -> bool {
  matches!(s.as_bytes(), [only] if only.is_ascii_digit())
}
/// Transliterates `text` from `from_script` to `to_script` using already
/// resolved script data, options and custom rules.
///
/// Processing steps, in order:
/// 1. typing-mode input aliases (typing mode only) and input-side replace rules
///    are applied to the whole text;
/// 2. the text is scanned left to right, each step matching the longest known
///    key, emitting its target pieces and maintaining the look-behind context
///    (schwa / halant / matra handling lives in `prev_context_cleanup`);
/// 3. a final context flush is run and output-side replace rules are applied.
///
/// # Parameters
/// * `text` - input text.
/// * `from_script` / `to_script` - source and target scripts.
/// * `from_script_data` / `to_script_data` - data tables of those scripts.
/// * `trans_options_in` - active custom options, keyed by option name.
/// * `custom_rules` - custom rules enabled by those options.
/// * `options` - behaviour switches; `None` uses `TransliterationFnOptions::default()`.
///
/// # Returns
/// The output text and the number of items left in the look-behind context.
///
/// # Panics
/// Panics when `typing_mode` is enabled and `from_script` is not `Normal`.
#[allow(clippy::too_many_arguments)]
pub fn transliterate_text_core(
text: &str,
from_script: &ScriptListEnum,
to_script: &ScriptListEnum,
from_script_data: &ScriptData,
to_script_data: &ScriptData,
trans_options_in: &HashMap<String, bool>,
custom_rules: &[impl Borrow<Rule>],
options: Option<TransliterationFnOptions>,
) -> TransliterationOutput {
let opts = options.unwrap_or_default();
if opts.typing_mode && *from_script != ScriptListEnum::Normal {
panic!("Typing mode is only supported with Normal script as the input");
}
let trans_options = trans_options_in;
// Typing mode first rewrites the input through the typing aliases of the target script.
let text = if opts.typing_mode && *from_script == ScriptListEnum::Normal {
helpers::apply_typing_input_aliases(text, to_script)
} else {
Cow::Borrowed(text)
};
// Whole-text replace rules that are declared for the input side.
let text = apply_custom_replace_rules(
text.as_ref(),
from_script_data,
custom_rules,
CheckInEnum::Input,
);
let mut result = ResultStringBuilder::new();
let mut cursor = InputTextCursor::new(text.as_ref());
let mut prev_context = PrevContextBuilder::new(MAX_CONTEXT_LENGTH as usize);
// The look-behind context is maintained for Brahmic<->Other conversions and
// for typing mode from Normal into an Other-type script.
let prev_context_in_use = (matches!(from_script_data, ScriptData::Brahmic { .. })
&& matches!(to_script_data, ScriptData::Other { .. }))
|| (matches!(from_script_data, ScriptData::Other { .. })
&& matches!(to_script_data, ScriptData::Brahmic { .. }))
|| (opts.typing_mode
&& *from_script == ScriptListEnum::Normal
&& matches!(to_script_data, ScriptData::Other { .. }));
// Halant / nuqta are taken from whichever side is Brahmic; both are `None`
// when the two scripts are of the same kind.
let (brahmic_nuqta, brahmic_halant) = match (from_script_data, to_script_data) {
(ScriptData::Brahmic { nuqta, halant, .. }, ScriptData::Other { .. }) => {
(nuqta.as_deref(), Some(halant.as_str()))
}
(ScriptData::Other { .. }, ScriptData::Brahmic { nuqta, halant, .. }) => {
(nuqta.as_deref(), Some(halant.as_str()))
}
_ => (None, None),
};
let trans_opt_normal_to_all_use_typing_chars: bool = trans_options
.get("normal_to_all:use_typing_chars")
.copied()
.unwrap_or(false);
// With the typing map, input keys are looked up in the TARGET script's typing table.
let use_typing_map = (trans_opt_normal_to_all_use_typing_chars || opts.typing_mode)
&& *from_script == ScriptListEnum::Normal;
let text_to_krama_lookup_script_data = if use_typing_map {
to_script_data
} else {
from_script_data
};
let from_text_to_krama_map = if use_typing_map {
&to_script_data.typing_text_to_krama_map
} else {
&from_script_data.text_to_krama_map
};
// Input index of a Tamil-Extended superscript char that was already consumed
// as part of an earlier match and must be skipped later; -1 means "none".
let mut ignore_ta_ext_sup_num_text_index: isize = -1;
let is_from_tamil_ext_ = is_script_tamil_ext(from_script);
let is_to_tamil_ext_ = is_script_tamil_ext(to_script);
let opt_preserve_specific_chars_ = *trans_options
.get("all_to_normal:preserve_specific_chars")
.unwrap_or(&false)
&& *to_script == ScriptListEnum::Normal;
let mut ctx = TransliterateCtx {
from_script,
to_script,
from_script_data,
to_script_data,
trans_options,
custom_rules,
cursor: &mut cursor,
result: &mut result,
prev_context: &mut prev_context,
prev_context_in_use,
brahmic_halant,
brahmic_nuqta,
typing_mode: opts.typing_mode,
include_inherent_vowels: opts.include_inherent_vowel,
};
let chars_len = ctx.cursor.char_count();
// Main scan: each iteration consumes at least one input character.
while ctx.cursor.pos() < chars_len {
let mut text_index = ctx.cursor.pos();
let ch = match ctx.cursor.peek() {
Some(v) => v,
None => break,
};
// The cursor reached (or passed) the recorded superscript index: drop the
// marker and skip this character without emitting anything.
if ignore_ta_ext_sup_num_text_index != -1
&& (text_index as isize) >= ignore_ta_ext_sup_num_text_index
{
ignore_ta_ext_sup_num_text_index = -1;
ctx.cursor.advance(1);
continue;
}
// Whitespace / punctuation: flush pending context effects, reset the
// context, then copy the character through.
if is_skip_char(ch) {
ctx.cursor.advance(1);
if ctx.prev_context_in_use {
ctx.prev_context_cleanup(Some((Some(Cow::Borrowed(" ")), None)), None, None);
ctx.prev_context.clear();
}
ctx.result.emit_char(ch);
continue;
}
// ASCII digits are copied verbatim when native numerals are disabled.
// NOTE(review): the digit is emitted BEFORE the context cleanup runs and the
// cleanup is not gated on `prev_context_in_use`, unlike the skip-char branch
// above — confirm this ordering is intended.
if ch.is_ascii_digit() && !opts.use_native_numerals {
ctx.result.emit_char(ch);
ctx.cursor.advance(1);
const DIGIT_STRS: [&str; 10] = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"];
let digit_str = DIGIT_STRS[(ch as u8 - b'0') as usize];
let _ = ctx.prev_context_cleanup(Some((Some(Cow::Borrowed(digit_str)), None)), None, None);
continue;
}
// `all_to_normal:preserve_specific_chars`: characters listed in the source
// script's custom table are emitted as the typing-map text their back
// reference points at (empty text when there is no back reference).
if opt_preserve_specific_chars_ {
let mut ch_buf = [0u8; 4];
let ch_str = ch.encode_utf8(&mut ch_buf);
let idx = from_script_data.custom_script_char_index_of_text(ch_str);
if let Some(custom_idx) = idx {
let (custom_text, list_ref_opt, back_ref_opt) =
&from_script_data.custom_script_chars_arr[custom_idx];
let list_item = list_ref_opt
.and_then(|i| from_script_data.list.get(i as usize))
.map(Cow::Borrowed);
ctx.prev_context_cleanup(
Some((Some(Cow::Borrowed(custom_text.as_str())), list_item)),
None,
None,
);
let normal_text = back_ref_opt
.and_then(|i| from_script_data.typing_text_to_krama_map.get(i as usize))
.map(|(s, _)| s.as_str())
.unwrap_or_default();
ctx.result.emit(normal_text);
ctx.cursor.advance(custom_text.chars().count());
continue;
}
}
// Longest-match scan: starting with one character, the candidate is extended
// while the matched entry's `next` list contains the following character.
let mut text_to_krama_item_index: Option<usize>;
{
let mut scan_units: usize = 0;
let mut last_valid_vowel_match_index: Option<usize> = None;
// Other -> Brahmic with a consonant (or consonant + nuqta) just before: a
// single-vowel match is remembered so that a longer non-vowel match can be
// rejected in favour of it.
let check_vowel_retraction = ctx.prev_context_in_use
&& matches!(from_script_data, ScriptData::Other { .. })
&& matches!(to_script_data, ScriptData::Brahmic { .. })
&& (ctx
.prev_context
.type_at(-1)
.is_some_and(|k| k.is_vyanjana())
|| (ctx.brahmic_nuqta.is_some()
&& ctx
.prev_context
.type_at(-2)
.is_some_and(|k| k.is_vyanjana())
&& ctx.prev_context.text_at(-1) == ctx.brahmic_nuqta));
loop {
let next = ctx.cursor.peek_at(text_index + scan_units + 1);
let next_char: Option<char> = next;
if ignore_ta_ext_sup_num_text_index != -1
&& next_char.is_some()
&& is_ta_ext_superscript_tail(next_char)
{
scan_units += 1; }
let end_index = text_index + scan_units + 1;
// While a superscript index is pending, the candidate text is built
// without the character at that index.
let char_to_search: Cow<'_, str> = if ignore_ta_ext_sup_num_text_index != -1 {
let a = ctx
.cursor
.slice(text_index, ignore_ta_ext_sup_num_text_index as usize)
.unwrap_or_default();
let b = if end_index > (ignore_ta_ext_sup_num_text_index as usize) {
ctx
.cursor
.slice((ignore_ta_ext_sup_num_text_index as usize) + 1, end_index)
.unwrap_or_default()
} else {
""
};
{
let mut s = String::with_capacity(a.len() + b.len());
s.push_str(a);
s.push_str(b);
Cow::Owned(s)
}
} else {
Cow::Borrowed(ctx.cursor.slice(text_index, end_index).unwrap_or_default())
};
let potential_match_index = text_to_krama_lookup_script_data
.text_to_krama_map_index(char_to_search.as_ref(), use_typing_map);
let Some(potential_match_index) = potential_match_index else {
text_to_krama_item_index = None;
break;
};
let potential_match = &from_text_to_krama_map[potential_match_index];
if check_vowel_retraction
&& let Some(krama) = &potential_match.1.krama
&& !krama.is_empty()
{
let krama_id = krama[0];
if krama_id >= 0 {
let list_idx = to_script_data
.krama_text_arr
.get(krama_id as usize)
.and_then(|(_, li)| *li);
let list_type = list_idx.and_then(|li| to_script_data.list.get(li as usize));
let is_single_vowel =
krama.len() == 1 && list_type.is_some_and(|t| t.is_svara() || t.is_matra());
if is_single_vowel {
last_valid_vowel_match_index = Some(potential_match_index);
} else if last_valid_vowel_match_index.is_some() {
text_to_krama_item_index = last_valid_vowel_match_index;
break;
}
}
}
if let Some(next_list) = &potential_match.1.next
&& !next_list.is_empty()
{
let nth_next = ctx.cursor.peek_at(end_index);
let nth_next_character: Option<char> = nth_next;
// Tamil-Extended input: look up to three characters ahead for a
// superscript tail that belongs to the current key but is written after
// an intervening halant / matra / vedic-svara mark.
// NOTE(review): presumably the superscript is stored after the vowel sign
// in the input text — confirm against the helpers module.
if is_from_tamil_ext_ && matches!(from_script_data, ScriptData::Brahmic { .. }) {
let n_1_th_next = if nth_next.is_some() {
ctx.cursor.peek_at(end_index + 1)
} else {
None
};
let n_1_th_next_character: Option<char> = n_1_th_next;
let n_2_th_next = if nth_next.is_some() && n_1_th_next.is_some() {
ctx.cursor.peek_at(end_index + 1 + 1)
} else {
None
};
let n_2_th_next_character: Option<char> = n_2_th_next;
// Case 1: superscript is the 2nd char ahead, after one halant or matra.
if ignore_ta_ext_sup_num_text_index == -1
&& is_ta_ext_superscript_tail(n_1_th_next_character)
&& n_1_th_next_character.is_some_and(|c| next_list.iter().any(|x| char_eq_str(c, x)))
{
let mut sup_buf = [0u8; 4];
let sup = n_1_th_next_character
.map(|c| c.encode_utf8(&mut sup_buf) as &str)
.unwrap_or_default();
let search_str = {
let mut s = String::with_capacity(char_to_search.len() + sup.len());
s.push_str(&char_to_search);
s.push_str(sup);
s
};
let char_index = from_script_data.text_to_krama_map_index(&search_str, false);
let mut nth_buf = [0u8; 4];
let nth_char_text_index = nth_next_character
.and_then(|c| from_script_data.krama_index_of_text(c.encode_utf8(&mut nth_buf)));
if let (Some(char_index), Some(nth_char_text_index)) =
(char_index, nth_char_text_index)
{
text_to_krama_item_index = Some(char_index);
let nth_char_type = from_script_data
.krama_text_arr
.get(nth_char_text_index)
.and_then(|(_, li)| *li)
.and_then(|li| from_script_data.list.get(li as usize));
if let ScriptData::Brahmic { halant, .. } = from_script_data
&& (nth_next_character.is_some_and(|c| char_eq_str(c, halant))
|| nth_char_type.is_some_and(|k| k.is_matra()))
{
ignore_ta_ext_sup_num_text_index =
(end_index + if nth_next_character.is_some() { 1 } else { 0 }) as isize;
break;
}
}
}
// Case 2: superscript is the 3rd char ahead, after two matras.
else if ignore_ta_ext_sup_num_text_index == -1
&& is_ta_ext_superscript_tail(n_2_th_next_character)
&& n_2_th_next_character.is_some_and(|c| next_list.iter().any(|x| char_eq_str(c, x)))
{
let mut sup_buf = [0u8; 4];
let sup = n_2_th_next_character
.map(|c| c.encode_utf8(&mut sup_buf) as &str)
.unwrap_or_default();
let search_str = {
let mut s = String::with_capacity(char_to_search.len() + sup.len());
s.push_str(&char_to_search);
s.push_str(sup);
s
};
let char_index = from_script_data.text_to_krama_map_index(&search_str, false);
let mut nth_buf = [0u8; 4];
let nth_char_text_index = nth_next_character
.and_then(|c| from_script_data.krama_index_of_text(c.encode_utf8(&mut nth_buf)));
let mut n1_buf = [0u8; 4];
let n_1_th_char_text_index = n_1_th_next_character
.and_then(|c| from_script_data.krama_index_of_text(c.encode_utf8(&mut n1_buf)));
if let (Some(char_index), Some(nth_char_text_index), Some(n_1_th_char_text_index)) =
(char_index, nth_char_text_index, n_1_th_char_text_index)
{
text_to_krama_item_index = Some(char_index);
let nth_char_type = from_script_data
.krama_text_arr
.get(nth_char_text_index)
.and_then(|(_, li)| *li)
.and_then(|li| from_script_data.list.get(li as usize));
let n_1_th_char_type = from_script_data
.krama_text_arr
.get(n_1_th_char_text_index)
.and_then(|(_, li)| *li)
.and_then(|li| from_script_data.list.get(li as usize));
if nth_char_type.is_some_and(|k| k.is_matra())
&& n_1_th_char_type.is_some_and(|k| k.is_matra())
{
ignore_ta_ext_sup_num_text_index = (end_index
+ if nth_next_character.is_some() { 1 } else { 0 }
+ if n_1_th_next_character.is_some() {
1
} else {
0
}) as isize;
break;
}
}
}
// Case 3: superscript is the 3rd char ahead, after a matra and a vedic
// svara mark. This is a separate `if`, so it is also evaluated after
// case 1 / case 2 did not break out of the loop.
if ignore_ta_ext_sup_num_text_index == -1
&& nth_next_character.is_some()
&& is_vedic_svara_tail(n_1_th_next_character)
&& is_ta_ext_superscript_tail(n_2_th_next_character)
&& n_2_th_next_character.is_some_and(|c| next_list.iter().any(|x| char_eq_str(c, x)))
{
let mut nth_buf = [0u8; 4];
let nth_char_text_index = nth_next_character
.and_then(|c| from_script_data.krama_index_of_text(c.encode_utf8(&mut nth_buf)));
if let Some(nth_char_text_index) = nth_char_text_index {
let nth_char_type = from_script_data
.krama_text_arr
.get(nth_char_text_index)
.and_then(|(_, li)| *li)
.and_then(|li| from_script_data.list.get(li as usize));
if nth_char_type.is_some_and(|k| k.is_matra()) {
let mut sup_buf2 = [0u8; 4];
let sup = n_2_th_next_character
.map(|c| c.encode_utf8(&mut sup_buf2) as &str)
.unwrap_or_default();
let search_str2 = {
let mut s = String::with_capacity(char_to_search.len() + sup.len());
s.push_str(&char_to_search);
s.push_str(sup);
s
};
let char_index = from_script_data.text_to_krama_map_index(&search_str2, false);
if let Some(char_index) = char_index {
text_to_krama_item_index = Some(char_index);
ignore_ta_ext_sup_num_text_index = (end_index
+ if nth_next_character.is_some() { 1 } else { 0 }
+ if n_1_th_next_character.is_some() {
1
} else {
0
}) as isize;
break;
}
}
}
}
}
// The next input character can extend the current key: keep scanning.
if let Some(nth_ch) = nth_next_character
&& next_list.iter().any(|x| char_eq_str(nth_ch, x))
{
scan_units += 1; continue;
}
}
text_to_krama_item_index = Some(potential_match_index);
break;
}
}
let text_to_krama_item = text_to_krama_item_index.map(|i| &from_text_to_krama_map[i]);
if let Some(text_to_krama_item) = text_to_krama_item {
let (matched_text, map) = text_to_krama_item;
let is_type_vyanjana = map
.krama
.as_ref()
.and_then(|k| k.first())
.and_then(|ki| ctx.from_script_data.krama_text_arr.get(*ki as usize))
.and_then(|(_, li)| li.as_ref())
.and_then(|li| from_script_data.list.get(*li as usize))
.is_some_and(|l| l.is_vyanjana());
let matched_char_count = matched_text.chars().count();
// When the matched key ends in a superscript that sits further ahead in the
// input (pending index set), that character is not consumed here; it is
// skipped when the cursor reaches the recorded index.
let index_delete_length = if ignore_ta_ext_sup_num_text_index != -1
&& matched_char_count > 1
&& is_type_vyanjana
&& is_ta_ext_superscript_tail(matched_text.chars().last())
{
1
} else {
0
};
let matched_len_units = matched_char_count - index_delete_length;
ctx.cursor.advance(matched_len_units);
// Typing characters: a key with a custom back reference maps straight to a
// custom character of the target script.
if (opts.typing_mode || trans_opt_normal_to_all_use_typing_chars)
&& let Some(custom_back_ref) = map.custom_back_ref
&& custom_back_ref >= 0
&& let Some(custom_item) = to_script_data
.custom_script_chars_arr
.get(custom_back_ref as usize)
{
ctx.result.emit(custom_item.0.as_str());
let list_item = custom_item
.1
.and_then(|li| to_script_data.list.get(li as usize))
.map(Cow::Borrowed);
ctx.prev_context_cleanup(
Some((Some(Cow::Borrowed(matched_text.as_str())), list_item)),
map.next.as_deref(),
None,
);
continue;
}
if let Some(krama) = &map.krama {
// At least one krama index is not -1: collect the target pieces
// (negative indices are skipped, at most 8 pieces are kept).
if krama.iter().any(|&k| k != -1) {
let mut pieces_buf = [""; 8];
let mut pieces_len = 0usize;
for &k in krama.iter() {
if k < 0 {
continue;
}
if pieces_len < pieces_buf.len() {
pieces_buf[pieces_len] = ctx.to_script_data.krama_text_or_empty(k);
pieces_len += 1;
}
}
let pieces = &pieces_buf[..pieces_len];
let mut result_concat_status = false;
if ctx.prev_context_in_use {
if matches!(from_script_data, ScriptData::Brahmic { .. })
&& matches!(to_script_data, ScriptData::Other { .. })
{
// Brahmic -> Other: determine the list type of the matched item on
// the source side (fallback reference first, else from its krama).
let mut item = map.fallback_list_ref.and_then(|i| {
if !(trans_opt_normal_to_all_use_typing_chars || opts.typing_mode) {
from_script_data.list.get(i as usize).map(Cow::Borrowed)
} else {
None
}
});
if item.is_none() && map.krama.as_ref().is_none_or(|krama| krama.is_empty()) {
item = None;
} else if item.is_none()
&& let Some(krama) = &map.krama
{
let list_refs: Vec<Option<&List>> = krama
.iter()
.map(|x| {
from_script_data
.krama_text_arr
.get(*x as usize)
.and_then(|k| k.1)
.and_then(|list_ref| from_script_data.list.get(list_ref as usize))
})
.collect();
// Tamil-Extended key that contains both a consonant and a matra is
// recorded as an `Anya` item carrying the first entry's krama refs.
if is_from_tamil_ext_
&& list_refs
.iter()
.any(|k| k.as_ref().is_some_and(|k| k.is_matra()))
&& list_refs
.iter()
.any(|k| k.as_ref().is_some_and(|k| k.is_vyanjana()))
{
if let Some(first) = list_refs.first() {
item = Some(Cow::Owned(List::Anya {
krama_ref: first
.map(|x| x.get_krama_ref().clone())
.unwrap_or(Vec::new()),
}));
}
} else if is_from_tamil_ext_
&& list_refs.len() > 1
&& list_refs.iter().any(|k| k.is_none())
{
if let Some(last) = list_refs.last() {
match last {
None => {
item = None;
}
Some(v) => {
item = Some(Cow::Borrowed(v));
}
}
}
} else {
if let Some(first) = list_refs.first() {
match first {
None => {
item = None;
}
Some(v) => {
item = Some(Cow::Borrowed(v));
}
}
}
}
}
result_concat_status = ctx.prev_context_cleanup(
Some((Some(Cow::Borrowed(matched_text.as_str())), item)),
None,
None,
);
} else if matches!(to_script_data, ScriptData::Brahmic { .. })
&& matches!(from_script_data, ScriptData::Other { .. })
{
// Other -> Brahmic: the list type is resolved on the target side.
let item: Option<Cow<'_, List>>;
if let Some(f) = map.fallback_list_ref {
item = to_script_data.list.get(f as usize).map(Cow::Borrowed);
} else {
item = if krama.is_empty() {
None
} else {
krama
.first()
.and_then(|k| to_script_data.krama_text_arr.get(*k as usize))
.and_then(|k| k.1.and_then(|i| to_script_data.list.get(i as usize)))
.map(Cow::Borrowed)
};
}
let next_list = if opts.typing_mode && *from_script == ScriptListEnum::Normal {
map.next.as_deref()
} else {
None
};
result_concat_status = ctx.prev_context_cleanup(
Some((Some(Cow::Borrowed(matched_text.as_str())), item)),
next_list,
None,
);
} else if opts.typing_mode
&& *from_script == ScriptListEnum::Normal
&& matches!(to_script_data, ScriptData::Other { .. })
{
result_concat_status = ctx.prev_context_cleanup(
Some((Some(Cow::Borrowed(matched_text.as_str())), None)),
map.next.as_deref(),
None,
);
}
}
// Emit the pieces unless the cleanup already wrote the output for this item.
if !result_concat_status {
if let ScriptData::Brahmic {
halant: to_halant, ..
} = to_script_data
{
// Tamil-Extended output ending in a superscript: halant / matra pieces
// are emitted with reordering, a vedic-svara tail goes in front of the
// last output char, everything else is appended normally.
// NOTE(review): the matra test below indexes `list` directly with the
// last krama index, whereas the single-piece path further down goes
// through `krama_text_arr` first — confirm which is intended.
if is_to_tamil_ext_ && is_ta_ext_superscript_tail(ctx.result.last_char()) {
if pieces.len() == 1 && pieces[0] == to_halant
|| map.krama.as_ref().is_some_and(|krama| {
krama.last().is_some_and(|last_i| {
to_script_data
.list
.get(*last_i as usize)
.is_some_and(|k| k.is_matra())
})
})
{
ctx.result.emit_pieces_with_reorder(pieces, to_halant, true);
} else if pieces
.last()
.and_then(|k| k.chars().last())
.is_some_and(|s| is_vedic_svara_tail(Some(s)))
{
let last = ctx.result.pop_last_char().unwrap_or_default();
ctx.result.emit_pieces(pieces);
ctx.result.emit_char(last);
} else {
ctx.result.emit_pieces(pieces);
}
} else {
ctx.result.emit_pieces(pieces);
}
} else {
ctx.result.emit_pieces(pieces);
}
}
ctx.apply_custom_trans_rules(ctx.cursor.pos() as isize, -(matched_len_units as isize));
continue;
} else
// Non-empty krama consisting only of -1: the matched text passes through unchanged.
if krama.contains(&-1) {
ctx.result.emit(matched_text.as_str());
if opts.typing_mode {
ctx.prev_context_cleanup(
Some((Some(Cow::Borrowed(matched_text.as_str())), None)),
map.next.as_deref(),
None,
);
}
continue;
}
}
} else {
// No key matched: consume a single character.
ctx.cursor.advance(1);
text_index = ctx.cursor.pos();
}
// Fallback path: look the matched text (or the single character) up directly
// in the source script's krama table.
let char_to_search: Cow<'_, str> = text_to_krama_item
.map(|k| Cow::Borrowed(k.0.as_str()))
.unwrap_or_else(|| {
let mut buf = [0u8; 4];
Cow::Owned(ch.encode_utf8(&mut buf).to_owned())
});
let idx = from_script_data.krama_index_of_text(char_to_search.as_ref());
// Unknown text is copied through unchanged and resets the context.
let Some(index) = idx else {
if ctx.prev_context_in_use {
ctx.prev_context_cleanup(Some((Some(char_to_search.clone()), None)), None, None);
ctx.prev_context.clear();
}
ctx.result.emit(char_to_search.as_ref());
continue;
};
let mut result_concat_status = false;
if ctx.prev_context_in_use {
if matches!(from_script_data, ScriptData::Brahmic { .. }) {
let list_idx = from_script_data
.krama_text_arr
.get(index)
.and_then(|(_, li)| *li);
let item =
list_idx.and_then(|li| from_script_data.list.get(li as usize).map(Cow::Borrowed));
result_concat_status =
ctx.prev_context_cleanup(Some((Some(char_to_search), item)), None, None);
} else if matches!(to_script_data, ScriptData::Brahmic { .. }) {
let list_idx = to_script_data
.krama_text_arr
.get(index)
.and_then(|(_, li)| *li);
let item = list_idx.and_then(|li| to_script_data.list.get(li as usize).map(Cow::Borrowed));
result_concat_status =
ctx.prev_context_cleanup(Some((Some(char_to_search), item)), None, None);
}
}
// Same emission rules as the multi-piece path, for a single piece.
if !result_concat_status {
let pieces = [to_script_data.krama_text_or_empty(index as i16)];
if let ScriptData::Brahmic {
halant: to_halant, ..
} = to_script_data
{
if is_to_tamil_ext_ && is_ta_ext_superscript_tail(ctx.result.last_char()) {
if pieces[0] == to_halant
|| to_script_data
.krama_text_arr
.get(index)
.is_some_and(|krama| {
krama.1.is_some_and(|i| {
to_script_data
.list
.get(i as usize)
.is_some_and(|k| k.is_matra())
})
})
{
ctx
.result
.emit_pieces_with_reorder(&pieces, to_halant, true);
} else if pieces
.last()
.and_then(|k| k.chars().last())
.is_some_and(|s| is_vedic_svara_tail(Some(s)))
{
let last = ctx.result.pop_last_char().unwrap_or_default();
ctx.result.emit_pieces(&pieces);
ctx.result.emit_char(last);
} else {
ctx.result.emit_pieces(&pieces);
}
} else {
ctx.result.emit_pieces(&pieces);
}
} else {
ctx.result.emit_pieces(&pieces);
}
}
ctx.apply_custom_trans_rules(ctx.cursor.pos() as isize, -1);
}
// Final flush so a trailing consonant still receives its schwa / halant.
if ctx.prev_context_in_use {
let _ = ctx.prev_context_cleanup(None, None, Some(true));
}
// Whole-text replace rules that are declared for the output side.
let output = ctx.result.to_string(); let output = apply_custom_replace_rules(
output.as_str(),
to_script_data,
custom_rules,
CheckInEnum::Output,
);
TransliterationOutput {
output: output.into_owned(),
context_length: ctx.prev_context.length(),
}
}
pub fn transliterate_text(
text: impl AsRef<str>,
from_script: ScriptListEnum,
to_script: ScriptListEnum,
transliteration_input_options: Option<&HashMap<String, bool>>,
options: Option<TransliterationFnOptions>,
) -> TransliterationOutput {
let text = text.as_ref();
let from_data = ScriptData::get_script_data(&from_script);
let to_data = ScriptData::get_script_data(&to_script);
let resolved = resolve_transliteration_rules(from_data, to_data, transliteration_input_options);
transliterate_text_core(
text,
&from_script,
&to_script,
from_data,
to_data,
&resolved.trans_options,
&resolved.custom_rules,
options,
)
}