ukraine 1.1.0

Glory to Ukraine. A library for transliterating Ukrainian Cyrillic text into a Latin-script representation.
Documentation
/// Transliterates Ukrainian Cyrillic text into Latin script according to
/// DSTU 9112:2021, system B.
///
/// See <https://en.wikipedia.org/wiki/DSTU_9112:2021>.
///
/// The input is processed one character at a time with one character of
/// look-ahead. Characters that have no mapping (Latin letters, digits,
/// punctuation, whitespace, ...) are copied to the output unchanged. The
/// apostrophe variants U+02BC, U+2019 and the ASCII `'` are all emitted as the
/// ASCII apostrophe.
///
/// # Examples
///
/// ```rust
/// use ukraine::latin::transliterate_dstu9112b;
///
/// let original = "Слава Україні. Героям слава!";
/// let transliterated = transliterate_dstu9112b(original);
/// assert_eq!(transliterated, "Slava Ukrajini. Gherojam slava!");
/// ```
pub fn transliterate_dstu9112b(text: &str) -> String {
    // Random access to neighbours is needed, so materialise the chars once.
    let chars: Vec<char> = text.chars().collect();
    let mut output = String::with_capacity(text.len() * 2);
    let mut index = 0usize;

    while index < chars.len() {
        let ch = chars[index];
        let next = chars.get(index + 1).copied();

        // Two-letter source sequences are tried first; on success the helper
        // has already written the output and reports how many chars it used.
        if let Some(consumed) = try_digraph(ch, next, &mut output) {
            index += consumed;
            continue;
        }

        match ch {
            'Щ' | 'щ' => push_seq(&mut output, "shch", ch.is_uppercase()),
            'Ь' | 'ь' => push_seq(&mut output, "j", ch.is_uppercase()),
            // Apostrophes are written as escapes so the non-ASCII variants
            // cannot be silently dropped by an editor or encoding conversion:
            // U+02BC MODIFIER LETTER APOSTROPHE, U+2019 RIGHT SINGLE QUOTATION MARK.
            '\u{02BC}' | '\u{2019}' | '\'' => output.push('\''),
            'И' | 'и' => {
                // Only this letter is context-sensitive, so the look-behind
                // characters are fetched here rather than on every iteration.
                let prev = index.checked_sub(1).map(|i| chars[i]);
                let prev2 = index.checked_sub(2).map(|i| chars[i]);
                output.push_str(map_letter_i(ch, prev, prev2, next));
            }
            _ => match map_basic(ch) {
                Some(mapped) => push_seq(&mut output, mapped, ch.is_uppercase()),
                // Not a Ukrainian letter: pass through untouched.
                None => output.push(ch),
            },
        }

        index += 1;
    }

    output
}

fn try_digraph(first: char, second: Option<char>, output: &mut String) -> Option<usize> {
    let second = second?;
    let uppercase = first.is_uppercase();

    match (first, second) {
        ('Д', 'ж') | ('Д', 'Ж') | ('д', 'ж') | ('д', 'Ж') => {
            push_seq(output, "dzh", uppercase);
            Some(2)
        }
        ('Д', 'з') | ('Д', 'З') | ('д', 'з') | ('д', 'З') => {
            push_seq(output, "dz", uppercase);
            Some(2)
        }
        _ => None,
    }
}

/// Picks the Latin rendering of the Cyrillic letter `И`/`и`.
///
/// * `ch` - the source letter; only its case is inspected.
/// * `prev` - the source character immediately before `ch`, if any.
/// * `prev2` - the source character two positions before `ch`, if any.
/// * `next` - the source character immediately after `ch`, if any.
///
/// Returns `"u"`/`"U"` when the letter is preceded by `ьк` (any case) and is
/// followed by a non-alphabetic character or the end of input; otherwise
/// returns `"y"`/`"Y"`. The case of the result follows the case of `ch`.
///
/// NOTE(review): the `ьк` + `и` → `u` special case does not look like part of
/// the DSTU 9112:2021 table (which appears to map `и` to `y` unconditionally)
/// — confirm against the standard before relying on it.
fn map_letter_i(
    ch: char,
    prev: Option<char>,
    prev2: Option<char>,
    next: Option<char>,
) -> &'static str {
    let after_k = matches!(prev, Some('к' | 'К'));
    let after_soft_sign = matches!(prev2, Some('ь' | 'Ь'));
    let at_word_end = match next {
        Some(following) => !following.is_alphabetic(),
        None => true,
    };

    match (after_k && after_soft_sign && at_word_end, ch.is_uppercase()) {
        (true, true) => "U",
        (true, false) => "u",
        (false, true) => "Y",
        (false, false) => "y",
    }
}

/// Looks up the lowercase Latin sequence for a single Ukrainian letter.
///
/// Both the uppercase and lowercase forms of a letter yield the same
/// lowercase result. Returns `None` for any character that is not in the
/// table; `Щ` and `Ь` are not listed here.
fn map_basic(ch: char) -> Option<&'static str> {
    // (uppercase, lowercase, Latin sequence)
    const TABLE: &[(char, char, &str)] = &[
        ('А', 'а', "a"),
        ('Б', 'б', "b"),
        ('В', 'в', "v"),
        ('Г', 'г', "gh"),
        ('Ґ', 'ґ', "g"),
        ('Д', 'д', "d"),
        ('Е', 'е', "e"),
        ('Є', 'є', "je"),
        ('Ж', 'ж', "zh"),
        ('З', 'з', "z"),
        // The main loop handles this letter itself; kept for completeness.
        ('И', 'и', "y"),
        ('І', 'і', "i"),
        ('Ї', 'ї', "ji"),
        ('Й', 'й', "j"),
        ('К', 'к', "k"),
        ('Л', 'л', "l"),
        ('М', 'м', "m"),
        ('Н', 'н', "n"),
        ('О', 'о', "o"),
        ('П', 'п', "p"),
        ('Р', 'р', "r"),
        ('С', 'с', "s"),
        ('Т', 'т', "t"),
        ('У', 'у', "u"),
        ('Ф', 'ф', "f"),
        ('Х', 'х', "kh"),
        ('Ц', 'ц', "c"),
        ('Ч', 'ч', "ch"),
        ('Ш', 'ш', "sh"),
        ('Ю', 'ю', "ju"),
        ('Я', 'я', "ja"),
    ];

    TABLE
        .iter()
        .find(|&&(upper, lower, _)| ch == upper || ch == lower)
        .map(|&(_, _, latin)| latin)
}

/// Appends `seq` to `output`, optionally in capitalised form.
///
/// With `capitalize == false` the sequence is appended exactly as given.
/// With `capitalize == true` the first character is uppercased and every
/// remaining character is lowercased before being appended. An empty `seq`
/// appends nothing.
fn push_seq(output: &mut String, seq: &str, capitalize: bool) {
    if capitalize {
        let mut letters = seq.chars();
        if let Some(head) = letters.next() {
            // Case conversion can expand to several chars, hence `extend`.
            output.extend(head.to_uppercase());
        }
        output.extend(letters.flat_map(char::to_lowercase));
    } else {
        output.push_str(seq);
    }
}