// opltypes 0.2.0
//
// Datatypes for the OpenPowerlifting database format.
// Documentation
//! Implements writing system detection.

/// Writing systems that a character can be categorized into.
///
/// The type is a plain fieldless enum, so it is cheap to copy, can be
/// compared for equality, and can be used as a key in hashed collections.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum WritingSystem {
    /// Cyrillic script.
    Cyrillic,
    /// Greek script.
    Greek,
    /// Japanese kana, together with the CJK ideograph blocks.
    Japanese,
    /// Korean Hangul.
    Korean,
    /// Latin script. Also used as the fallback when a character is unknown
    /// or is not a letter.
    Latin,
}

impl Default for WritingSystem {
    fn default() -> WritingSystem {
        WritingSystem::Latin
    }
}

/// Classifies a single character by writing system.
///
/// The decision is based solely on the character's Unicode code point and
/// the fixed set of ranges listed in the body. Any character outside those
/// ranges, including anything that is not a letter, is reported as `Latin`.
pub fn writing_system(c: char) -> WritingSystem {
    // The listed ranges never overlap, so the order of the arms only matters
    // for readability (and for keeping the common ASCII case at the top).
    match u32::from(c) {
        // ASCII, checked first because it is the common case.
        0x0..=0x7F => WritingSystem::Latin,

        // Greek.
        0x370..=0x3FF => WritingSystem::Greek,

        // Cyrillic.
        0x400..=0x4FF => WritingSystem::Cyrillic,

        // Everything treated as Japanese, one range per line:
        //   CJK Radicals Supplement
        //   some valid punctuation symbols
        //   Hiragana
        //   Katakana
        //   CJK Unified Ideographs Extension A
        //   CJK Unified Ideographs
        //   CJK Compatibility Ideographs
        //   CJK Compatibility Forms
        //   CJK Unified Ideographs Extension B
        //   CJK Unified Ideographs Extensions C, D, and E
        //   CJK Compatibility Ideographs Supplement
        0x2E80..=0x2EFF
        | 0x3005..=0x3006
        | 0x3040..=0x309F
        | 0x30A0..=0x30FF
        | 0x3400..=0x4DBF
        | 0x4E00..=0x9FFF
        | 0xF900..=0xFAFF
        | 0xFE30..=0xFE4F
        | 0x20000..=0x2A6DF
        | 0x2A700..=0x2CEAF
        | 0x2F800..=0x2FA1F => WritingSystem::Japanese,

        // Everything treated as Korean, one range per line:
        //   Hangul Syllables
        //   Hangul Jamo
        //   Hangul Compatibility Jamo
        //   Hangul Jamo Extended-A
        //   Hangul Jamo Extended-B
        0xAC00..=0xD7AF
        | 0x1100..=0x11FF
        | 0x3130..=0x318F
        | 0xA960..=0xA97F
        | 0xD7B0..=0xD7FF => WritingSystem::Korean,

        // Whatever is left is either Latin or not a letter at all.
        _ => WritingSystem::Latin,
    }
}

/// Returns the likely writing system of a string.
///
/// The first non-Latin character encountered is considered representative.
pub fn infer_writing_system(s: &str) -> WritingSystem {
    s.chars()
        .find_map(|c| match writing_system(c) {
            WritingSystem::Latin => None,
            other => Some(other),
        })
        .unwrap_or(WritingSystem::Latin)
}