// Documentation: query helpers over the lookup tables generated into OUT_DIR at build time.
use std::cmp::Ordering;

include!(concat!(env!("OUT_DIR"), "/field_consts.rs"));
include!(concat!(env!("OUT_DIR"), "/binary_data.rs"));
include!(concat!(env!("OUT_DIR"), "/metadata_data.rs"));
include!(concat!(env!("OUT_DIR"), "/category_data.rs"));

/// Position of an entry in the generated tables.
///
/// The wrapped value is used as a row index by the accessor functions in this
/// file (`codepoint`, `entry_str`, `entry_name`, ...); `lookup` is the function
/// here that produces one from a code point.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Idx(pub u32);

/// Reads the `i`-th little-endian `u32` from `data`, treating `data` as a
/// packed array of 4-byte values.
///
/// # Panics
///
/// Panics if `data` does not contain a full 4-byte value at index `i`.
fn get_u32(data: &[u8], i: usize) -> u32 {
    let start = i * 4;
    let mut word = [0u8; 4];
    word.copy_from_slice(&data[start..start + 4]);
    u32::from_le_bytes(word)
}

/// Reads the `i`-th little-endian `u16` from `data`, treating `data` as a
/// packed array of 2-byte values.
///
/// # Panics
///
/// Panics if `data` does not contain a full 2-byte value at index `i`.
fn get_u16(data: &[u8], i: usize) -> u16 {
    let start = i * 2;
    let mut half = [0u8; 2];
    half.copy_from_slice(&data[start..start + 2]);
    u16::from_le_bytes(half)
}

/// Returns how many code point entries the generated table holds.
///
/// `CODEPOINT_DATA` stores one 4-byte value per entry, so the count is the
/// byte length divided by four.
pub fn num_entries() -> usize {
    const ENTRY_WIDTH: usize = std::mem::size_of::<u32>();
    CODEPOINT_DATA.len() / ENTRY_WIDTH
}

pub fn codepoint(idx: Idx) -> u32 {
    get_u32(CODEPOINT_DATA, idx.0 as usize)
}

/// Returns the string stored in column `field` of entry `idx`.
///
/// Offsets (`u32`) and lengths (`u16`) are kept in parallel tables with
/// `NUM_FIELDS` slots per entry; together they delimit a byte range inside
/// `STRING_DATA`.
///
/// # Panics
///
/// Panics if the slot or the byte range falls outside the tables, or if the
/// referenced bytes are not valid UTF-8.
pub fn entry_str(idx: Idx, field: usize) -> &'static str {
    // Both parallel tables are addressed by the same flattened (row, column) slot.
    let slot = idx.0 as usize * NUM_FIELDS + field;
    let start = get_u32(OFFSET_DATA, slot) as usize;
    let end = start + get_u16(LENGTH_DATA, slot) as usize;
    std::str::from_utf8(&STRING_DATA[start..end]).unwrap()
}

/// Returns the name of entry `idx`.
///
/// Names have their own offset/length tables (`NAME_OFFSET_DATA`,
/// `NAME_LENGTH_DATA`) with one slot per entry, pointing into `STRING_DATA`.
///
/// # Panics
///
/// Panics if `idx` or the referenced byte range is out of bounds, or if the
/// bytes are not valid UTF-8.
pub fn entry_name(idx: Idx) -> &'static str {
    let row = idx.0 as usize;
    let start = get_u32(NAME_OFFSET_DATA, row) as usize;
    let len = get_u16(NAME_LENGTH_DATA, row) as usize;
    let bytes = &STRING_DATA[start..][..len];
    std::str::from_utf8(bytes).unwrap()
}
/// Returns the `FIELD_SOURCE` column of entry `idx`.
///
/// Thin wrapper around [`entry_str`]; panics under the same conditions.
pub fn entry_source(idx: Idx) -> &'static str {
    entry_str(idx, FIELD_SOURCE)
}
/// Returns the `FIELD_CATEGORY` column of entry `idx`.
///
/// Thin wrapper around [`entry_str`]; panics under the same conditions.
pub fn entry_category(idx: Idx) -> &'static str {
    entry_str(idx, FIELD_CATEGORY)
}
/// Returns the `FIELD_BLOCK` column of entry `idx`.
///
/// Thin wrapper around [`entry_str`]; panics under the same conditions.
pub fn entry_block(idx: Idx) -> &'static str {
    entry_str(idx, FIELD_BLOCK)
}
/// Returns the `FIELD_ICON_SET` column of entry `idx`.
///
/// Thin wrapper around [`entry_str`]; panics under the same conditions.
pub fn entry_icon_set(idx: Idx) -> &'static str {
    entry_str(idx, FIELD_ICON_SET)
}

/// Returns the category label of the range in `CATEGORY_DATA` that contains
/// `cp`, or `None` when no range covers it.
///
/// Each row is a `(start, end, label)` triple with both bounds inclusive. The
/// table is binary-searched, so it is expected to be sorted by range.
pub fn category_of(cp: u32) -> Option<&'static str> {
    // The comparator reports where the probed range lies relative to `cp`.
    let found = CATEGORY_DATA.binary_search_by(|&(start, end, _)| match cp {
        c if c < start => Ordering::Greater,
        c if c > end => Ordering::Less,
        _ => Ordering::Equal,
    });
    match found {
        Ok(row) => Some(CATEGORY_DATA[row].2),
        Err(_) => None,
    }
}

/// Returns the index of the first entry in `CODEPOINT_DATA` whose code point
/// is not less than `cp`.
///
/// The result equals the entry count when every stored code point is smaller
/// than `cp`. The search is a binary search and therefore relies on the table
/// being sorted in ascending order.
pub(crate) fn lower_bound(cp: u32) -> usize {
    let table = CODEPOINT_DATA;
    let (mut lo, mut hi) = (0usize, table.len() / 4);
    loop {
        if lo >= hi {
            return lo;
        }
        // Midpoint written this way cannot overflow.
        let probe = lo + (hi - lo) / 2;
        if get_u32(table, probe) >= cp {
            hi = probe;
        } else {
            lo = probe + 1;
        }
    }
}

/// Finds the entry whose code point is exactly `cp`.
///
/// Returns `None` when `cp` is not present in `CODEPOINT_DATA`.
pub fn lookup(cp: u32) -> Option<Idx> {
    let pos = lower_bound(cp);
    let table = CODEPOINT_DATA;
    // `lower_bound` may return one-past-the-end, so check the range before reading.
    let is_hit = pos < table.len() / 4 && get_u32(table, pos) == cp;
    is_hit.then(|| Idx(pos as u32))
}

include!(concat!(env!("OUT_DIR"), "/name_lookup.rs"));

/// Returns the name paired with `cp` in `NAME_LOOKUP`, or `None` when the
/// table has no row for that code point.
///
/// The table is binary-searched on its first tuple element.
pub fn lookup_name(cp: u32) -> Option<&'static str> {
    let row = NAME_LOOKUP.binary_search_by_key(&cp, |&(c, _)| c).ok()?;
    Some(NAME_LOOKUP[row].1)
}

pub fn lookup_str(s: &str) -> Option<Idx> {
    let cp = parse_cp_str(s)?;
    lookup(cp)
}

/// Returns the generated `SOURCES` list unchanged.
pub fn list_sources() -> &'static [&'static str] {
    SOURCES
}

/// Returns the generated `ICON_SETS` list unchanged.
pub fn list_icon_sets() -> &'static [&'static str] {
    ICON_SETS
}

/// Maps an icon-set identifier to its human-readable description.
///
/// Matching is exact and case-sensitive. Unknown identifiers yield the empty
/// string.
pub fn icon_set_description(name: &str) -> &'static str {
    // (identifier, description) pairs; several identifiers share a description.
    const DESCRIPTIONS: &[(&str, &str)] = &[
        ("cod", "Codicons"),
        ("custom", "Seti and original"),
        ("dev", "Devicons"),
        ("extra", "Extra glyphs"),
        ("fa", "Font Awesome"),
        ("fae", "Font Awesome Extension"),
        ("iec", "Power Symbols IEC"),
        ("indent", "Extra glyphs"),
        ("indentation", "Extra glyphs"),
        ("linux", "Font Logos"),
        ("md", "Material Design"),
        ("oct", "Octicons"),
        ("pl", "Powerline Symbols"),
        ("ple", "Powerline Extra"),
        ("pom", "Pomicons"),
        ("seti", "Seti and original"),
        ("weather", "Weather Icons"),
    ];

    DESCRIPTIONS
        .iter()
        .find(|&&(key, _)| key == name)
        .map_or("", |&(_, text)| text)
}

/// Parses a user-supplied code point specification.
///
/// After trimming surrounding whitespace, the following forms are accepted,
/// checked in this order:
///
/// 1. `U+XXXX` / `u+XXXX` / `0xXXXX` / `0XXXXX` — prefixed hexadecimal.
/// 2. A single character, or any string starting with a non-ASCII character —
///    the scalar value of the first character is returned.
/// 3. Bare hexadecimal digits, e.g. `0041` or `AB`.
///
/// Note that a single ASCII character is treated as a literal character, so
/// `"A"` yields `0x41`, not `0xA`.
///
/// Returns `None` for empty input or when the digits are not valid
/// hexadecimal (including values that do not fit in a `u32`). The value is not
/// checked against the Unicode scalar range.
pub fn parse_cp_str(s: &str) -> Option<u32> {
    /// Parses an unsigned hexadecimal digit string.
    fn parse_hex(digits: &str) -> Option<u32> {
        // `from_str_radix` tolerates a leading `+`; a code point literal such
        // as `U++41` must not be accepted.
        if digits.starts_with('+') {
            return None;
        }
        u32::from_str_radix(digits, 16).ok()
    }

    // Both prefix styles are accepted in either case.
    const HEX_PREFIXES: [&str; 4] = ["U+", "u+", "0x", "0X"];

    let s = s.trim();
    if let Some(rest) = HEX_PREFIXES.iter().find_map(|&prefix| s.strip_prefix(prefix)) {
        return parse_hex(rest);
    }

    let first = s.chars().next()?;
    // A lone character (of any width) or anything starting with a non-ASCII
    // character is interpreted as the glyph itself rather than as hex digits.
    if s.len() == first.len_utf8() || !first.is_ascii() {
        return Some(u32::from(first));
    }
    parse_hex(s)
}

/// Unit tests for the lookup helpers. Most of them run against the generated
/// tables, so they also act as sanity checks on the build-time data.
#[cfg(test)]
mod tests {
    use super::*;

    // --- lookup by code point -------------------------------------------------

    #[test]
    fn lookup_hit_lower_bound() {
        let idx = lookup(0x0041).expect("A should exist");
        assert_eq!(entry_name(idx), "LATIN CAPITAL LETTER A");
    }

    #[test]
    fn lookup_hit_emoji() {
        let idx = lookup(0x1F600).expect("grinning face should exist");
        assert_eq!(entry_name(idx), "GRINNING FACE");
    }

    #[test]
    fn lookup_miss_surrogate() {
        assert!(lookup(0xD800).is_none());
    }

    #[test]
    fn lookup_miss_above_range() {
        assert!(lookup(0xFFFFFF).is_none());
    }

    #[test]
    fn lookup_miss_unassigned() {
        assert!(lookup(0x0378).is_none());
    }

    // --- lookup by string: every input form `parse_cp_str` accepts ------------

    #[test]
    fn lookup_str_uplus_format() {
        let idx = lookup_str("U+0041").expect("U+0041 should resolve");
        assert_eq!(codepoint(idx), 0x41);
    }

    #[test]
    fn lookup_str_uplus_lowercase() {
        let idx = lookup_str("u+0041").expect("u+0041 should resolve");
        assert_eq!(codepoint(idx), 0x41);
    }

    #[test]
    fn lookup_str_0x_format() {
        let idx = lookup_str("0x0041").expect("0x0041 should resolve");
        assert_eq!(codepoint(idx), 0x41);
    }

    #[test]
    fn lookup_str_hex_only() {
        let idx = lookup_str("0041").expect("0041 should resolve");
        assert_eq!(codepoint(idx), 0x41);
    }

    // A single ASCII character is taken literally, not as a hex digit.
    #[test]
    fn lookup_str_single_ascii_char() {
        let idx = lookup_str("A").expect("A should resolve");
        assert_eq!(codepoint(idx), 0x41);
    }

    #[test]
    fn lookup_str_single_non_ascii_char() {
        let idx = lookup_str("\u{1F600}").expect("emoji should resolve");
        assert_eq!(codepoint(idx), 0x1F600);
    }

    #[test]
    fn lookup_str_trimmed() {
        let idx = lookup_str("  U+0041  ").expect("trimmed should resolve");
        assert_eq!(codepoint(idx), 0x41);
    }

    #[test]
    fn lookup_str_not_found() {
        assert!(lookup_str("ZZZZ_NOT_A_CODEPOINT").is_none());
    }

    // --- parse_cp_str on its own (no table access) ----------------------------

    #[test]
    fn parse_cp_str_bare_hex_multi_char_ascii() {
        assert_eq!(parse_cp_str("0041"), Some(0x0041));
    }

    // More than one ASCII character is too long to be a literal glyph, so the
    // input is parsed as bare hex.
    #[test]
    fn parse_cp_str_too_long_multi_char_ascii() {
        assert_eq!(parse_cp_str("AB"), Some(0xAB));
    }

    #[test]
    fn parse_cp_str_empty() {
        assert_eq!(parse_cp_str(""), None);
    }

    // --- table invariants -------------------------------------------------------

    // `lower_bound` binary-searches the code point table, which only works if
    // the generated data is in ascending order.
    #[test]
    fn entries_are_sorted() {
        let n = num_entries();
        for i in 1..n {
            let prev = codepoint(Idx(i as u32 - 1));
            let cur = codepoint(Idx(i as u32));
            assert!(
                prev <= cur,
                "entries not sorted at index {i}: {prev} > {cur}"
            );
        }
    }

    #[test]
    fn entries_lookup_roundtrip() {
        let idx = lookup(0x0041).expect("A should exist");
        assert_eq!(codepoint(idx), 0x0041);
        assert_eq!(entry_str(idx, FIELD_GLYPH), "A");
        assert_eq!(entry_name(idx), "LATIN CAPITAL LETTER A");
    }
}