// ibm437 0.4.0 - Docs.rs

//! Unicode to IBM437 glyph mapping.
//!
//! The IBM437 character set contains 256 glyphs arranged in a spritesheet
//! of [`CHARS_PER_ROW`] columns (16) and 16 rows. This module provides
//! [`char_offset_impl`], which converts any Unicode `char` into its
//! glyph offset in that spritesheet.
//!
//! Most Latin-1 code points map to their own value (identity mapping).
//! Box-drawing characters, Greek letters, mathematical symbols and a few
//! other Unicode ranges are remapped to their IBM437 positions.
//! Unsupported characters fall back to `'?'`.

/// Number of character glyphs per row in the font spritesheet.
///
/// A glyph offset `o` is located at column `o % CHARS_PER_ROW` and
/// row `o / CHARS_PER_ROW` of the sheet.
pub const CHARS_PER_ROW: usize = 16;

/// Identity bitmap for the Latin-1 range `0x00..=0xFF`, one bit per code point.
///
/// Bits are stored most-significant first: code point `cp` is described by
/// bit `7 - cp % 8` of byte `cp / 8`. A set bit means the glyph offset of
/// `cp` equals `cp` itself; a cleared bit means the code point is either
/// remapped explicitly by [`char_offset_impl`] or not present in the font.
#[rustfmt::skip]
const LATIN1_IDENTITY: [u8; 32] = [
    0x80, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, // 0x00..0x3F
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, // 0x40..0x7F
    0x00, 0x00, 0x00, 0x00, 0xF5, 0x00, 0xE7, 0x3D, // 0x80..0xBF
    0x0F, 0x40, 0x42, 0x09, 0xEF, 0xFF, 0x7B, 0x79, // 0xC0..0xFF
];

/// Translates a Unicode `char` into the index of its glyph in the IBM437
/// font spritesheet.
///
/// The result always lies in `0..=255`. It addresses a cell of the sheet:
/// column = `offset % CHARS_PER_ROW`, row = `offset / CHARS_PER_ROW`.
///
/// Characters the font cannot display yield the offset of `'?'`.
///
/// ```
/// use ibm437::char_offset_impl;
///
/// assert_eq!(char_offset_impl('A'), 0x41);
/// assert_eq!(char_offset_impl('█'), 0xD7);
/// assert_eq!(char_offset_impl('🦀'), b'?' as usize); // unsupported
/// ```
#[inline(always)]
pub const fn char_offset_impl(c: char) -> usize {
    // Glyph shown in place of every character without a glyph of its own.
    const FALLBACK: usize = b'?' as usize;

    let code = c as usize;

    // Latin-1: three remapped code points, otherwise consult the bitmap.
    if code <= 0xFF {
        return match code {
            0xAA..=0xAC => 0x89 + (code - 0xAA),
            _ => {
                let packed = LATIN1_IDENTITY[code >> 3];
                // Msb0 layout: the lowest code point of a byte is its top bit.
                let mask = 0x80u8 >> (code & 7);
                if (packed & mask) == 0 {
                    FALLBACK
                } else {
                    code
                }
            }
        };
    }

    // Everything above Latin-1, grouped by Unicode block. Contiguous runs
    // are mapped as `first glyph offset + distance from first code point`.
    match c {
        // Latin Extended-B, Greek
        'ƒ' => 0x9F,
        'Γ' => 0xA4,
        'Θ' => 0xA6,
        'Σ' => 0xA8,
        'Φ' => 0xA9,
        'Ω' => 0xAA,
        'α' => 0xAB,
        'δ'..='ε' => 0x8F + (code - 0x03B4),
        'π' => 0xAC,
        'σ'..='τ' => 0x9D + (code - 0x03C3),
        'φ' => 0xAD,

        // General punctuation, superscripts, currency
        '•' => 0xAE,
        '‼' => 0xAF,
        'ⁿ' => 0xB3,
        '₧' => 0xB4,

        // Arrows
        '←'..='↕' => 0x7F + (code - 0x2190),
        '↨' => 0xB8,

        // Mathematical operators
        '∙'..='√' => 0x91 + (code - 0x2219),
        '∞'..='∟' => 0x93 + (code - 0x221E),
        '∩' => 0xB9,
        '≈' => 0xBE,
        '≡' => 0xC0,
        '≤'..='≥' => 0x95 + (code - 0x2264),

        // Miscellaneous technical
        '⌂' => 0xC1,
        '⌐' => 0xC2,
        '⌠'..='⌡' => 0x97 + (code - 0x2320),

        // Box drawing
        '─' => 0xC3,
        '│' => 0xC8,
        '┌' => 0xCA,
        '┐' => 0xCB,
        '└' => 0xCC,
        '┘' => 0xCD,
        '├' => 0xCE,
        '┤' => 0xCF,
        '┬' => 0xD0,
        '┴' => 0xD2,
        '┼' => 0xD3,
        '═'..='╬' => 0x01 + (code - 0x2550),

        // Block elements
        '▀' => 0xD4,
        '▄' => 0xD5,
        '█' => 0xD7,
        '▌' => 0xD8,
        '▐'..='▓' => 0x85 + (code - 0x2590),

        // Geometric shapes
        '■' => 0xD9,
        '▬' => 0xDA,
        '▲' => 0xDB,
        '►' => 0xDD,
        '▼' => 0xDE,
        '◄' => 0xE3,
        '○' => 0xF0,
        '◘'..='◙' => 0x99 + (code - 0x25D8),

        // Miscellaneous symbols
        '☺'..='☼' => 0x8C + (code - 0x263A),
        '♀' => 0xF5,
        '♂' => 0xF8,
        '♠' => 0xFD,
        '♣' => 0xFE,
        '♥'..='♦' => 0x9B + (code - 0x2665),
        '♪'..='♫' => 0x1E + (code - 0x266A),

        _ => FALLBACK,
    }
}

#[cfg(test)]
mod char_offset_test {
    use super::char_offset_impl;

    /// Checks that the characters listed in `Characters_src.txt` map onto
    /// the 256 glyph offsets with no gaps and no collisions.
    #[test]
    fn test_unique_and_exhaustive() {
        // Line breaks only separate entries of the listing; they are not
        // characters of the set.
        let mut chars: Vec<char> = include_str!("Characters_src.txt")
            .lines()
            .flat_map(str::chars)
            .collect();

        // Replace 2 of the 3 spaces: the first and last entries of the
        // listing are overwritten with NUL and NO-BREAK SPACE.
        *chars
            .first_mut()
            .expect("Characters_src.txt contains no characters") = '\u{0000}';
        *chars
            .last_mut()
            .expect("Characters_src.txt contains no characters") = '\u{A0}';

        // Exhaustive: every offset must be produced by some character.
        let mut hit = [false; 256];
        for &c in &chars {
            hit[char_offset_impl(c)] = true;
        }
        for (i, &is_hit) in hit.iter().enumerate() {
            assert!(is_hit, "Offset has no character: {}", i);
        }

        // Unique: no two characters may share an offset.
        let mut hit = [false; 256];
        for &c in &chars {
            let offs = char_offset_impl(c);

            assert!(
                !hit[offs],
                "Duplicate offset for {} ({})",
                c, c as u32
            );
            hit[offs] = true;
        }
    }
}