string-width 0.1.0

Accurate Unicode string width calculation for terminal applications, handling emoji, East Asian characters, combining marks, and ANSI escape sequences
Documentation
//! Unicode constants for string width calculation.
//!
//! This module contains all the Unicode code points and ranges used
//! throughout the string width calculation library.

/// Presentation variation selectors
///
/// A variation selector follows a base character and picks how that
/// character is presented: as plain text or as an emoji.
pub mod variation_selectors {
    /// U+FE0E, the selector requesting text presentation.
    pub const TEXT_PRESENTATION: u32 = '\u{FE0E}' as u32;
    /// U+FE0F, the selector requesting emoji presentation.
    pub const EMOJI_PRESENTATION: u32 = '\u{FE0F}' as u32;
}

/// Code points involved in keycap emoji sequences
///
/// Keycap sequences are built from a digit (`0`-`9`), `*` or `#` together
/// with the combining enclosing keycap mark.
pub mod keycap {
    /// U+0023 NUMBER SIGN (`#`).
    pub const NUMBER_SIGN: u32 = '#' as u32;
    /// U+002A ASTERISK (`*`).
    pub const ASTERISK: u32 = '*' as u32;
    /// U+0030 DIGIT ZERO, the first digit code point.
    pub const DIGIT_ZERO: u32 = '0' as u32;
    /// U+0039 DIGIT NINE, the last digit code point.
    pub const DIGIT_NINE: u32 = '9' as u32;
    /// U+20E3 COMBINING ENCLOSING KEYCAP.
    pub const COMBINING_ENCLOSING: u32 = '\u{20E3}' as u32;
}

/// Bounds of the regional indicator symbols
///
/// Two consecutive regional indicators together encode a country or
/// region flag.
pub mod regional_indicators {
    /// U+1F1E6, the first code point of the regional indicator range.
    pub const START: u32 = '\u{1F1E6}' as u32;
    /// U+1F1FF, the last code point of the regional indicator range.
    pub const END: u32 = '\u{1F1FF}' as u32;
}

/// Code point blocks regarded as emoji by default
///
/// Each constant is an inclusive range covering one Unicode block whose
/// characters are meant to be handled as emoji without needing a trailing
/// variation selector.
///
/// NOTE(review): the ranges cover entire blocks, and some of them
/// (e.g. Alchemical Symbols, Supplemental Arrows-C, Chess Symbols) do not
/// appear to contain `Emoji_Presentation` code points in the Unicode emoji
/// data. Confirm that whole-block coverage is intended.
pub mod emoji_ranges {
    use std::ops::RangeInclusive;

    /// U+1F300-U+1F5FF: Miscellaneous Symbols and Pictographs.
    pub const MISC_SYMBOLS_PICTOGRAPHS: RangeInclusive<u32> = RangeInclusive::new(0x1F300, 0x1F5FF);
    /// U+1F600-U+1F64F: Emoticons.
    pub const EMOTICONS: RangeInclusive<u32> = RangeInclusive::new(0x1F600, 0x1F64F);
    /// U+1F680-U+1F6FF: Transport and Map Symbols.
    pub const TRANSPORT_MAP: RangeInclusive<u32> = RangeInclusive::new(0x1F680, 0x1F6FF);
    /// U+1F700-U+1F77F: Alchemical Symbols.
    pub const ALCHEMICAL: RangeInclusive<u32> = RangeInclusive::new(0x1F700, 0x1F77F);
    /// U+1F780-U+1F7FF: Geometric Shapes Extended.
    pub const GEOMETRIC_EXTENDED: RangeInclusive<u32> = RangeInclusive::new(0x1F780, 0x1F7FF);
    /// U+1F800-U+1F8FF: Supplemental Arrows-C.
    pub const SUPPLEMENTAL_ARROWS_C: RangeInclusive<u32> = RangeInclusive::new(0x1F800, 0x1F8FF);
    /// U+1F900-U+1F9FF: Supplemental Symbols and Pictographs.
    pub const SUPPLEMENTAL_SYMBOLS: RangeInclusive<u32> = RangeInclusive::new(0x1F900, 0x1F9FF);
    /// U+1FA00-U+1FA6F: Chess Symbols.
    pub const CHESS: RangeInclusive<u32> = RangeInclusive::new(0x1FA00, 0x1FA6F);
    /// U+1FA70-U+1FAFF: Symbols and Pictographs Extended-A.
    pub const SYMBOLS_EXTENDED_A: RangeInclusive<u32> = RangeInclusive::new(0x1FA70, 0x1FAFF);
}

/// Blocks of combining marks
///
/// A combining mark attaches to a base character to form one composite
/// glyph; diacritics are the typical example. Every constant is an
/// inclusive code point range covering one such block.
pub mod combining_marks {
    use std::ops::RangeInclusive;

    /// U+0300-U+036F: Combining Diacritical Marks.
    pub const DIACRITICAL: RangeInclusive<u32> = RangeInclusive::new(0x0300, 0x036F);
    /// U+1AB0-U+1AFF: Combining Diacritical Marks Extended.
    pub const DIACRITICAL_EXTENDED: RangeInclusive<u32> = RangeInclusive::new(0x1AB0, 0x1AFF);
    /// U+1DC0-U+1DFF: Combining Diacritical Marks Supplement.
    pub const DIACRITICAL_SUPPLEMENT: RangeInclusive<u32> = RangeInclusive::new(0x1DC0, 0x1DFF);
    /// U+20D0-U+20FF: Combining Diacritical Marks for Symbols.
    pub const DIACRITICAL_SYMBOLS: RangeInclusive<u32> = RangeInclusive::new(0x20D0, 0x20FF);
    /// U+FE20-U+FE2F: Combining Half Marks.
    pub const HALF_MARKS: RangeInclusive<u32> = RangeInclusive::new(0xFE20, 0xFE2F);
}

/// Arabic-script ranges grouped as "prepend" characters
///
/// Prepend characters belong to the grapheme cluster that follows them;
/// they occur in Arabic and some other scripts.
///
/// NOTE(review): in UAX #29 only U+0600-U+0605 of the first span carry
/// `Grapheme_Cluster_Break=Prepend`; the remaining code points here look
/// like punctuation or combining marks. Confirm the wider spans are
/// intended.
pub mod prepend_chars {
    use std::ops::RangeInclusive;

    /// U+0600-U+061F: Arabic prepend characters.
    pub const ARABIC_PREPEND: RangeInclusive<u32> = RangeInclusive::new(0x0600, 0x061F);
    /// U+064B-U+065F: Arabic diacritics.
    pub const ARABIC_DIACRITICS: RangeInclusive<u32> = RangeInclusive::new(0x064B, 0x065F);
    /// U+06D6-U+06ED: Arabic supplement prepend characters.
    pub const ARABIC_SUPPLEMENT: RangeInclusive<u32> = RangeInclusive::new(0x06D6, 0x06ED);
}

/// Invisible format and control code points
///
/// These characters influence layout or formatting of the surrounding text
/// but are not supposed to add anything to the display width.
pub mod format_chars {
    /// U+00AD SOFT HYPHEN.
    pub const SOFT_HYPHEN: u32 = '\u{00AD}' as u32;
    /// U+200B ZERO WIDTH SPACE.
    pub const ZERO_WIDTH_SPACE: u32 = '\u{200B}' as u32;
    /// U+200F RIGHT-TO-LEFT MARK.
    pub const RIGHT_TO_LEFT_MARK: u32 = '\u{200F}' as u32;
    /// U+FEFF ZERO WIDTH NO-BREAK SPACE.
    pub const ZERO_WIDTH_NO_BREAK_SPACE: u32 = '\u{FEFF}' as u32;
}

/// The Halfwidth and Fullwidth Forms block
///
/// Characters of this block have particular width properties, which can
/// influence the width computed for a whole grapheme cluster.
pub mod halfwidth_fullwidth {
    use std::ops::RangeInclusive;

    /// The entire block, U+FF00 through U+FFEF inclusive.
    pub const BLOCK: RangeInclusive<u32> = RangeInclusive::new(0xFF00, 0xFFEF);
}

/// Unicode ranges for characters that become emoji when combined with VS16
///
/// The code points listed here are the bases that can be displayed as emoji
/// when followed by the emoji variation selector (VS16, U+FE0F). The set is
/// split between the range constants and `INDIVIDUAL_CHARS`; a lookup is
/// expected to consult both.
///
/// Ranges only span code points that are all emoji-capable. Where the
/// code points between two emoji are ordinary characters, the emoji are
/// listed individually rather than bridged by a range.
pub mod emoji_with_vs16_ranges {
    use std::ops::RangeInclusive;

    /// Copyright sign (U+00A9)
    ///
    /// Deliberately a single-element range: U+00AA-U+00AD (ordinal
    /// indicator, guillemet, not sign, soft hyphen) are not emoji, so the
    /// registered sign (U+00AE) is listed in `INDIVIDUAL_CHARS` instead of
    /// being reached through this range.
    pub const COPYRIGHT_REGISTERED: RangeInclusive<u32> = 0x00A9..=0x00A9;
    /// Double exclamation mark (U+203C)
    ///
    /// Deliberately a single-element range: U+203D-U+2048 are ordinary
    /// punctuation, so the exclamation question mark (U+2049) is listed in
    /// `INDIVIDUAL_CHARS` instead of being reached through this range.
    pub const EXCLAMATION_MARKS: RangeInclusive<u32> = 0x203C..=0x203C;
    /// Directional arrows
    pub const ARROWS: RangeInclusive<u32> = 0x2194..=0x2199;
    /// Return arrows
    pub const RETURN_ARROWS: RangeInclusive<u32> = 0x21A9..=0x21AA;
    /// Watch and hourglass
    pub const WATCH_HOURGLASS: RangeInclusive<u32> = 0x231A..=0x231B;
    /// Media control symbols
    pub const MEDIA_CONTROLS: RangeInclusive<u32> = 0x23E9..=0x23F3;
    /// Small squares
    pub const SMALL_SQUARES: RangeInclusive<u32> = 0x25FD..=0x25FE;
    /// Weather symbols
    pub const WEATHER_BASIC: RangeInclusive<u32> = 0x2600..=0x2601;
    /// Umbrella and coffee
    pub const UMBRELLA_COFFEE: RangeInclusive<u32> = 0x2614..=0x2615;
    /// Radioactive and biohazard
    pub const HAZARD_SYMBOLS: RangeInclusive<u32> = 0x2622..=0x2623;
    /// Peace and yin yang
    pub const PEACE_YIN_YANG: RangeInclusive<u32> = 0x262E..=0x262F;
    /// Dharma wheel and smiling face
    pub const DHARMA_SMILE: RangeInclusive<u32> = 0x2638..=0x263A;
    /// Zodiac signs
    pub const ZODIAC: RangeInclusive<u32> = 0x2648..=0x2653;
    /// Card suits (partial)
    pub const CARD_SUITS_1: RangeInclusive<u32> = 0x2665..=0x2666;
    /// Tools and science symbols
    pub const TOOLS_SCIENCE: RangeInclusive<u32> = 0x2692..=0x2697;
    /// Atom and fleur-de-lis
    pub const ATOM_FLEUR: RangeInclusive<u32> = 0x269B..=0x269C;
    /// Warning and zap
    pub const WARNING_ZAP: RangeInclusive<u32> = 0x26A0..=0x26A1;
    /// Circles
    pub const CIRCLES: RangeInclusive<u32> = 0x26AA..=0x26AB;
    /// Coffin and funeral urn
    pub const FUNERAL: RangeInclusive<u32> = 0x26B0..=0x26B1;
    /// Sports balls
    pub const SPORTS_BALLS: RangeInclusive<u32> = 0x26BD..=0x26BE;
    /// Snowman and sun behind cloud
    pub const WEATHER_EXTENDED: RangeInclusive<u32> = 0x26C4..=0x26C5;
    /// Ophiuchus and pick
    pub const OPHIUCHUS_PICK: RangeInclusive<u32> = 0x26CE..=0x26CF;
    /// Chains and no entry
    pub const CHAINS_NO_ENTRY: RangeInclusive<u32> = 0x26D3..=0x26D4;
    /// Religious buildings
    pub const RELIGIOUS_BUILDINGS: RangeInclusive<u32> = 0x26E9..=0x26EA;
    /// Mountain and sailboat
    pub const MOUNTAIN_SAILBOAT: RangeInclusive<u32> = 0x26F0..=0x26F5;
    /// Skier and tent
    pub const SKIER_TENT: RangeInclusive<u32> = 0x26F7..=0x26FA;
    /// Airplane to pencil
    pub const AIRPLANE_PENCIL: RangeInclusive<u32> = 0x2708..=0x270D;
    /// Eight-spoked asterisk
    pub const ASTERISK_VARIANTS: RangeInclusive<u32> = 0x2733..=0x2734;
    /// Question marks
    pub const QUESTION_MARKS: RangeInclusive<u32> = 0x2753..=0x2755;
    /// Hearts
    pub const HEARTS: RangeInclusive<u32> = 0x2763..=0x2764;
    /// Plus and minus
    pub const PLUS_MINUS: RangeInclusive<u32> = 0x2795..=0x2797;
    /// Curved arrows
    pub const CURVED_ARROWS: RangeInclusive<u32> = 0x2934..=0x2935;
    /// Basic arrows
    pub const BASIC_ARROWS: RangeInclusive<u32> = 0x2B05..=0x2B07;
    /// Large squares
    pub const LARGE_SQUARES: RangeInclusive<u32> = 0x2B1B..=0x2B1C;

    /// Individual characters that become emoji with VS16
    ///
    /// A list of specific Unicode code points that can be displayed as emoji
    /// when followed by the emoji variation selector (U+FE0F). The entries
    /// are kept in ascending code point order.
    pub const INDIVIDUAL_CHARS: &[u32] = &[
        0x00AE, // Registered sign
        0x2049, // Exclamation question mark
        0x2122, // Trademark
        0x2139, // Information
        0x2328, // Keyboard
        0x23CF, // Eject
        0x260E, // Phone
        0x2611, // Ballot box with check
        0x2618, // Shamrock
        0x261D, // Pointing finger
        0x2620, // Skull and crossbones
        0x2626, // Orthodox cross
        0x262A, // Star and crescent
        0x2640, // Female sign
        0x2642, // Male sign
        0x2660, // Spade suit
        0x2663, // Club suit
        0x2668, // Hot springs
        0x267B, // Recycling symbol
        0x267F, // Wheelchair symbol
        0x2699, // Gear
        0x26C8, // Thunder cloud and rain
        0x26D1, // Helmet with white cross
        0x26FD, // Fuel pump
        0x2702, // Scissors
        0x2705, // White heavy check mark
        0x270F, // Pencil
        0x2712, // Black nib
        0x2714, // Heavy check mark
        0x2716, // Heavy multiplication x
        0x271D, // Latin cross
        0x2721, // Star of David
        0x2728, // Sparkles
        0x2744, // Snowflake
        0x2747, // Sparkle
        0x274C, // Cross mark
        0x274E, // Negative squared cross mark
        0x2757, // Heavy exclamation mark
        0x27A1, // Black rightwards arrow
        0x27B0, // Curly loop
        0x27BF, // Double curly loop
        0x2B50, // White medium star
        0x2B55, // Heavy large circle
        0x3030, // Wavy dash
        0x303D, // Part alternation mark
        0x3297, // Circled ideograph congratulation
        0x3299, // Circled ideograph secret
    ];
}