east_asian_width/
lib.rs

1#![deny(missing_docs)]
2//! # East Asian Width
3//!
4//! A Rust library for determining the display width of Unicode characters in East Asian contexts.
5//! This is particularly useful for terminal applications, text editors, and other software that
6//! needs to properly align text containing CJK (Chinese, Japanese, Korean) characters.
7//!
8//! ## Features
9//!
10//! - **Fast lookups**: Pre-generated lookup tables for O(1) character width determination
11//! - **Unicode compliant**: Based on the official Unicode East Asian Width property
12//! - **Flexible API**: Support for both simple and configurable width calculations
13//! - **Zero dependencies**: No runtime dependencies for the core library
14//!
15//! ## Usage
16//!
17//! ### Basic Usage
18//!
19//! ```rust
20//! use east_asian_width::{east_asian_width, east_asian_width_type, DisplayWidth};
21//!
22//! // Get the display width of a character
23//! assert_eq!(east_asian_width('A' as u32), DisplayWidth::Narrow);  // Narrow/Neutral = 1
24//! assert_eq!(east_asian_width('字' as u32), DisplayWidth::Wide); // Wide = 2
25//!
26//! // Convert to numeric values when needed
27//! assert_eq!(east_asian_width('A' as u32).as_u8(), 1);
28//! assert_eq!(east_asian_width('字' as u32).as_u8(), 2);
29//!
30//! // Get the East Asian Width category
31//! assert_eq!(east_asian_width_type('A' as u32), "narrow");
32//! assert_eq!(east_asian_width_type('字' as u32), "wide");
33//! ```
34//!
35//! ### Handling Ambiguous Characters
36//!
37//! Some characters are classified as "ambiguous" and can be displayed as either narrow or wide
38//! depending on the context. You can control this behavior:
39//!
40//! ```rust
41//! use east_asian_width::{east_asian_width, DisplayWidth};
42//!
43//! let ambiguous_char = 0x00A1; // ¡ (inverted exclamation mark)
44//!
45//! // Default: ambiguous characters are treated as narrow (width 1)
46//! assert_eq!(east_asian_width(ambiguous_char), DisplayWidth::Narrow);
47//!
48//! // Treat ambiguous characters as wide (width 2)
49//! assert_eq!(east_asian_width((ambiguous_char, true)), DisplayWidth::Wide);
50//!
51//! // Convert to numeric values
52//! assert_eq!(east_asian_width(ambiguous_char).as_u8(), 1);
53//! assert_eq!(east_asian_width((ambiguous_char, true)).as_u8(), 2);
54//! ```
55//!
56//! ## Character Categories
57//!
58//! The library recognizes six East Asian Width categories:
59//!
60//! - **Narrow (Na)**: Characters that are always narrow (width 1)
61//! - **Neutral (N)**: Characters that don't have East Asian context (width 1)
62//! - **Halfwidth (H)**: Characters that are narrow in East Asian context (width 1)
63//! - **Ambiguous (A)**: Characters that can be narrow or wide depending on context
64//! - **Wide (W)**: Characters that are always wide (width 2)
65//! - **Fullwidth (F)**: Characters that are wide in East Asian context (width 2)
66
67/// Error types and validation functions for East Asian Width operations.
68pub mod error;
69
70/// Contains the generated Unicode lookup tables and functions.
71///
72/// This module provides low-level access to the Unicode East Asian Width data
73/// through pre-generated lookup functions. These functions are used internally
74/// by the high-level API but can also be used directly for performance-critical code.
75pub mod lookup;
76use lookup::get_category;
77
78// Re-export error types and validation
79pub use error::{EastAsianWidthError, validate_code_point};
80
81// Re-export lookup functions for compatibility
82pub use lookup::{is_ambiguous, is_full_width, is_wide};
83
84#[cfg(test)]
85mod tests;
86
/// Display width of a single character in East Asian contexts.
///
/// Each variant's discriminant is the numeric width it stands for, so casting
/// a variant to an integer yields `1` or `2` directly.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum DisplayWidth {
    /// The character has width 1.
    Narrow = 1,
    /// The character has width 2.
    Wide = 2,
}
95
96impl DisplayWidth {
97    /// Returns the numeric width value
98    pub const fn as_u8(self) -> u8 {
99        self as u8
100    }
101
102    /// Returns the numeric width value as usize (common for string operations)
103    pub const fn as_usize(self) -> usize {
104        self as usize
105    }
106}
107
108impl From<DisplayWidth> for u8 {
109    fn from(width: DisplayWidth) -> Self {
110        width.as_u8()
111    }
112}
113
114impl From<DisplayWidth> for usize {
115    fn from(width: DisplayWidth) -> Self {
116        width.as_usize()
117    }
118}
119
/// Input accepted by [`east_asian_width`] and [`try_east_asian_width`].
///
/// The trait lets those functions take either a bare `u32` code point or a
/// `(u32, bool)` pair whose flag chooses how ambiguous characters are measured.
///
/// # Implementations
///
/// - `u32`: ambiguous characters are treated as narrow (width 1)
/// - `(u32, bool)`: the flag decides whether ambiguous characters are treated as wide
pub trait EastAsianWidthInput {
    /// Returns the Unicode code point to measure.
    fn code_point(&self) -> u32;

    /// Returns `true` when ambiguous characters should be measured as wide.
    fn ambiguous_as_wide(&self) -> bool;
}
136
137impl EastAsianWidthInput for u32 {
138    fn code_point(&self) -> u32 {
139        *self
140    }
141
142    fn ambiguous_as_wide(&self) -> bool {
143        false
144    }
145}
146
147impl EastAsianWidthInput for (u32, bool) {
148    fn code_point(&self) -> u32 {
149        self.0
150    }
151
152    fn ambiguous_as_wide(&self) -> bool {
153        self.1
154    }
155}
156
157/// Returns the East Asian Width category for a Unicode code point
158///
159/// # Arguments
160/// * `code_point` - A Unicode code point (0-0x10FFFF)
161///
162/// # Returns
163/// The category as a string: "ambiguous", "fullwidth", "halfwidth", "neutral", "narrow", or "wide"
164///
165/// # Panics
166/// Panics if the code point is outside the valid Unicode range
167pub fn east_asian_width_type(code_point: u32) -> &'static str {
168    validate_code_point(code_point).expect("Invalid Unicode code point");
169    get_category(code_point)
170}
171
172/// Fallible version of `east_asian_width_type`
173///
174/// # Arguments
175/// * `code_point` - A Unicode code point (0-0x10FFFF)
176///
177/// # Returns
178/// * `Ok(category)` - The category as a string
179/// * `Err(EastAsianWidthError)` - If the code point is invalid
180pub fn try_east_asian_width_type(code_point: u32) -> Result<&'static str, EastAsianWidthError> {
181    validate_code_point(code_point)?;
182    Ok(get_category(code_point))
183}
184
185/// Returns the display width of a Unicode code point in East Asian contexts
186///
187/// # Arguments
188/// * `input` - Either a `u32` code point or `(u32, bool)` tuple where bool indicates ambiguous_as_wide
189///
190/// # Returns
191/// * `DisplayWidth::Wide` for wide/fullwidth characters (and ambiguous if `ambiguous_as_wide` is true)
192/// * `DisplayWidth::Narrow` for all other characters
193///
194/// # Examples
195/// ```
196/// use east_asian_width::{east_asian_width, DisplayWidth};
197///
198/// // Basic usage with just code point
199/// assert_eq!(east_asian_width(0x5B57), DisplayWidth::Wide); // '字' is wide
200/// assert_eq!(east_asian_width(0x5B57).as_u8(), 2); // Convert to numeric value
201///
202/// // With ambiguous_as_wide option
203/// assert_eq!(east_asian_width((0x00A1, true)), DisplayWidth::Wide);  // ambiguous as wide
204/// assert_eq!(east_asian_width((0x00A1, false)), DisplayWidth::Narrow); // ambiguous as narrow
205/// ```
206///
207/// # Panics
208/// Panics if the code point is outside the valid Unicode range
209pub fn east_asian_width<T: EastAsianWidthInput>(input: T) -> DisplayWidth {
210    try_east_asian_width(input).expect("Invalid Unicode code point")
211}
212
213/// Fallible version of `east_asian_width`
214///
215/// # Arguments
216/// * `input` - Either a `u32` code point or `(u32, bool)` tuple where bool indicates ambiguous_as_wide
217///
218/// # Returns
219/// * `Ok(DisplayWidth)` - The display width (Narrow or Wide)
220/// * `Err(EastAsianWidthError)` - If the code point is invalid
221///
222/// # Examples
223/// ```
224/// use east_asian_width::{try_east_asian_width, DisplayWidth};
225///
226/// // Valid code points
227/// assert_eq!(try_east_asian_width(0x5B57).unwrap(), DisplayWidth::Wide); // '字' is wide
228/// assert_eq!(try_east_asian_width((0x00A1, true)).unwrap(), DisplayWidth::Wide);  // ambiguous as wide
229/// assert_eq!(try_east_asian_width(0x5B57).unwrap().as_u8(), 2); // Convert to numeric
230///
231/// // Invalid code point
232/// assert!(try_east_asian_width(0x110000).is_err());
233/// ```
234pub fn try_east_asian_width<T: EastAsianWidthInput>(
235    input: T,
236) -> Result<DisplayWidth, EastAsianWidthError> {
237    let code_point = input.code_point();
238    validate_code_point(code_point)?;
239
240    Ok(
241        if is_full_width(code_point)
242            || is_wide(code_point)
243            || (input.ambiguous_as_wide() && is_ambiguous(code_point))
244        {
245            DisplayWidth::Wide
246        } else {
247            DisplayWidth::Narrow
248        },
249    )
250}