use east_asian_width::east_asian_width;
use regex::Regex;
use std::sync::OnceLock;
use unicode_segmentation::UnicodeSegmentation;
use crate::character_classification::{find_main_character_optimized, is_halfwidth_fullwidth};
use crate::emoji::is_rgi_emoji;
use crate::options::{AmbiguousWidthTreatment, StringWidthOptions};
/// Regex source that matches a string consisting *entirely* (one or more
/// scalars, anchored with `^…$`) of code points from the ranges below.
///
/// Ranges listed in the character class:
/// - `U+0000–U+001F`: C0 control characters
/// - `U+007F–U+009F`: DEL and C1 control characters
/// - `U+00AD`: soft hyphen
/// - `U+0300–U+036F`: Combining Diacritical Marks
/// - `U+200B–U+200F`: zero-width space/joiners and directional marks
/// - `U+202A–U+202E`: bidirectional embedding/override controls
/// - `U+2060–U+206F`: word joiner, invisible operators and related format characters
/// - `U+FE00–U+FE0F`: variation selectors
/// - `U+FE20–U+FE2F`: Combining Half Marks
/// - `U+FEFF`: zero-width no-break space (BOM)
/// - `U+FFF9–U+FFFB`: interlinear annotation characters
/// - `U+E0000–U+E007F`: Tags block
///
/// Compiled once by `get_zero_width_regex`.
const ZERO_WIDTH_PATTERN: &str = concat!(
r"^[\u{0000}-\u{001F}", r"\u{007F}-\u{009F}", r"\u{00AD}", r"\u{0300}-\u{036F}", r"\u{200B}-\u{200F}", r"\u{202A}-\u{202E}", r"\u{2060}-\u{206F}", r"\u{FE00}-\u{FE0F}", r"\u{FE20}-\u{FE2F}", r"\u{FEFF}", r"\u{FFF9}-\u{FFFB}", r"\u{E0000}-\u{E007F}", r"]+$"
);
/// Highest code point (inclusive) of the C0 control range `U+0000..=U+001F`.
const CONTROL_CHAR_MAX: u32 = 0x1F;
/// Code point of DEL (`U+007F`); used as the lower bound of the DEL/C1 range check.
const DEL_CHAR: u32 = 0x7F;
/// Highest code point (inclusive) of the C1 control range, `U+009F`.
const C1_CONTROL_MAX: u32 = 0x9F;
/// Cache slot for the compiled `ZERO_WIDTH_PATTERN`; starts empty and is
/// filled on first use by `get_zero_width_regex`.
static ZERO_WIDTH_CLUSTER_REGEX: OnceLock<Regex> = OnceLock::new();
/// Returns the shared, compiled form of `ZERO_WIDTH_PATTERN`.
///
/// The regex is built the first time this function is called and stored in
/// `ZERO_WIDTH_CLUSTER_REGEX`; later calls hand back the stored instance.
///
/// # Panics
///
/// Panics if `ZERO_WIDTH_PATTERN` is rejected by `Regex::new`.
fn get_zero_width_regex() -> &'static Regex {
    /// One-shot initializer for the cache slot.
    fn compile() -> Regex {
        Regex::new(ZERO_WIDTH_PATTERN).expect("Zero-width regex should be valid")
    }

    ZERO_WIDTH_CLUSTER_REGEX.get_or_init(compile)
}
/// Reports whether `segment` matches the zero-width regex, i.e. whether it is
/// non-empty and every scalar in it belongs to the pattern's character class.
fn is_zero_width_cluster(segment: &str) -> bool {
    let zero_width = get_zero_width_regex();
    zero_width.is_match(segment)
}
/// Sums the East Asian Width of every scalar *after the first* in `segment`
/// for which `is_halfwidth_fullwidth` returns `true`.
///
/// Scalars after the first that do not satisfy `is_halfwidth_fullwidth`
/// contribute nothing. A segment with zero or one scalar yields `0`.
///
/// `ambiguous_as_wide` is forwarded unchanged to `east_asian_width`.
fn calculate_trailing_width(segment: &str, ambiguous_as_wide: bool) -> usize {
    let mut rest = segment.chars();

    // Consume the leading scalar; it is accounted for by the caller.
    if rest.next().is_none() {
        return 0;
    }

    let mut extra = 0;
    for ch in rest {
        if is_halfwidth_fullwidth(ch) {
            extra += east_asian_width((ch as u32, ambiguous_as_wide)).as_usize();
        }
    }
    extra
}
/// Computes the display width of `input` according to `options`.
///
/// An empty `input` is `0`. Otherwise the text is first passed through
/// `prepare_input` (which strips ANSI escapes unless `options.count_ansi` is
/// set) and the result is measured grapheme cluster by grapheme cluster.
pub fn string_width_with_options(input: &str, options: StringWidthOptions) -> usize {
    match input {
        "" => 0,
        text => {
            let prepared = prepare_input(text, options.count_ansi);
            calculate_grapheme_widths(&prepared, &options)
        }
    }
}
/// Produces the text that will actually be measured.
///
/// * `count_ansi == true`: `input` is returned as-is, borrowed (no allocation).
/// * `count_ansi == false`: `input` is run through `strip_ansi_escapes::strip`
///   and the resulting bytes are converted to an owned `String`, with any
///   invalid UTF-8 replaced lossily.
fn prepare_input(input: &str, count_ansi: bool) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    if count_ansi {
        return Cow::Borrowed(input);
    }

    let without_escapes = strip_ansi_escapes::strip(input);
    Cow::Owned(String::from_utf8_lossy(&without_escapes).into_owned())
}
/// Splits `input` into extended grapheme clusters and adds up the width
/// reported by `calculate_segment_width` for each one.
///
/// Ambiguous-width characters are treated as wide only when
/// `options.ambiguous_width` equals `AmbiguousWidthTreatment::Wide`.
/// An empty `input` has no clusters and therefore a total of `0`.
fn calculate_grapheme_widths(input: &str, options: &StringWidthOptions) -> usize {
    // Resolve the ambiguous-width policy once instead of per cluster.
    let wide_ambiguous = options.ambiguous_width == AmbiguousWidthTreatment::Wide;

    let mut total = 0;
    for cluster in input.graphemes(true) {
        total += calculate_segment_width(cluster, options, wide_ambiguous);
    }
    total
}
/// Returns the width of a single grapheme cluster.
///
/// With `options.count_ansi` set:
/// 1. an RGI emoji cluster is `2`;
/// 2. anything else is measured by `calculate_ansi_aware_width`.
///
/// With `options.count_ansi` unset:
/// 1. a cluster matching the zero-width pattern is `0`;
/// 2. an RGI emoji cluster is `2`;
/// 3. anything else is measured by `calculate_standard_width`.
///
/// NOTE(review): in `count_ansi` mode the zero-width check is skipped for
/// *all* clusters, not just escape sequences, so clusters made of combining
/// marks, joiners, etc. are measured per character — confirm this is intended.
fn calculate_segment_width(
    segment: &str,
    options: &StringWidthOptions,
    ambiguous_as_wide: bool,
) -> usize {
    if options.count_ansi {
        if is_rgi_emoji(segment) {
            2
        } else {
            calculate_ansi_aware_width(segment, ambiguous_as_wide)
        }
    } else if is_zero_width_cluster(segment) {
        0
    } else if is_rgi_emoji(segment) {
        2
    } else {
        calculate_standard_width(segment, ambiguous_as_wide)
    }
}
/// Measures `segment` scalar by scalar, for use when ANSI escapes are counted.
///
/// Each scalar in `U+0000..=CONTROL_CHAR_MAX` or `DEL_CHAR..=C1_CONTROL_MAX`
/// counts as `1`; every other scalar contributes its East Asian Width, with
/// `ambiguous_as_wide` forwarded to `east_asian_width`.
fn calculate_ansi_aware_width(segment: &str, ambiguous_as_wide: bool) -> usize {
    let mut width = 0;
    for ch in segment.chars() {
        let scalar = ch as u32;
        let is_control = matches!(scalar, 0..=CONTROL_CHAR_MAX | DEL_CHAR..=C1_CONTROL_MAX);
        width += if is_control {
            // Control characters are counted as one column each in this mode.
            1
        } else {
            east_asian_width((scalar, ambiguous_as_wide)).as_usize()
        };
    }
    width
}
/// Measures a cluster from its main character plus any trailing
/// halfwidth/fullwidth scalars.
///
/// If `find_main_character_optimized` finds no main character the width is
/// `0`. Otherwise the result is the East Asian Width of that character's code
/// point plus whatever `calculate_trailing_width` reports for the cluster.
fn calculate_standard_width(segment: &str, ambiguous_as_wide: bool) -> usize {
    let Some(main) = find_main_character_optimized(segment) else {
        return 0;
    };

    let base = east_asian_width((main.code_point, ambiguous_as_wide)).as_usize();
    let trailing = calculate_trailing_width(segment, ambiguous_as_wide);
    base + trailing
}
/// Extension trait exposing display-width measurement as methods.
///
/// Implemented in this file for `str` and `String`.
pub trait DisplayWidth {
/// Returns the display width of `self`. The implementations in this file
/// use `StringWidthOptions::default()`.
fn display_width(&self) -> usize;
/// Returns the display width of `self` computed with the given `options`.
fn display_width_with_options(&self, options: StringWidthOptions) -> usize;
}
impl DisplayWidth for str {
    /// Width of this string slice using `StringWidthOptions::default()`.
    fn display_width(&self) -> usize {
        self.display_width_with_options(StringWidthOptions::default())
    }

    /// Width of this string slice using the supplied `options`; delegates to
    /// `string_width_with_options`.
    fn display_width_with_options(&self, options: StringWidthOptions) -> usize {
        string_width_with_options(self, options)
    }
}
impl DisplayWidth for String {
    /// Width of this string using default options; forwards to the `str`
    /// implementation.
    fn display_width(&self) -> usize {
        <str as DisplayWidth>::display_width(self)
    }

    /// Width of this string using the supplied `options`; forwards to the
    /// `str` implementation.
    fn display_width_with_options(&self, options: StringWidthOptions) -> usize {
        <str as DisplayWidth>::display_width_with_options(self, options)
    }
}
pub fn string_width(input: &str) -> usize {
input.display_width()
}