use compact_str::{CompactString, ToCompactString};
use smallvec::SmallVec;
use unicode_segmentation::UnicodeSegmentation;
use super::width::TextWidth;
/// One piece of segmented text together with its measured width.
///
/// Values produced by [`TextSegment::new`] store the text they were given and
/// the width reported by `TextWidth::width` for that text. The tests in this
/// file show a width of 1 for ASCII letters and 2 for CJK characters.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextSegment {
/// The text of this segment.
pub grapheme: CompactString,
/// Width of `grapheme`; `TextSegment::new` fills this from `TextWidth::width`.
/// The field is public, so code that builds the struct by hand must keep it
/// consistent itself.
pub width: usize,
}
impl TextSegment {
    /// Builds a segment from anything convertible into a `CompactString`.
    ///
    /// The width is measured once, here, by calling `TextWidth::width` on the
    /// converted text, and is stored alongside it.
    pub fn new(grapheme: impl Into<CompactString>) -> Self {
        let grapheme: CompactString = grapheme.into();
        Self {
            // Measured before `grapheme` is moved into the struct.
            width: TextWidth::width(grapheme.as_str()),
            grapheme,
        }
    }

    /// Builds a segment holding the single character `c`.
    pub fn from_char(c: char) -> Self {
        let text = c.to_compact_string();
        Self::new(text)
    }

    /// Returns `true` when the segment holds no text at all.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.grapheme.as_str().is_empty()
    }

    /// Returns `true` when the stored width is two or more.
    #[inline]
    pub fn is_wide(&self) -> bool {
        self.width >= 2
    }

    /// Returns `true` when the stored width is zero.
    #[inline]
    pub fn is_zero_width(&self) -> bool {
        self.width == 0
    }
}
/// Iterator over a string slice that yields one [`TextSegment`] for every
/// item produced by the wrapped `unicode_segmentation::Graphemes` iterator.
///
/// Borrows the source string for `'a`; the yielded segments own their text.
pub struct TextSegmentIter<'a> {
// Underlying grapheme iterator; `TextSegmentIter::new` creates it with
// `graphemes(true)`.
inner: unicode_segmentation::Graphemes<'a>,
}
impl<'a> TextSegmentIter<'a> {
pub fn new(s: &'a str) -> Self {
Self {
inner: s.graphemes(true),
}
}
}
impl<'a> Iterator for TextSegmentIter<'a> {
    type Item = TextSegment;

    /// Pulls the next grapheme from the wrapped iterator and wraps it in a
    /// [`TextSegment`], measuring its width in the process.
    fn next(&mut self) -> Option<Self::Item> {
        let grapheme = self.inner.next()?;
        Some(TextSegment::new(grapheme))
    }

    /// Forwards the bounds of the wrapped grapheme iterator.
    ///
    /// Every grapheme becomes exactly one segment, so the inner bounds are
    /// valid for this iterator too. Without this override the default
    /// `(0, None)` would hide the upper bound from consumers such as
    /// `collect` and `extend`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}
pub fn segment(s: &str) -> TextSegmentIter<'_> {
TextSegmentIter::new(s)
}
pub fn segment_vec(s: &str) -> SmallVec<[TextSegment; 16]> {
segment(s).collect()
}
/// A string that has been split into [`TextSegment`]s once, with its totals
/// cached for repeated index/column lookups.
///
/// Derives the same traits as [`TextSegment`] so values can be logged, cloned
/// and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SegmentedText {
    /// The segments in source order.
    pub segments: SmallVec<[TextSegment; 16]>,
    /// Sum of the `width` of every segment, computed in `SegmentedText::new`.
    pub total_width: usize,
    /// Number of segments, computed in `SegmentedText::new`.
    ///
    /// The fields are public: code that mutates `segments` directly is
    /// responsible for keeping the two cached totals in sync.
    pub grapheme_count: usize,
}
impl SegmentedText {
    /// Segments `s` and caches the total width and the number of segments.
    pub fn new(s: &str) -> Self {
        let segments: SmallVec<[TextSegment; 16]> = segment(s).collect();
        let total_width = segments.iter().map(|seg| seg.width).sum();
        let grapheme_count = segments.len();
        Self {
            segments,
            total_width,
            grapheme_count,
        }
    }

    /// Returns the segment at `index`, or `None` when `index` is out of range.
    pub fn get(&self, index: usize) -> Option<&TextSegment> {
        self.segments.get(index)
    }

    /// Returns the index of the segment that covers `column`.
    ///
    /// A segment starting at column `c` with width `w` covers the half-open
    /// range `c..c + w`, so both columns of a width-2 segment map to the same
    /// index. Zero-width segments cover no column and are never returned.
    /// Returns `None` when `column` is at or beyond the total width.
    pub fn index_at_column(&self, column: usize) -> Option<usize> {
        // Running end column (exclusive) of the segment being examined.
        let mut end = 0usize;
        self.segments.iter().position(|seg| {
            end += seg.width;
            end > column
        })
    }

    /// Returns the column at which the segment at `index` starts, i.e. the
    /// summed width of all segments before it.
    ///
    /// An `index` at or past the number of segments yields the total width.
    pub fn column_at_index(&self, index: usize) -> usize {
        self.segments.iter().take(index).map(|seg| seg.width).sum()
    }

    /// Concatenates the segments in the index range `start..end`.
    ///
    /// Out-of-range indices are clamped to the available segments. An inverted
    /// range (`end < start`) yields an empty string; the subtraction saturates
    /// instead of panicking in debug builds or wrapping in release builds.
    pub fn slice(&self, start: usize, end: usize) -> CompactString {
        let count = end.saturating_sub(start);
        self.segments
            .iter()
            .skip(start)
            .take(count)
            .map(|seg| seg.grapheme.as_str())
            .collect()
    }

    /// Concatenates every segment that overlaps the column range
    /// `start_col..end_col`.
    ///
    /// A wide segment that is only partially inside the range is included
    /// whole, so the result can be wider than `end_col - start_col`.
    /// An empty or inverted range (`start_col >= end_col`) yields an empty
    /// string.
    pub fn slice_columns(&self, start_col: usize, end_col: usize) -> CompactString {
        let mut result = CompactString::default();
        // An empty range overlaps nothing. Without this guard a wide segment
        // straddling `start_col` would be returned for a zero-column request.
        if start_col >= end_col {
            return result;
        }
        let mut col = 0;
        for seg in &self.segments {
            if col >= end_col {
                break;
            }
            let next_col = col + seg.width;
            if next_col > start_col {
                result.push_str(&seg.grapheme);
            }
            col = next_col;
        }
        result
    }
}
#[cfg(test)]
mod tests {
    use super::{segment, SegmentedText};

    /// ASCII text splits into one width-1 segment per character.
    #[test]
    fn test_segment_ascii() {
        let parts: Vec<_> = segment("Hello").collect();
        assert_eq!(parts.len(), 5);
        let first = &parts[0];
        assert_eq!(first.grapheme.as_str(), "H");
        assert_eq!(first.width, 1);
    }

    /// Hiragana characters are reported with width 2.
    #[test]
    fn test_segment_cjk() {
        let parts: Vec<_> = segment("あいう").collect();
        assert_eq!(parts.len(), 3);
        let first = &parts[0];
        assert_eq!(first.grapheme.as_str(), "あ");
        assert_eq!(first.width, 2);
    }

    /// Narrow and wide characters can be mixed in one string.
    #[test]
    fn test_segment_mixed() {
        let parts: Vec<_> = segment("Hi世界").collect();
        assert_eq!(parts.len(), 4);
        // 'H' is narrow, '世' is wide.
        assert_eq!(parts[0].width, 1);
        assert_eq!(parts[2].width, 2);
    }

    /// An emoji followed by a skin-tone modifier stays a single segment.
    #[test]
    fn test_segment_emoji() {
        let count = segment("👋🏻").count();
        assert_eq!(count, 1);
    }

    /// The cached totals reflect the segment count and the summed widths.
    #[test]
    fn test_segmented_text() {
        let text = SegmentedText::new("Hello世界");
        assert_eq!(text.grapheme_count, 7);
        // 5 narrow + 2 wide characters.
        assert_eq!(text.total_width, 9);
    }

    /// Both columns of a wide character map to the same segment index.
    #[test]
    fn test_index_at_column() {
        let text = SegmentedText::new("Hi世界");
        // (column, expected segment index)
        let cases = [(0, 0), (1, 1), (2, 2), (3, 2), (4, 3)];
        for &(column, index) in cases.iter() {
            assert_eq!(text.index_at_column(column), Some(index));
        }
    }

    /// The start column of a segment is the summed width before it.
    #[test]
    fn test_column_at_index() {
        let text = SegmentedText::new("Hi世界");
        // Position in the array is the segment index.
        let columns = [0, 1, 2, 4];
        for (index, &column) in columns.iter().enumerate() {
            assert_eq!(text.column_at_index(index), column);
        }
    }

    /// With only wide characters every index advances by two columns.
    #[test]
    fn test_column_at_index_japanese_only() {
        let text = SegmentedText::new("あいうえお");
        assert_eq!(text.grapheme_count, 5);
        assert_eq!(text.total_width, 10);
        // Includes index 5, one past the last segment.
        let columns = [0, 2, 4, 6, 8, 10];
        for (index, &column) in columns.iter().enumerate() {
            assert_eq!(text.column_at_index(index), column);
        }
    }

    /// A wide character between narrow ones shifts later columns by two.
    #[test]
    fn test_column_at_index_mixed() {
        let text = SegmentedText::new("aあb");
        assert_eq!(text.grapheme_count, 3);
        assert_eq!(text.total_width, 4);
        let columns = [0, 1, 3, 4];
        for (index, &column) in columns.iter().enumerate() {
            assert_eq!(text.column_at_index(index), column);
        }
    }

    /// Column slicing keeps every character that overlaps the range.
    #[test]
    fn test_slice_columns_japanese() {
        let text = SegmentedText::new("あいうえお");
        assert_eq!(text.slice_columns(2, 8), "いうえ");
        assert_eq!(text.slice_columns(0, 4), "あい");
        // Column 1 is the second half of 'あ', which is still included whole.
        assert_eq!(text.slice_columns(1, 6), "あいう");
    }

    /// Derives a wrapped cursor line/column from `column_at_index`.
    #[test]
    fn test_cursor_wrapping_calculation() {
        let text = SegmentedText::new("あいうえおかきくけこ");
        let area_width = 10;
        // (cursor index, expected column, expected line, expected column in line)
        let cases = [(10, 20, 2, 0), (5, 10, 1, 0), (3, 6, 0, 6)];
        for &(cursor_idx, column, line, column_in_line) in cases.iter() {
            let cursor_col = text.column_at_index(cursor_idx);
            assert_eq!(cursor_col, column);
            assert_eq!(cursor_col / area_width, line);
            assert_eq!(cursor_col % area_width, column_in_line);
        }
    }

    /// Index slicing concatenates the segments in `start..end`.
    #[test]
    fn test_slice() {
        let text = SegmentedText::new("Hello");
        assert_eq!(text.slice(1, 4), "ell");
    }
}