use unicode_segmentation::UnicodeSegmentation;
/// Returns the byte offset at which the grapheme cluster preceding `pos` starts.
///
/// Clusters are the ones produced by `grapheme_indices(true)`. The result is
/// the largest cluster start that is strictly less than `pos`, so a `pos`
/// that falls inside a cluster yields the start of that same cluster.
///
/// * `s` - the text to navigate.
/// * `pos` - a byte offset into `s`; values beyond `s.len()` are clamped.
///
/// Returns `0` when `pos` is `0` or `s` is empty.
#[inline]
pub fn prev_grapheme_boundary(s: &str, pos: usize) -> usize {
    // Nothing can precede offset 0, and an empty string has no clusters.
    if s.is_empty() || pos == 0 {
        return 0;
    }

    let limit = pos.min(s.len());

    // Cluster starts come out in ascending order, so the last one that is
    // still below the limit is the boundary we are looking for.
    s.grapheme_indices(true)
        .map(|(start, _)| start)
        .take_while(|&start| start < limit)
        .last()
        .unwrap_or(0)
}
/// Returns the byte offset just past the grapheme cluster that starts at or
/// contains `pos`.
///
/// Clusters are the ones produced by `grapheme_indices(true)`. A `pos` that
/// falls inside a cluster yields the end of that same cluster.
///
/// * `s` - the text to navigate.
/// * `pos` - a byte offset into `s`.
///
/// Returns `s.len()` when `pos` is at or beyond the end of `s` (which
/// includes every `pos` for an empty string).
#[inline]
pub fn next_grapheme_boundary(s: &str, pos: usize) -> usize {
    let len = s.len();
    if pos >= len {
        return len;
    }

    // Clusters are contiguous and never empty, so the first one whose end
    // lies beyond `pos` is the one that starts at or covers `pos`.
    s.grapheme_indices(true)
        .map(|(start, cluster)| start + cluster.len())
        .find(|&end| end > pos)
        .unwrap_or(len)
}
/// Looks up the grapheme cluster that covers byte offset `pos`.
///
/// Clusters are the ones produced by `grapheme_indices(true)`.
///
/// * `s` - the text to inspect.
/// * `pos` - a byte offset into `s`; it may point anywhere inside a cluster.
///
/// Returns `Some((cluster, start, end))`, where `start..end` is the
/// half-open byte range of `cluster` within `s`, or `None` when `pos` is at
/// or beyond the end of `s` (which includes every `pos` for an empty string).
#[inline]
pub fn grapheme_at(s: &str, pos: usize) -> Option<(&str, usize, usize)> {
    if pos >= s.len() {
        return None;
    }

    s.grapheme_indices(true)
        .map(|(start, cluster)| (cluster, start, start + cluster.len()))
        .find(|&(_, start, end)| (start..end).contains(&pos))
}
/// Counts the grapheme clusters in `s`, as produced by `graphemes(true)`.
///
/// Returns `0` for an empty string.
#[inline]
pub fn grapheme_count(s: &str) -> usize {
    let clusters = s.graphemes(true);
    clusters.count()
}
/// Unit tests for the grapheme navigation helpers. All positions and
/// expected values are byte offsets into the test string.
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII text: every byte is its own cluster.
    #[test]
    fn test_ascii_navigation() {
        let s = "hello";
        assert_eq!(prev_grapheme_boundary(s, 0), 0);
        assert_eq!(prev_grapheme_boundary(s, 1), 0);
        assert_eq!(prev_grapheme_boundary(s, 3), 2);
        assert_eq!(next_grapheme_boundary(s, 0), 1);
        assert_eq!(next_grapheme_boundary(s, 4), 5);
        assert_eq!(next_grapheme_boundary(s, 5), 5);
    }

    /// One Thai cluster: a consonant followed by two combining marks
    /// (U+0E17 U+0E35 U+0E48), three bytes each.
    #[test]
    fn test_thai_navigation() {
        let s = "ที่";
        assert_eq!(s.len(), 9);
        assert_eq!(grapheme_count(s), 1);
        assert_eq!(next_grapheme_boundary(s, 0), 9);
        assert_eq!(prev_grapheme_boundary(s, 9), 0);
        // Offset 3 lies inside the cluster: both directions snap to its edges.
        assert_eq!(next_grapheme_boundary(s, 3), 9);
        assert_eq!(prev_grapheme_boundary(s, 3), 0);
    }

    /// Two consecutive nine-byte Thai clusters.
    #[test]
    fn test_thai_multiple_graphemes() {
        let s = "ที่นี่";
        assert_eq!(s.len(), 18);
        assert_eq!(grapheme_count(s), 2);
        assert_eq!(next_grapheme_boundary(s, 0), 9);
        assert_eq!(next_grapheme_boundary(s, 9), 18);
        assert_eq!(prev_grapheme_boundary(s, 18), 9);
        assert_eq!(prev_grapheme_boundary(s, 9), 0);
    }

    /// A zero-width-joiner emoji sequence must be treated as one cluster.
    #[test]
    fn test_emoji_navigation() {
        // MAN + ZWJ + WOMAN + ZWJ + GIRL. The joiners (U+200D) are invisible,
        // so they are written as escapes to keep them from being lost when
        // the literal is edited or copied.
        let s = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}";
        assert_eq!(grapheme_count(s), 1);
        assert_eq!(next_grapheme_boundary(s, 0), s.len());
        assert_eq!(prev_grapheme_boundary(s, s.len()), 0);
    }

    /// A base letter plus a combining accent: two chars, one cluster.
    #[test]
    fn test_combining_diacritics() {
        let s = "e\u{0301}";
        assert_eq!(s.chars().count(), 2);
        assert_eq!(grapheme_count(s), 1);
        assert_eq!(next_grapheme_boundary(s, 0), s.len());
        assert_eq!(prev_grapheme_boundary(s, s.len()), 0);
    }

    /// ASCII on both sides of a nine-byte Thai cluster.
    #[test]
    fn test_mixed_content() {
        let s = "aที่b";
        assert_eq!(s.len(), 11);
        assert_eq!(grapheme_count(s), 3);

        assert_eq!(next_grapheme_boundary(s, 0), 1);
        assert_eq!(next_grapheme_boundary(s, 1), 10);
        assert_eq!(next_grapheme_boundary(s, 10), 11);

        assert_eq!(prev_grapheme_boundary(s, 11), 10);
        assert_eq!(prev_grapheme_boundary(s, 10), 1);
        assert_eq!(prev_grapheme_boundary(s, 1), 0);
    }

    /// `grapheme_at` returns the covering cluster and its byte range.
    #[test]
    fn test_grapheme_at() {
        let s = "aที่b";

        let (g, start, end) = grapheme_at(s, 0).unwrap();
        assert_eq!(g, "a");
        assert_eq!((start, end), (0, 1));

        let (g, start, end) = grapheme_at(s, 1).unwrap();
        assert_eq!(g, "ที่");
        assert_eq!((start, end), (1, 10));

        // Offset 5 is in the middle of the Thai cluster.
        let (g, start, end) = grapheme_at(s, 5).unwrap();
        assert_eq!(g, "ที่");
        assert_eq!((start, end), (1, 10));

        let (g, start, end) = grapheme_at(s, 10).unwrap();
        assert_eq!(g, "b");
        assert_eq!((start, end), (10, 11));

        // One past the last byte: no cluster there.
        assert!(grapheme_at(s, 11).is_none());
    }

    /// Every helper must cope with an empty string.
    #[test]
    fn test_empty_string() {
        let s = "";
        assert_eq!(prev_grapheme_boundary(s, 0), 0);
        assert_eq!(next_grapheme_boundary(s, 0), 0);
        assert_eq!(grapheme_count(s), 0);
        assert!(grapheme_at(s, 0).is_none());
    }

    /// Positions past the end of the string are clamped rather than panicking.
    #[test]
    fn test_out_of_range_positions() {
        let s = "hello";
        assert_eq!(prev_grapheme_boundary(s, 100), 4);
        assert_eq!(next_grapheme_boundary(s, 100), 5);
        assert!(grapheme_at(s, 100).is_none());
    }
}