runefix_core/
atom.rs

1//! Atom segmentation (width-aware layout units).
2//!
3//! This module defines [`atoms()`], a runefix-specific alternative to graphemes.
4//! It segments a string into visual display units for terminal and TUI rendering.
5//!
6//! This helps solve alignment bugs in monospaced environments caused by emoji and CJK widths.
7
8use crate::RuneDisplayWidth;
9
10/// Splits the input string into **layout atoms** — visual units used for width-aware layout.
11///
12/// This is a **runefix-specific segmentation**, based on actual display width, not linguistic boundaries.
13/// It differs from [`graphemes()`] (which follows Unicode UAX #29) by focusing purely on units that affect layout:
14///
15/// - Characters with width = 0 (e.g., combining marks, control codes) are grouped with their leading base
16/// - Emoji sequences (e.g. ZWJ, variation selectors) are preserved as atomic units
17/// - Output is suitable for TUI rendering, Markdown table layout, and CLI alignment
18///
19/// # Example
20/// ```
21/// use runefix_core::atoms;
22/// assert_eq!(atoms("👩‍❤️‍💋‍👨"), vec!["👩", "\u{200d}", "❤", "\u{fe0f}", "\u{200d}", "💋", "\u{200d}", "👨"]);
23/// ```
24///
25/// # Note
26/// This function is **not** Unicode-compliant segmentation. For that, see [`graphemes()`].
27pub fn atoms(s: &str) -> Vec<&str> {
28    let mut atoms = Vec::new(); // Store resulting display atoms
29    let mut start = 0; // Current segment start position
30
31    for (i, c) in s.char_indices() {
32        // Determine if this char has visual width
33        let w = c.width();
34
35        if w > 0 {
36            if start < i {
37                // Push preceding zero-width chars (e.g. ZWJ, marks)
38                atoms.push(&s[start..i]);
39            }
40
41            // Push current width-bearing char as an atom
42            atoms.push(&s[i..i + c.len_utf8()]);
43            start = i + c.len_utf8();
44        }
45    }
46
47    if start < s.len() {
48        // Push trailing zero-width sequence if any
49        atoms.push(&s[start..]);
50    }
51
52    atoms
53}