abyo-crdt 0.4.0-alpha.1

//! Rich text CRDT — Peritext-style format spans over a `List<char>`.
//!
//! Implements the [Peritext] algorithm (Litt, Lim, Kleppmann, van Hardenberg
//! — Ink & Switch, 2022). Supports both **boolean annotations** (`bold`,
//! `italic`) and **valued annotations** (`href`, `color`, …) layered over a
//! Fugue-Maximal list of characters, with proper concurrent-edit semantics:
//!
//! - A `set_mark(range, name)` op records anchored start/end positions
//!   relative to specific character `OpId`s — not absolute indices — so the
//!   span tracks correctly across concurrent inserts and deletes.
//! - When two replicas concurrently set the same mark on overlapping ranges,
//!   the operation with the higher [`OpId`] wins (Lamport-ordered).
//! - When a character is inserted into a span's middle, it inherits the
//!   span. Per-span stickiness ([`ExpandRule`]) controls whether boundary
//!   inserts inherit too.
//!
//! [Peritext]: https://www.inkandswitch.com/peritext/
//!
//! ## Yjs / Quill interop
//!
//! [`Text::to_delta`] and [`Text::from_delta`] convert to/from the Quill
//! Delta format that Yjs (`Y.Text`), Quill, Slate, and `ProseMirror` all use,
//! enabling lossy interop with the rest of the rich-text ecosystem (lossy
//! because the source CRDT's full op log can't be reconstructed from a
//! Delta snapshot).
//!
//! ## Quick start
//!
//! ```
//! use abyo_crdt::Text;
//!
//! let mut alice = Text::new(1);
//! alice.insert_str(0, "Hello, world!");
//! alice.set_mark(0..5, "bold", true);
//!
//! let formatted: Vec<(char, Vec<String>)> = alice
//!     .iter_with_marks()
//!     .map(|(c, marks)| (c, marks.iter().map(String::from).collect()))
//!     .collect();
//! assert_eq!(formatted[0], ('H', vec!["bold".to_string()]));
//! assert_eq!(formatted[5], (',', vec![]));
//! ```

use crate::{
    error::Error,
    id::{OpId, ReplicaId},
    list::{List, ListOp},
    version::VersionVector,
};
use std::collections::HashMap;

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

// ---------------------------------------------------------------------------
// Anchors
// ---------------------------------------------------------------------------

/// Which side of an anchored character the anchor sits on.
///
/// Used as the second field of [`Anchor::Char`].
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum AnchorSide {
    /// Immediately to the left of the anchored character.
    Before,
    /// Immediately to the right of the anchored character.
    After,
}

/// A position in the text, expressed relative to a specific character or
/// the document boundaries.
///
/// Anchors are stable across concurrent edits: even if the anchored character
/// is later deleted, the anchor still resolves to a unique position (the
/// position that character would have occupied).
///
/// Span endpoints ([`Span::start`] / [`Span::end`]) are stored as anchors
/// rather than as absolute indices.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Anchor {
    /// The very beginning of the document; resolves to index `0`.
    Start,
    /// The very end of the document; resolves to the current visible length.
    End,
    /// Anchored to a specific character, identified by the [`OpId`] stored
    /// here, on the given [`AnchorSide`] of that character.
    Char(OpId, AnchorSide),
}

// ---------------------------------------------------------------------------
// Spans + ops
// ---------------------------------------------------------------------------

/// The "value" carried by a format span — boolean on/off for marks like
/// `bold`/`italic`, or a string value for marks like `href`/`color`.
///
/// When rendering, `On` and `Set` produce an entry in the character's
/// [`MarkSet`]; `Off` and `Unset` produce none.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum SpanValue {
    /// Turn a boolean mark **on** in the range.
    On,
    /// Turn a boolean mark **off** in the range (cancels a previous `On`).
    Off,
    /// Set a valued mark (e.g. `href = "https://..."`).
    Set(String),
    /// Clear a valued mark (cancels a previous `Set`).
    Unset,
}

/// A format-mark span: applies a named annotation between two anchors.
///
/// A span whose start anchor resolves at or after its end anchor covers no
/// characters and is skipped when rendering.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Span {
    /// Op id of the format op that created this span. `Text` keeps its span
    /// list ordered by this id.
    pub id: OpId,
    /// Inclusive lower anchor.
    pub start: Anchor,
    /// Exclusive upper anchor (positions are < end).
    pub end: Anchor,
    /// Mark name (e.g. `"bold"`, `"italic"`, `"href"`).
    pub name: String,
    /// What this span does at the given range.
    pub value: SpanValue,
}

/// A single text-CRDT operation.
///
/// Character edits and format marks are both carried by this type; use
/// [`TextOp::id`] to obtain the op id of either kind.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TextOp {
    /// A character insertion or deletion in the underlying list.
    Char(ListOp<char>),
    /// A format-mark span.
    Mark(Span),
}

impl TextOp {
    /// The id of this op.
    #[must_use]
    pub fn id(&self) -> OpId {
        match self {
            TextOp::Char(op) => op.id(),
            TextOp::Mark(s) => s.id,
        }
    }
}

/// Stickiness rule for mark anchors.
///
/// Controls whether characters typed at a span's boundaries inherit the
/// mark. Determined per-span at op-creation time, not per-format-name —
/// callers can mix rules for the same name in different ops.
///
/// The rule only influences which anchors are chosen when the span is
/// created; it is not stored on the resulting [`Span`].
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ExpandRule {
    /// Default. Neither end expands. Chars typed at boundaries are NOT marked.
    #[default]
    None,
    /// Right end expands. Typing at the end-of-span boundary inherits the mark.
    Right,
    /// Left end expands. Typing at the start-of-span boundary inherits the mark.
    Left,
    /// Both ends expand.
    Both,
}

// ---------------------------------------------------------------------------
// Per-character mark set
// ---------------------------------------------------------------------------

/// What a mark currently "is" at a character position.
///
/// This is the rendered counterpart of [`SpanValue`]: only active marks are
/// represented, so there is no "off"/"unset" variant.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum MarkValue {
    /// Boolean mark, currently on.
    Boolean,
    /// Valued mark with associated string value.
    Value(String),
}

/// Set of marks active at a single character position.
///
/// Backed by an ordered map keyed by mark name, so iteration is in
/// lexicographic name order.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct MarkSet {
    // Mark name → current value. Only active marks have an entry.
    inner: std::collections::BTreeMap<String, MarkValue>,
}

impl MarkSet {
    /// Creates a set with no active marks.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns `true` when `name` is active, whether boolean or valued.
    pub fn contains(&self, name: &str) -> bool {
        self.inner.get(name).is_some()
    }

    /// Returns the string carried by the valued mark `name`.
    ///
    /// Yields `None` when the mark is absent or is a boolean mark.
    pub fn value_of(&self, name: &str) -> Option<&str> {
        if let Some(MarkValue::Value(text)) = self.inner.get(name) {
            Some(text.as_str())
        } else {
            None
        }
    }

    /// Iterates over the names of all active marks, in lexicographic order.
    pub fn iter(&self) -> impl Iterator<Item = &str> + '_ {
        self.inner.keys().map(|name| name.as_str())
    }

    /// Iterates over every active mark as a `(name, MarkValue)` pair.
    pub fn iter_with_values(&self) -> impl Iterator<Item = (&str, &MarkValue)> + '_ {
        self.inner.iter().map(|(name, value)| (name.as_str(), value))
    }

    /// Iterates over the names of the boolean marks only.
    pub fn iter_booleans(&self) -> impl Iterator<Item = &str> + '_ {
        self.inner
            .iter()
            .filter(|(_, value)| matches!(value, MarkValue::Boolean))
            .map(|(name, _)| name.as_str())
    }

    /// Iterates over the valued marks only, as `(name, value)` string pairs.
    pub fn iter_values(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
        self.inner.iter().filter_map(|(name, value)| {
            if let MarkValue::Value(text) = value {
                Some((name.as_str(), text.as_str()))
            } else {
                None
            }
        })
    }

    /// Number of active marks in the set.
    #[must_use]
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Returns `true` when no mark is active.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }
}

// ---------------------------------------------------------------------------
// Text CRDT
// ---------------------------------------------------------------------------

/// Rich-text CRDT — a [`List<char>`] augmented with Peritext-style
/// format spans.
///
/// `Text` shares its underlying [`List`]'s Lamport clock for both character
/// ops and format ops, so every op gets a unique monotonic [`OpId`].
///
/// Every op, local or remote, is recorded in the combined log and version
/// vector kept here, in addition to whatever the inner list tracks.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Text {
    /// The character sequence itself.
    chars: List<char>,
    /// Format spans, sorted by `id` ASC. Iterated in `OpId` order during
    /// rendering so later ops override earlier ones for the same name on
    /// the same character.
    spans: Vec<Span>,
    /// Replica id (= `chars.replica_id()`; cached for convenience).
    replica: ReplicaId,
    /// Combined event log, in observation order.
    log: Vec<TextOp>,
    /// Combined version vector across both char and mark ops.
    version: VersionVector,
}

impl Text {
    /// Builds an empty document owned by `replica`: no characters, no
    /// spans, an empty op log and a fresh version vector.
    #[must_use]
    pub fn new(replica: ReplicaId) -> Self {
        let chars = List::<char>::new(replica);
        Self {
            chars,
            spans: Vec::new(),
            replica,
            log: Vec::new(),
            version: VersionVector::new(),
        }
    }

    /// Builds an empty document whose [`ReplicaId`] is drawn at random from
    /// OS entropy. See [`crate::new_replica_id`].
    #[must_use]
    pub fn new_random() -> Self {
        let replica = crate::id::new_replica_id();
        Self::new(replica)
    }

    /// This replica's id, as passed to [`Self::new`].
    #[must_use]
    pub fn replica_id(&self) -> ReplicaId {
        self.replica
    }

    /// Length of visible text in **Unicode scalar values** (Rust `char`s) —
    /// **not** bytes and **not** grapheme clusters. Multi-char graphemes
    /// like 👨‍👩‍👧 (5 chars) count as 5.
    ///
    /// This is the unit used by the position arguments of [`Self::insert`],
    /// [`Self::delete`] and [`Self::set_mark`].
    ///
    /// For grapheme-aware length, see [`Self::grapheme_count`].
    #[must_use]
    pub fn len(&self) -> usize {
        self.chars.len()
    }

    /// Is the document empty, i.e. does it have no visible characters?
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.chars.is_empty()
    }

    /// Number of **extended grapheme clusters** (UAX #29) in the visible
    /// text — the "user-perceived character" count, so an emoji like 👨‍👩‍👧
    /// counts as 1. Prefer this for user-facing position math (cursor
    /// coordinates, selection ranges).
    ///
    /// Cost: O(N) — the visible text is materialised and segmented once.
    #[must_use]
    pub fn grapheme_count(&self) -> usize {
        use unicode_segmentation::UnicodeSegmentation;
        let visible = self.chars.iter().collect::<String>();
        visible.as_str().graphemes(true).count()
    }

    /// Maps a grapheme index to the char (scalar) index at which that
    /// grapheme starts.
    ///
    /// Returns `self.len()` when `grapheme_pos` is at or beyond the number
    /// of graphemes in the document.
    ///
    /// Cost: O(N).
    #[must_use]
    pub fn grapheme_to_char_pos(&self, grapheme_pos: usize) -> usize {
        use unicode_segmentation::UnicodeSegmentation;
        let visible: String = self.chars.iter().collect();
        let mut clusters = visible.graphemes(true);
        // Total scalar count of the clusters that precede the requested one.
        let preceding: usize = clusters
            .by_ref()
            .take(grapheme_pos)
            .map(|cluster| cluster.chars().count())
            .sum();
        // Anything left means the requested cluster exists.
        match clusters.next() {
            Some(_) => preceding,
            None => self.len(),
        }
    }

    /// Convert a char (scalar) position to the index of the grapheme that
    /// contains it — i.e. the grapheme whose first char is at or before
    /// `char_pos`.
    ///
    /// A position strictly inside a multi-scalar cluster maps to that
    /// cluster, not to the one following it. Returns `0` for empty docs and
    /// the total grapheme count for positions at or past the end.
    ///
    /// Cost: O(N).
    #[must_use]
    pub fn char_to_grapheme_pos(&self, char_pos: usize) -> usize {
        use unicode_segmentation::UnicodeSegmentation;
        let s: String = self.chars.iter().collect();
        // Char offset just past the grapheme currently being visited.
        let mut end_acc = 0usize;
        // Number of graphemes fully walked so far.
        let mut walked = 0usize;
        for (gi, g) in s.graphemes(true).enumerate() {
            end_acc += g.chars().count();
            // `char_pos` lies in [start of g, end of g): g contains it.
            if char_pos < end_acc {
                return gi;
            }
            walked = gi + 1;
        }
        walked
    }

    /// Inserts `s` so that it starts at grapheme position `g_pos`.
    ///
    /// The grapheme position is translated to a char position with
    /// [`Self::grapheme_to_char_pos`] and the string is then inserted as one
    /// contiguous run via [`Self::insert_str`], so multi-char graphemes in
    /// `s` cannot be split by concurrent edits at the same boundary (the
    /// standard non-interleaving guarantee from Fugue-Maximal).
    ///
    /// Returns one [`TextOp`] per char inserted.
    pub fn insert_grapheme_str(&mut self, g_pos: usize, s: &str) -> Vec<TextOp> {
        let scalar_index = self.grapheme_to_char_pos(g_pos);
        let inserted = self.insert_str(scalar_index, s);
        inserted
    }

    /// Delete the grapheme at grapheme position `g_pos` (atomically, all
    /// of its chars).
    ///
    /// Returns one delete [`TextOp`] per char removed. No-op (empty `Vec`)
    /// if `g_pos` is out of bounds.
    pub fn delete_grapheme(&mut self, g_pos: usize) -> Vec<TextOp> {
        use unicode_segmentation::UnicodeSegmentation;
        let s: String = self.chars.iter().collect();
        // Char offset at which the target grapheme starts.
        let mut char_pos = 0usize;
        // Scalar length of the target grapheme; stays 0 when not found.
        let mut g_chars = 0usize;
        for (gi, g) in s.graphemes(true).enumerate() {
            let scalars = g.chars().count();
            if gi == g_pos {
                g_chars = scalars;
                break;
            }
            char_pos += scalars;
        }
        if g_chars == 0 {
            return Vec::new();
        }
        // Each delete shifts the remaining chars left, so the same visible
        // position is deleted `g_chars` times.
        let mut ops = Vec::with_capacity(g_chars);
        for _ in 0..g_chars {
            ops.push(self.delete(char_pos));
        }
        ops
    }

    /// Returns the visible characters as a plain `String`. Format spans are
    /// not represented; use [`Self::iter_with_marks`] to read them.
    ///
    /// `Text` also implements [`std::fmt::Display`], so `format!("{text}")`
    /// works.
    #[must_use]
    pub fn as_string(&self) -> String {
        let mut rendered = String::new();
        rendered.extend(self.chars.iter());
        rendered
    }

    /// Inserts `ch` at visible position `pos` and returns the resulting op.
    ///
    /// The op is recorded in this replica's version vector and log before
    /// being returned.
    pub fn insert(&mut self, pos: usize, ch: char) -> TextOp {
        let text_op = TextOp::Char(self.chars.insert(pos, ch));
        let id = text_op.id();
        self.version.observe(id);
        self.log.push(text_op.clone());
        text_op
    }

    /// Inserts every char of `s`, in order, starting at visible position
    /// `pos`. Returns one op per char.
    pub fn insert_str(&mut self, pos: usize, s: &str) -> Vec<TextOp> {
        s.chars()
            .enumerate()
            .map(|(offset, ch)| self.insert(pos + offset, ch))
            .collect()
    }

    /// Deletes the character at visible position `pos` and returns the
    /// resulting op.
    ///
    /// The op is recorded in this replica's version vector and log before
    /// being returned.
    pub fn delete(&mut self, pos: usize) -> TextOp {
        let text_op = TextOp::Char(self.chars.delete(pos));
        let id = text_op.id();
        self.version.observe(id);
        self.log.push(text_op.clone());
        text_op
    }

    /// Deletes the visible characters in `range`. Returns one op per
    /// deleted char.
    pub fn delete_range(&mut self, range: std::ops::Range<usize>) -> Vec<TextOp> {
        let first = range.start;
        // Always delete at the range's first position: each removal shifts
        // the remaining chars left, so the next target lands at `first`.
        range.map(|_| self.delete(first)).collect()
    }

    /// Sets or clears a boolean mark over a visible-position range and
    /// returns the format op.
    ///
    /// `on = true` adds the mark; `on = false` removes it (cancels a previous
    /// add). When `on = true` and `on = false` ops conflict on the same
    /// range, the one with the higher [`OpId`] wins.
    ///
    /// **The span does not expand on either side**: chars typed at the span
    /// boundaries do *not* inherit the mark. For other stickiness (e.g.
    /// typing extends bold), use [`Self::set_mark_with_rule`] with an
    /// [`ExpandRule`], or [`Self::set_mark_with_anchors`] with explicit
    /// anchors.
    ///
    /// # Panics
    ///
    /// Panics if `range.start > range.end` or `range.end > self.len()`.
    pub fn set_mark(&mut self, range: std::ops::Range<usize>, name: &str, on: bool) -> TextOp {
        let value = match on {
            true => SpanValue::On,
            false => SpanValue::Off,
        };
        self.set_mark_with_rule(range, name, value, ExpandRule::None)
    }

    /// Sets or clears a **valued** mark over a visible-position range.
    ///
    /// `Some(s)` assigns the value `s`; `None` unsets the mark (cancels a
    /// previous set). The span does not expand at either boundary.
    ///
    /// # Panics
    ///
    /// Panics if `range.start > range.end` or `range.end > self.len()`.
    pub fn set_value_mark(
        &mut self,
        range: std::ops::Range<usize>,
        name: &str,
        value: Option<&str>,
    ) -> TextOp {
        let span_value = value.map_or(SpanValue::Unset, |text| SpanValue::Set(text.to_string()));
        self.set_mark_with_rule(range, name, span_value, ExpandRule::None)
    }

    /// Most general range-based format-op API: sets a span with an explicit
    /// value and stickiness rule.
    ///
    /// The visible-position range is converted to anchors according to
    /// `rule`, then handed to [`Self::set_mark_with_anchors`].
    ///
    /// # Panics
    ///
    /// Panics if `range.start > range.end` or `range.end > self.len()`.
    pub fn set_mark_with_rule(
        &mut self,
        range: std::ops::Range<usize>,
        name: &str,
        value: SpanValue,
        rule: ExpandRule,
    ) -> TextOp {
        let inverted = range.start > range.end;
        assert!(!inverted, "set_mark: empty/inverted range");
        let past_end = range.end > self.len();
        assert!(!past_end, "set_mark: range past end of text");
        let anchors = self.anchors_for_range(range, rule);
        self.set_mark_with_anchors(anchors.0, anchors.1, name, value)
    }

    /// Sets a mark between two explicit anchors and returns the format op.
    ///
    /// Use this when you want fully-custom stickiness or anchors that aren't
    /// expressible via [`ExpandRule`]. The new span is added to the span
    /// list and the op is recorded in the version vector and log.
    pub fn set_mark_with_anchors(
        &mut self,
        start: Anchor,
        end: Anchor,
        name: &str,
        value: SpanValue,
    ) -> TextOp {
        // The id comes from the underlying List's Lamport clock, so chars
        // and marks draw increasing OpIds from one shared namespace.
        let id = self.chars.next_op_id();
        let span = Span {
            id,
            start,
            end,
            name: name.to_owned(),
            value,
        };
        let text_op = TextOp::Mark(span.clone());
        self.insert_span_sorted(span);
        self.version.observe(id);
        self.log.push(text_op.clone());
        text_op
    }

    /// Compute the `(start, end)` anchors for a visible-position range under
    /// the given stickiness rule.
    ///
    /// Callers are expected to have checked `range.start <= range.end` and
    /// `range.end <= self.len()`. An empty range at the very end of a
    /// non-empty document (`start == end == len`) is supported: its start is
    /// anchored after the last character.
    fn anchors_for_range(
        &self,
        range: std::ops::Range<usize>,
        rule: ExpandRule,
    ) -> (Anchor, Anchor) {
        let len = self.len();
        let expand_left = matches!(rule, ExpandRule::Left | ExpandRule::Both);
        let expand_right = matches!(rule, ExpandRule::Right | ExpandRule::Both);

        let start = if range.start == 0 {
            if expand_left || len == 0 {
                // Anchor::Start always sits at position 0, so new inserts
                // at the front go *into* the span. It is also the only
                // option when there is no character to anchor to.
                Anchor::Start
            } else {
                // Anchor before the original first char in range. New
                // inserts at position 0 go BEFORE this anchor.
                Anchor::Char(self.chars.id_at(0).unwrap(), AnchorSide::Before)
            }
        } else if expand_left || range.start == len {
            // After(prev char): new inserts at position range.start go INTO span.
            // Also used when the range starts at the end of the text: no
            // visible char sits at index `len`, so the only character
            // available to anchor to is the previous one.
            Anchor::Char(
                self.chars.id_at(range.start - 1).unwrap(),
                AnchorSide::After,
            )
        } else {
            // Before(start char): new inserts at position range.start go OUT.
            Anchor::Char(self.chars.id_at(range.start).unwrap(), AnchorSide::Before)
        };

        let end = if range.end == 0 {
            // Empty range at start.
            if expand_left {
                Anchor::Start
            } else {
                start
            }
        } else if range.end == len {
            if expand_right {
                // Anchor::End always tracks current doc end → new tail inserts in span.
                Anchor::End
            } else if len == 0 {
                // Defensive: `len == 0` implies `range.end == 0`, which the
                // branch above already handles.
                Anchor::Start
            } else {
                Anchor::Char(self.chars.id_at(range.end - 1).unwrap(), AnchorSide::After)
            }
        } else if expand_right {
            // Before(next char): new inserts at position range.end go INTO span.
            Anchor::Char(self.chars.id_at(range.end).unwrap(), AnchorSide::Before)
        } else {
            // After(last char): new inserts at position range.end go OUT.
            Anchor::Char(self.chars.id_at(range.end - 1).unwrap(), AnchorSide::After)
        };

        (start, end)
    }

    /// Applies the inverse of `op` as a NEW local op. Intended for use with
    /// a caller-managed undo/redo stack:
    ///
    /// ```ignore
    /// let mut undo: Vec<TextOp> = Vec::new();
    /// let mut redo: Vec<TextOp> = Vec::new();
    /// undo.push(text.insert(0, 'H'));   // type
    /// // ... user clicks "undo" ...
    /// if let Some(op) = undo.pop() {
    ///     if let Some(inv) = text.apply_inverse(&op) {
    ///         redo.push(inv);
    ///     }
    /// }
    /// ```
    ///
    /// - `Char(..)` → delegated to the underlying list's `apply_inverse`
    ///   (an insert is tombstoned; a delete re-inserts the original
    ///   character with a fresh `OpId`).
    /// - `Mark(On)` → emits a `Mark(Off)` over the same anchored range.
    /// - `Mark(Set(s))` → emits a `Mark(Unset)` over the same anchored range.
    /// - `Mark(Off)` / `Mark(Unset)` → nothing is applied and `None` is
    ///   returned (the previous value is not reconstructed).
    ///
    /// Returns `None` if the op cannot be inverted (target not in the doc,
    /// or one of the non-invertible mark values above).
    pub fn apply_inverse(&mut self, op: &TextOp) -> Option<TextOp> {
        let span = match op {
            TextOp::Mark(span) => span,
            TextOp::Char(char_op) => {
                let inverse = TextOp::Char(self.chars.apply_inverse(char_op)?);
                self.version.observe(inverse.id());
                self.log.push(inverse.clone());
                return Some(inverse);
            }
        };
        let inverse_value = match span.value {
            SpanValue::On => SpanValue::Off,
            SpanValue::Set(_) => SpanValue::Unset,
            // Off / Unset carry no record of the value they replaced, so
            // there is nothing to restore; the caller should not push
            // anything onto its redo stack.
            SpanValue::Off | SpanValue::Unset => return None,
        };
        // Emitting the cancelling span is the same as setting a new mark
        // over the original anchors: fresh id, sorted insert, version
        // vector and log updated.
        Some(self.set_mark_with_anchors(span.start, span.end, &span.name, inverse_value))
    }

    /// Applies a remote operation. Idempotent: an op whose id is already in
    /// this replica's version vector is ignored.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying list when applying
    /// a character op; in that case the op is not recorded.
    pub fn apply(&mut self, op: TextOp) -> Result<(), Error> {
        let incoming = op.id();
        if self.version.contains(incoming) {
            return Ok(());
        }
        match &op {
            TextOp::Mark(span) => {
                // Marks draw ids from the same Lamport namespace as chars:
                // bump the list's clock so later local ops get higher ids.
                self.chars.observe_external(span.id);
                self.insert_span_sorted(span.clone());
            }
            TextOp::Char(list_op) => self.chars.apply(list_op.clone())?,
        }
        self.version.observe(incoming);
        self.log.push(op);
        Ok(())
    }

    /// Merges another `Text` into this one by applying, in ascending
    /// [`OpId`] order, every op from `other`'s log that this replica has
    /// not yet observed.
    ///
    /// # Panics
    ///
    /// Panics if applying any of those ops fails.
    pub fn merge(&mut self, other: &Self) {
        let mut missing: Vec<&TextOp> = Vec::new();
        for candidate in &other.log {
            if !self.version.contains(candidate.id()) {
                missing.push(candidate);
            }
        }
        missing.sort_by(|left, right| left.id().cmp(&right.id()));
        for op in missing {
            self.apply(op.clone())
                .expect("text apply cannot fail in merge");
        }
    }

    /// All ops in this replica's log, in observation order (local ops as
    /// they were created, remote ops as they were applied).
    #[must_use]
    pub fn ops(&self) -> &[TextOp] {
        &self.log
    }

    /// Iterates, in log order, over the ops whose ids are not contained in
    /// the version vector `since`.
    pub fn ops_since<'a>(
        &'a self,
        since: &'a VersionVector,
    ) -> impl Iterator<Item = &'a TextOp> + 'a {
        let unseen = move |op: &&'a TextOp| !since.contains(op.id());
        self.log.iter().filter(unseen)
    }

    /// All format spans (visible + cancelled), sorted by `OpId`.
    ///
    /// Cancelling spans (`Off` / `Unset`) are included alongside the spans
    /// they override.
    #[must_use]
    pub fn spans(&self) -> &[Span] {
        &self.spans
    }

    /// This replica's current version vector, covering both character ops
    /// and mark ops.
    #[must_use]
    pub fn version(&self) -> &VersionVector {
        &self.version
    }

    /// Iterate over visible characters paired with their active mark set.
    ///
    /// The mark set at each position is computed from the union of all spans
    /// containing that position, with later (higher-`OpId`) spans overriding
    /// earlier ones for the same name.
    ///
    /// Cost: `O(num_chars × num_spans)`. For typical documents (a few dozen
    /// spans) this is fast; for documents with thousands of spans, consider
    /// a more sophisticated index.
    pub fn iter_with_marks(&self) -> Box<dyn Iterator<Item = (char, MarkSet)> + '_> {
        let positions = self.chars.phantom_positions();
        let visible_ids = self.chars.op_ids();
        let len = visible_ids.len();
        // Resolve every span's [start_pos, end_pos) range once.
        // Spans whose anchors reference unknown chars (shouldn't happen with
        // proper causal delivery) are skipped.
        let resolved: Vec<(usize, usize, &Span)> = self
            .spans
            .iter()
            .filter_map(|s| {
                let sp = self.resolve_anchor(&s.start, &positions, len, /*as_end=*/ false)?;
                let ep = self.resolve_anchor(&s.end, &positions, len, /*as_end=*/ true)?;
                if sp >= ep {
                    None
                } else {
                    Some((sp, ep, s))
                }
            })
            .collect();
        Box::new((0..len).map(move |idx| {
            let _ = visible_ids[idx]; // present for future per-char metadata
            let ch = self.chars.get(idx).copied().unwrap_or('\0');
            // Walk spans in OpId-ASC order (already sorted): later ops override
            // for the same mark name.
            let mut state: HashMap<&str, &SpanValue> = HashMap::new();
            for &(sp, ep, span) in &resolved {
                if sp <= idx && idx < ep {
                    state.insert(span.name.as_str(), &span.value);
                }
            }
            let mut marks = MarkSet::new();
            for (name, value) in state {
                match value {
                    SpanValue::On => {
                        marks.inner.insert(name.to_string(), MarkValue::Boolean);
                    }
                    SpanValue::Set(s) => {
                        marks
                            .inner
                            .insert(name.to_string(), MarkValue::Value(s.clone()));
                    }
                    // Off / Unset → no entry (the mark is cancelled).
                    SpanValue::Off | SpanValue::Unset => {}
                }
            }
            (ch, marks)
        }))
    }

    /// Convenience: collect the `(char, marks)` pairs produced by
    /// [`Self::iter_with_marks`] into a `Vec`.
    #[must_use]
    pub fn render(&self) -> Vec<(char, MarkSet)> {
        self.iter_with_marks().collect()
    }

    // -----------------------------------------------------------------------
    // Internals
    // -----------------------------------------------------------------------

    /// Inserts `span` into `self.spans`, keeping the list ordered by span
    /// id. The slot is located with a binary search on the id.
    fn insert_span_sorted(&mut self, span: Span) {
        let slot = match self
            .spans
            .binary_search_by(|existing| existing.id.cmp(&span.id))
        {
            Ok(found) => found,
            Err(insert_at) => insert_at,
        };
        self.spans.insert(slot, span);
    }

    /// Resolves an anchor to a visible position index in `[0, len]`.
    ///
    /// - `Start` → `0`; `End` → `len`.
    /// - `After(c)` with `c` visible → `phantom_pos(c) + 1`.
    /// - `Before(c)`, or either side of a tombstoned `c` →
    ///   `phantom_pos(c)`.
    ///
    /// Returns `None` when the anchored character has no entry in
    /// `positions`.
    ///
    /// `_as_end` says whether the anchor is being used as an exclusive
    /// upper bound; it is currently unused because the rules above give the
    /// same index for start and end anchors.
    fn resolve_anchor(
        &self,
        anchor: &Anchor,
        positions: &HashMap<OpId, usize>,
        len: usize,
        _as_end: bool,
    ) -> Option<usize> {
        let (id, side) = match anchor {
            Anchor::Start => return Some(0),
            Anchor::End => return Some(len),
            Anchor::Char(id, side) => (*id, *side),
        };
        let phantom = *positions.get(&id)?;
        let shown = self.chars.is_visible(id).unwrap_or(false);
        if shown && side == AnchorSide::After {
            // Just past a visible char: the next index.
            Some(phantom + 1)
        } else {
            // Before a visible char, or either side of a tombstone: the
            // phantom position itself.
            Some(phantom)
        }
    }
}

// ---------------------------------------------------------------------------
// Quill / Yjs Delta interop
// ---------------------------------------------------------------------------

/// Value of a single attribute in a Quill/Yjs [`Delta`](DeltaOp) operation.
///
/// In the JSON wire format produced by Quill / `Y.Text.toDelta()`, attribute
/// values are either booleans (e.g. `"bold": true`) or strings (e.g.
/// `"href": "https://..."`). `AttrValue` mirrors that.
///
/// With the `serde` feature the enum is untagged, so a value serializes as
/// the bare boolean or string rather than as a tagged object.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(untagged))]
pub enum AttrValue {
    /// Boolean attribute (e.g. `bold: true`).
    Bool(bool),
    /// String attribute (e.g. `href: "https://example.com"`).
    String(String),
}

/// One run in a Quill/Yjs Delta — a chunk of text with optional attributes.
///
/// Serializes as `{"insert": "...", "attributes": {...}}`, with `attributes`
/// omitted when empty (matching Quill's wire format).
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DeltaOp {
    /// Inserted text run.
    pub insert: String,
    /// Active attributes for this run, keyed by attribute name. Omitted from
    /// the JSON when empty, and defaulted to empty when absent on input.
    #[cfg_attr(
        feature = "serde",
        serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")
    )]
    pub attributes: std::collections::BTreeMap<String, AttrValue>,
}

impl Text {
    /// Snapshot the visible document as a Quill / Yjs **Delta**.
    ///
    /// The result is a list of `{insert: "...", attributes: {...}}` runs.
    /// Neighbouring characters whose attribute maps compare equal are
    /// folded into a single run, so each run is a maximal stretch of
    /// identically formatted text and no run is ever empty. An empty
    /// document yields an empty list.
    ///
    /// Boolean marks become `AttrValue::Bool(true)`; valued marks become
    /// `AttrValue::String` holding a copy of the value.
    ///
    /// This is the shape produced by Quill, [`Y.Text.toDelta()`][toDelta],
    /// Slate's `Text` adapter, etc. It is a **snapshot**: the CRDT op log is
    /// not part of it, so going through `from_delta` → `to_delta` keeps the
    /// visible content but not the history.
    ///
    /// [toDelta]: https://docs.yjs.dev/api/shared-types/y.text#api
    #[must_use]
    pub fn to_delta(&self) -> Vec<DeltaOp> {
        let mut runs: Vec<DeltaOp> = Vec::new();

        for (ch, marks) in self.iter_with_marks() {
            // Translate the marks active on this character into attributes.
            let attrs: std::collections::BTreeMap<String, AttrValue> = marks
                .iter_with_values()
                .map(|(name, val)| {
                    let attr = match val {
                        MarkValue::Boolean => AttrValue::Bool(true),
                        MarkValue::Value(s) => AttrValue::String(s.clone()),
                    };
                    (name.to_string(), attr)
                })
                .collect();

            // Same formatting as the run being built: just grow that run.
            if let Some(run) = runs.last_mut() {
                if run.attributes == attrs {
                    run.insert.push(ch);
                    continue;
                }
            }

            // First character, or the formatting changed: open a new run.
            runs.push(DeltaOp {
                insert: String::from(ch),
                attributes: attrs,
            });
        }

        runs
    }

    /// Rebuild a `Text` owned by `replica` from a Quill / Yjs Delta — the
    /// inverse of [`Self::to_delta`].
    ///
    /// Runs are appended in order, one character at a time. Every attribute
    /// of a run that added text is then applied as a mark over exactly that
    /// run, always with [`ExpandRule::None`] stickiness:
    ///
    /// - `Bool(true)` becomes `SpanValue::On`
    /// - `Bool(false)` becomes `SpanValue::Off`
    /// - `String(s)` becomes `SpanValue::Set` with a copy of `s`
    ///
    /// A run that adds no text is ignored, attributes included.
    pub fn from_delta(replica: ReplicaId, deltas: &[DeltaOp]) -> Self {
        let mut doc = Self::new(replica);

        for op in deltas {
            let start = doc.len();
            for ch in op.insert.chars() {
                let at = doc.len();
                doc.insert(at, ch);
            }
            let end = doc.len();

            // Only runs that actually added text can carry marks.
            if end != start {
                for (name, attr) in &op.attributes {
                    let value = match attr {
                        AttrValue::String(s) => SpanValue::Set(s.clone()),
                        AttrValue::Bool(flag) => {
                            if *flag {
                                SpanValue::On
                            } else {
                                SpanValue::Off
                            }
                        }
                    };
                    doc.set_mark_with_rule(start..end, name, value, ExpandRule::None);
                }
            }
        }

        doc
    }
}

/// The default `Text` is a new document created for replica `0`.
impl Default for Text {
    fn default() -> Self {
        let replica = 0;
        Self::new(replica)
    }
}

/// `format!("{text}")` produces the unformatted character sequence.
///
/// Marks are not rendered; only the characters of the underlying list are
/// written, in order.
impl std::fmt::Display for Text {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A `char` encodes to at most four UTF-8 bytes, and each encoding is
        // written out before the next one is produced, so one scratch buffer
        // serves the whole document.
        let mut utf8 = [0u8; 4];
        self.chars
            .iter()
            .try_for_each(|c| f.write_str(c.encode_utf8(&mut utf8)))
    }
}

// Test-only view of the underlying character list, used by the test module
// below to look up character ids.
#[cfg(test)]
impl Text {
    fn chars_for_test(&self) -> &List<char> {
        let Self { chars, .. } = self;
        chars
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    /// Names of the marks active on the character at visible index `pos`.
    ///
    /// Returns an empty vector when `pos` is past the end of the document.
    fn marks_at(text: &Text, pos: usize) -> Vec<String> {
        match text.iter_with_marks().nth(pos) {
            Some((_, marks)) => marks.iter().map(String::from).collect(),
            None => Vec::new(),
        }
    }

    /// A freshly created document has no characters and renders as "".
    #[test]
    fn empty_doc() {
        let doc = Text::new(1);
        assert!(doc.is_empty());
        assert!(doc.to_string().is_empty());
        assert!(doc.iter_with_marks().next().is_none());
    }

    /// Plain inserted text renders back unchanged and carries no marks.
    #[test]
    fn insert_and_render() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello");

        assert_eq!(doc.len(), 5);
        assert_eq!(doc.to_string(), "Hello");
        assert!(doc.iter_with_marks().all(|(_, marks)| marks.is_empty()));
    }

    /// Marking `0..5` bolds the first and last covered characters and leaves
    /// the character just past the range untouched.
    #[test]
    fn bold_a_range() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello, world!");
        doc.set_mark(0..5, "bold", true);

        for inside in [0, 4] {
            assert_eq!(marks_at(&doc, inside), vec!["bold"]);
        }

        // Index 5 is the comma, the first character outside the range.
        let comma = marks_at(&doc, 5);
        assert!(comma.is_empty());
    }

    /// Setting a mark to `false` over a bolded range removes the bold.
    #[test]
    fn unbold_a_range_via_off_op() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello, world!");

        // Switch bold on, then off again, over the same range.
        for enabled in [true, false] {
            doc.set_mark(0..5, "bold", enabled);
        }

        for pos in [0, 4] {
            assert!(marks_at(&doc, pos).is_empty());
        }
    }

    /// A character typed strictly inside a bold span picks up the span.
    #[test]
    fn typing_in_middle_of_bold_range_inherits() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello");
        doc.set_mark(0..5, "bold", true);

        // Drop an 'X' between 'e' and the first 'l' — inside the span.
        doc.insert(2, 'X');
        assert_eq!(doc.to_string(), "HeXllo");

        let inserted = marks_at(&doc, 2);
        assert_eq!(inserted, vec!["bold"], "X should inherit bold");

        // 'o' was the last character of the marked range. The insert moved
        // it from index 4 to index 5, and it must still be bold.
        let last = marks_at(&doc, 5);
        assert_eq!(last, vec!["bold"]);
    }

    /// With the default `set_mark`, text appended right after the marked
    /// range is not marked.
    #[test]
    fn typing_after_span_does_not_inherit() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello");
        doc.set_mark(0..5, "bold", true);

        // Append just past the end of the bold range.
        doc.insert(5, '!');
        assert_eq!(doc.to_string(), "Hello!");

        // The span ends after 'o', so the '!' that follows is not bold.
        let appended = marks_at(&doc, 5);
        assert!(appended.is_empty());
    }

    /// Two replicas that concurrently set the same mark over the same range
    /// converge, and the op from the higher replica id decides the outcome.
    #[test]
    fn concurrent_mark_higher_op_wins() {
        let mut alice = Text::new(1);
        alice.insert_str(0, "Hello");
        let mut bob = Text::new(2);
        bob.merge(&alice);

        // Concurrent, conflicting edits to the same range.
        alice.set_mark(0..5, "bold", true);
        bob.set_mark(0..5, "bold", false);

        // Exchange state in both directions, each on its own copy.
        let mut alice_merged = alice.clone();
        alice_merged.merge(&bob);
        let mut bob_merged = bob.clone();
        bob_merged.merge(&alice);

        let bold_flags = |doc: &Text| -> Vec<bool> {
            doc.iter_with_marks()
                .map(|(_, marks)| marks.contains("bold"))
                .collect()
        };
        let seen_by_alice = bold_flags(&alice_merged);
        let seen_by_bob = bold_flags(&bob_merged);

        // Both replicas converge.
        assert_eq!(seen_by_alice, seen_by_bob);
        // Bob's op has the higher OpId (replica 2 > replica 1) so it wins → bold off.
        assert_eq!(seen_by_alice, vec![false; 5]);
    }

    /// Applying every op twice gives the same text and marks as applying it
    /// once.
    #[test]
    fn idempotent_apply() {
        let mut source = Text::new(1);
        source.insert_str(0, "Hi");
        source.set_mark(0..2, "italic", true);

        let mut replica = Text::new(2);
        for op in source.ops().to_vec() {
            let duplicate = op.clone();
            replica.apply(op).unwrap();
            replica.apply(duplicate).unwrap(); // second delivery of the same op
        }

        assert_eq!(replica.to_string(), "Hi");
        for pos in [0, 1] {
            assert_eq!(marks_at(&replica, pos), vec!["italic"]);
        }
    }

    /// Deleting a character from a bold range leaves the survivors bold.
    #[test]
    fn deleting_marked_text() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello");
        doc.set_mark(0..5, "bold", true);

        // Remove the leading 'H'.
        doc.delete(0);
        assert_eq!(doc.to_string(), "ello");

        // Every remaining character is still reported as bold; the delete
        // only shifted the visible indices.
        let remaining = doc.len();
        for pos in 0..remaining {
            assert_eq!(marks_at(&doc, pos), vec!["bold"]);
        }
    }

    /// Overlapping ranges of different marks stack where they overlap.
    #[test]
    fn multiple_marks_layer() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello");
        doc.set_mark(0..3, "bold", true);
        doc.set_mark(2..5, "italic", true);

        // Index 0 is covered by bold only.
        assert_eq!(marks_at(&doc, 0), vec!["bold"]);

        // Index 2 sits in the overlap and carries both marks.
        let overlap = marks_at(&doc, 2);
        for expected in ["bold", "italic"] {
            assert!(overlap.iter().any(|mark| mark == expected));
        }

        // Index 4 is covered by italic only.
        assert_eq!(marks_at(&doc, 4), vec!["italic"]);
    }

    // -----------------------------------------------------------------
    // Valued annotations
    // -----------------------------------------------------------------

    /// A valued mark can be read back inside its range and is absent
    /// outside it.
    #[test]
    fn valued_mark_set_and_read() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Click here for the link");
        doc.set_value_mark(6..10, "href", Some("https://example.com"));

        // Index 6 is the 'h' of "here", the first linked character.
        let (ch, marks) = doc.iter_with_marks().nth(6).unwrap();
        assert_eq!(ch, 'h');
        assert_eq!(marks.value_of("href"), Some("https://example.com"));

        // The very first character lies outside the range: no href.
        let (_, first_marks) = doc.iter_with_marks().next().unwrap();
        assert_eq!(first_marks.value_of("href"), None);
    }

    /// Setting a valued mark to `None` removes it from the whole range.
    #[test]
    fn valued_mark_unset() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello");
        doc.set_value_mark(0..5, "color", Some("red"));
        // Passing `None` clears the value again.
        doc.set_value_mark(0..5, "color", None);

        for (_, marks) in doc.iter_with_marks() {
            assert!(marks.value_of("color").is_none());
            assert!(!marks.contains("color"));
        }
    }

    /// Concurrent writes of different values to the same valued mark
    /// converge on the value written by the higher replica id.
    #[test]
    fn valued_mark_concurrent_resolution() {
        let mut alice = Text::new(1);
        alice.insert_str(0, "Link");
        let mut bob = Text::new(2);
        bob.merge(&alice);

        alice.set_value_mark(0..4, "href", Some("https://alice.example/"));
        bob.set_value_mark(0..4, "href", Some("https://bob.example/"));

        let mut alice_merged = alice.clone();
        alice_merged.merge(&bob);
        let mut bob_merged = bob.clone();
        bob_merged.merge(&alice);

        let hrefs = |doc: &Text| -> Vec<Option<String>> {
            doc.iter_with_marks()
                .map(|(_, marks)| marks.value_of("href").map(String::from))
                .collect()
        };
        let seen_by_alice = hrefs(&alice_merged);
        let seen_by_bob = hrefs(&bob_merged);

        assert_eq!(seen_by_alice, seen_by_bob);
        // Bob wins (replica=2 > replica=1) for the same Lamport counter.
        assert_eq!(seen_by_alice[0].as_deref(), Some("https://bob.example/"));
    }

    /// A boolean mark and a valued mark on the same range are both
    /// reported, and each shows up only under its own kind.
    #[test]
    fn boolean_and_valued_marks_coexist() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello");
        doc.set_mark(0..5, "bold", true);
        doc.set_value_mark(0..5, "color", Some("blue"));

        let (_, marks) = doc.iter_with_marks().next().unwrap();
        assert!(marks.contains("bold"));
        assert!(marks.contains("color"));
        assert_eq!(marks.value_of("color"), Some("blue"));
        // `bold` is a boolean mark, so it has no value.
        assert_eq!(marks.value_of("bold"), None);

        // The kind-specific iterators split the two marks apart.
        let boolean_names: Vec<&str> = marks.iter_booleans().collect();
        let valued_pairs: Vec<(&str, &str)> = marks.iter_values().collect();
        assert_eq!(boolean_names, vec!["bold"]);
        assert_eq!(valued_pairs, vec![("color", "blue")]);
    }

    // -----------------------------------------------------------------
    // Expand rules
    // -----------------------------------------------------------------

    fn marks_at_with_set(text: &Text, pos: usize) -> Vec<String> {
        text.iter_with_marks()
            .nth(pos)
            .map(|(_, m)| m.iter().map(String::from).collect())
            .unwrap_or_default()
    }

    /// With `ExpandRule::Right`, characters typed at the end of the span
    /// join it.
    #[test]
    fn expand_right_extends_to_new_typing() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hi");
        doc.set_mark_with_rule(0..2, "bold", SpanValue::On, ExpandRule::Right);

        // Keep typing at the end; both new characters should be bold.
        for (at, ch) in [(2, '!'), (3, '?')] {
            doc.insert(at, ch);
        }
        assert_eq!(doc.as_string(), "Hi!?");

        let total = doc.len();
        for i in 0..total {
            assert_eq!(marks_at_with_set(&doc, i), vec!["bold"], "pos {i}");
        }
    }

    /// `ExpandRule::Right` does not pull in text inserted before the span.
    #[test]
    fn expand_right_does_not_extend_left() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hi");
        doc.set_mark_with_rule(0..2, "bold", SpanValue::On, ExpandRule::Right);

        // Prepend a character; it must stay unformatted.
        doc.insert(0, 'X');
        assert_eq!(doc.as_string(), "XHi");

        let prepended = marks_at_with_set(&doc, 0);
        assert!(prepended.is_empty());
        let original_first = marks_at_with_set(&doc, 1);
        assert_eq!(original_first, vec!["bold"]);
    }

    /// With `ExpandRule::Left`, a character inserted at the front of the
    /// span joins it.
    #[test]
    fn expand_left_extends_to_new_typing_at_front() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hi");
        doc.set_mark_with_rule(0..2, "bold", SpanValue::On, ExpandRule::Left);

        // Prepend a character; expand-left should bold it.
        doc.insert(0, 'X');
        assert_eq!(doc.as_string(), "XHi");

        let prepended = marks_at_with_set(&doc, 0);
        assert_eq!(prepended, vec!["bold"]);
    }

    /// With `ExpandRule::Both`, inserts at either edge of the span join it.
    #[test]
    fn expand_both_extends_both_ways() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "ab");
        doc.set_mark_with_rule(0..2, "bold", SpanValue::On, ExpandRule::Both);

        // One insert at the front edge (expand-left) ...
        doc.insert(0, 'X');
        // ... and one at the back edge (expand-right).
        let tail = doc.len();
        doc.insert(tail, 'Y');
        assert_eq!(doc.as_string(), "XabY");

        let total = doc.len();
        for i in 0..total {
            assert_eq!(marks_at_with_set(&doc, i), vec!["bold"], "pos {i}");
        }
    }

    /// Default `set_mark` does not expand to the right: a character typed
    /// immediately after the marked range stays unmarked.
    ///
    /// Despite its name, this test does not build a custom right-expanding
    /// anchor. Per the original author's notes, that is hard to express
    /// while there is no character after the last one to anchor on. Rule-based
    /// right expansion is covered by `expand_right_extends_to_new_typing`.
    #[test]
    fn anchor_expand_right() {
        let mut t = Text::new(1);
        t.insert_str(0, "Hi");
        // Look up the ids of the first and last characters through the
        // test-only accessor. Nothing below anchors on them; the lookups
        // only have to succeed.
        let start_id = t.chars_for_test().id_at(0).unwrap();
        let end_id = t.chars_for_test().id_at(1).unwrap();
        // Explicitly discard the ids so they do not count as unused.
        let _ = (start_id, end_id);
        t.set_mark(0..2, "bold", true);
        t.insert(2, '!');
        // With our default anchors, '!' is NOT bold (span ends After 'i').
        assert!(marks_at(&t, 2).is_empty());
    }

    // -----------------------------------------------------------------
    // Grapheme handling
    // -----------------------------------------------------------------

    /// `len` counts `char`s while `grapheme_count` counts user-perceived
    /// characters, so a ZWJ emoji family is five of the former and one of
    /// the latter.
    #[test]
    fn grapheme_count_for_emoji_family() {
        let mut doc = Text::new(1);
        // 👨‍👩‍👧 is one grapheme but 5 chars (👨, ZWJ, 👩, ZWJ, 👧).
        doc.insert_str(0, "Hi 👨‍👩‍👧!");

        let char_total = doc.len();
        let grapheme_total = doc.grapheme_count();
        // 'H', 'i', ' ', the 5 chars of the family, '!'
        assert_eq!(char_total, 9);
        // 'H', 'i', ' ', '👨‍👩‍👧', '!'
        assert_eq!(grapheme_total, 5);
    }

    /// Grapheme positions and char positions convert into each other at
    /// every grapheme boundary.
    #[test]
    fn grapheme_pos_conversion_round_trip() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "a👨‍👩‍👧b");
        // Graphemes: 'a'(0), '👨‍👩‍👧'(1), 'b'(2)
        assert_eq!(doc.grapheme_count(), 3);

        // grapheme index → char index
        for (grapheme, expected_char) in [(0, 0), (1, 1), (2, 6)] {
            assert_eq!(doc.grapheme_to_char_pos(grapheme), expected_char);
        }
        // char index → grapheme index
        for (char_pos, expected_grapheme) in [(0, 0), (1, 1), (6, 2)] {
            assert_eq!(doc.char_to_grapheme_pos(char_pos), expected_grapheme);
        }
    }

    /// `insert_grapheme_str` addresses the insertion point in graphemes,
    /// not chars.
    #[test]
    fn insert_grapheme_str_at_grapheme_position() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "a👨‍👩‍👧b");

        // Grapheme position 2 is the gap between 👨‍👩‍👧 and 'b'.
        doc.insert_grapheme_str(2, "Z");

        let rendered = doc.as_string();
        assert_eq!(rendered, "a👨‍👩‍👧Zb");
        assert_eq!(doc.grapheme_count(), 4);
    }

    /// `delete_grapheme` removes every char of a multi-char grapheme.
    #[test]
    fn delete_grapheme_removes_whole_emoji() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "a👨‍👩‍👧b");

        // Grapheme 1 is the whole family emoji.
        doc.delete_grapheme(1);

        let rendered = doc.as_string();
        assert_eq!(rendered, "ab");
        assert_eq!(doc.grapheme_count(), 2);
    }

    // -----------------------------------------------------------------
    // Quill / Yjs Delta interop
    // -----------------------------------------------------------------

    /// Unformatted text exports as one attribute-free run and imports back
    /// to the same string.
    #[test]
    fn delta_round_trip_plain_text() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello, world!");

        let delta = doc.to_delta();
        assert_eq!(delta.len(), 1);
        let only_run = &delta[0];
        assert_eq!(only_run.insert, "Hello, world!");
        assert!(only_run.attributes.is_empty());

        let rebuilt = Text::from_delta(2, &delta);
        assert_eq!(rebuilt.as_string(), "Hello, world!");
    }

    /// Two disjoint boolean marks export as three runs, and importing then
    /// re-exporting that Delta reproduces it exactly.
    #[test]
    fn delta_round_trip_with_marks() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello world");
        doc.set_mark(0..5, "bold", true);
        doc.set_mark(6..11, "italic", true);

        let delta = doc.to_delta();
        // Three runs: "Hello"[bold], " ", "world"[italic]
        assert_eq!(delta.len(), 3);
        let (hello, gap, world) = (&delta[0], &delta[1], &delta[2]);

        assert_eq!(hello.insert, "Hello");
        assert_eq!(hello.attributes.len(), 1);
        assert_eq!(hello.attributes.get("bold"), Some(&AttrValue::Bool(true)));

        assert_eq!(gap.insert, " ");
        assert!(gap.attributes.is_empty());

        assert_eq!(world.insert, "world");
        assert_eq!(world.attributes.get("italic"), Some(&AttrValue::Bool(true)));

        // Import, then export again: the Delta must come back unchanged.
        let rebuilt = Text::from_delta(2, &delta);
        let re_exported = rebuilt.to_delta();
        assert_eq!(delta, re_exported);
        assert_eq!(rebuilt.as_string(), "Hello world");
    }

    /// A valued mark exports as a string attribute and survives an
    /// import / re-export cycle.
    #[test]
    fn delta_with_valued_marks() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Click here");
        doc.set_value_mark(6..10, "href", Some("https://example.com"));

        let delta = doc.to_delta();
        let link_run = delta.last().unwrap();
        assert_eq!(link_run.insert, "here");
        let expected_href = AttrValue::String(String::from("https://example.com"));
        assert_eq!(link_run.attributes.get("href"), Some(&expected_href));

        let rebuilt = Text::from_delta(2, &delta);
        assert_eq!(delta, rebuilt.to_delta());
    }

    /// Overlapping marks split the export wherever the set of active
    /// attributes changes.
    #[test]
    fn delta_overlapping_marks() {
        let mut doc = Text::new(1);
        doc.insert_str(0, "Hello");
        doc.set_mark(0..5, "bold", true);
        doc.set_mark(2..5, "italic", true);

        let delta = doc.to_delta();
        assert_eq!(delta.len(), 2);
        let (bold_only, bold_italic) = (&delta[0], &delta[1]);

        // "He" → bold
        assert_eq!(bold_only.insert, "He");
        assert_eq!(bold_only.attributes.len(), 1);
        // "llo" → bold + italic
        assert_eq!(bold_italic.insert, "llo");
        assert_eq!(bold_italic.attributes.len(), 2);
    }
}