Skip to main content

agent_doc/
crdt.rs

1//! # Module: crdt
2//!
3//! ## Spec
4//! - `CrdtDoc::from_text(content)`: construct a new Yrs document pre-populated with text.
5//! - `CrdtDoc::to_text()`: extract current UTF-8 text from the document.
6//! - `CrdtDoc::apply_edit(offset, delete_len, insert)`: apply a local delta edit (delete then
7//!   insert at byte offset) within a single transaction.
8//! - `CrdtDoc::encode_state()`: serialize full document state as a v1 update byte vector for
9//!   persistence or cross-doc merge.
10//! - `CrdtDoc::decode_state(bytes)`: deserialize a previously encoded state into a new doc.
11//! - `merge(base_state, ours_text, theirs_text)`: three-way CRDT merge.
12//!   1. Decodes base state (or starts from empty if `None`).
13//!   2. Detects stale base: if base shares <50% common prefix with both sides, replaces base
14//!      with `ours_text` to prevent duplicate insertions.
15//!   3. Advances base to the line-snapped common prefix of ours and theirs when that prefix
16//!      extends beyond the current base — prevents duplication of shared new content.
17//!   4. Computes `similar`-based line diffs from base → ours and base → theirs.
18//!   5. Replays diffs onto two independent Yrs docs (agent=client_id 1, user=client_id 2).
19//!   6. Merges by applying theirs' incremental update into the agent doc.
20//!   7. Runs `dedup_adjacent_blocks` to remove identical adjacent paragraph-level blocks.
21//!   8. Returns the merged string (always conflict-free).
22//! - `dedup_adjacent_blocks(text)`: removes duplicate adjacent blocks (separated by `\n\n`)
23//!   where a block has ≥2 non-empty lines, to clean up CRDT double-insertion artifacts.
24//! - `compact(state)`: decode then re-encode a CRDT state to GC tombstones.
25//!
26//! ## Agentic Contracts
27//! - `merge` is always conflict-free — it never produces conflict markers.
28//! - Agent content (client_id 1) appears before user content (client_id 2) at identical
29//!   insertion points due to Yrs' deterministic client-ID ordering.
30//! - Short-circuit: if `ours_text == theirs_text`, `merge` returns immediately without any
31//!   CRDT operations.
32//! - Stale base detection prevents duplicate insertions across multiple merge cycles.
33//! - Shared common prefix (line-boundary snapped) is never inserted twice.
34//! - `dedup_adjacent_blocks` only removes blocks with ≥2 non-empty lines; single-line
35//!   repeated content (e.g., `---`) is not deduplicated to avoid false positives.
36//! - `encode_state` / `decode_state` are inverse operations: round-trip preserves all text.
37//! - `compact` is idempotent: compacting already-compact state is a no-op in terms of text.
38//!
39//! ## Evals
40//! - `roundtrip_text`: `from_text(s).to_text() == s` for arbitrary content.
41//! - `encode_decode_roundtrip`: encode then decode preserves text exactly.
42//! - `apply_edit_insert`: insert at offset 0 prepends correctly.
43//! - `apply_edit_delete`: delete range removes exact byte count.
44//! - `apply_edit_replace`: delete + insert at same offset replaces content.
45//! - `merge_both_append`: both sides add different text → both present, no conflict.
46//! - `merge_agent_ordering`: agent and user insert at same position → agent content first.
47//! - `merge_identical_sides`: ours == theirs → short-circuit, result equals either side.
48//! - `merge_no_base_state`: `None` base → valid merged result, both sides present.
49//! - `merge_stale_base_no_duplicates`: base is stale (< 50% common prefix) → no duplication.
50//! - `merge_shared_prefix_no_duplication`: ours and theirs share new content beyond base →
51//!   shared content appears exactly once.
52//! - `dedup_adjacent_blocks_removes_duplicate`: two identical adjacent multi-line blocks →
53//!   deduplicated to one.
54//! - `dedup_adjacent_blocks_preserves_short`: single-line repeated blocks left intact.
55//! - `compact_preserves_text`: compact state → decoded text unchanged.
56
57use anyhow::{Context, Result};
58use yrs::updates::decoder::Decode;
59use yrs::{Doc, GetString, ReadTxn, Text, TextRef, Transact, Update};
60
61const TEXT_KEY: &str = "content";
62
/// CRDT document wrapping a Yrs `Doc` for conflict-free merging.
///
/// The methods on this type read and write the shared text registered under
/// `TEXT_KEY` inside the wrapped document.
pub struct CrdtDoc {
    /// Underlying Yrs document holding the shared text.
    doc: Doc,
}
67
68impl CrdtDoc {
69    /// Create a new CRDT document initialized with the given text content.
70    pub fn from_text(content: &str) -> Self {
71        let doc = Doc::new();
72        let text = doc.get_or_insert_text(TEXT_KEY);
73        let mut txn = doc.transact_mut();
74        text.insert(&mut txn, 0, content);
75        drop(txn);
76        CrdtDoc { doc }
77    }
78
79    /// Extract the current text content from the CRDT document.
80    pub fn to_text(&self) -> String {
81        let text = self.doc.get_or_insert_text(TEXT_KEY);
82        let txn = self.doc.transact();
83        text.get_string(&txn)
84    }
85
86    /// Apply a local edit: delete `delete_len` chars at `offset`, then insert `insert` there.
87    #[allow(dead_code)] // Used in tests and Phase 4 stream write-back
88    pub fn apply_edit(&self, offset: u32, delete_len: u32, insert: &str) {
89        let text = self.doc.get_or_insert_text(TEXT_KEY);
90        let mut txn = self.doc.transact_mut();
91        if delete_len > 0 {
92            text.remove_range(&mut txn, offset, delete_len);
93        }
94        if !insert.is_empty() {
95            text.insert(&mut txn, offset, insert);
96        }
97    }
98
99    /// Encode the full document state (for persistence).
100    pub fn encode_state(&self) -> Vec<u8> {
101        let txn = self.doc.transact();
102        txn.encode_state_as_update_v1(&yrs::StateVector::default())
103    }
104
105    /// Decode a previously encoded state into a new CrdtDoc.
106    pub fn decode_state(bytes: &[u8]) -> Result<Self> {
107        let doc = Doc::new();
108        let update = Update::decode_v1(bytes)
109            .map_err(|e| anyhow::anyhow!("failed to decode CRDT state: {}", e))?;
110        let mut txn = doc.transact_mut();
111        txn.apply_update(update)
112            .map_err(|e| anyhow::anyhow!("failed to apply CRDT update: {}", e))?;
113        drop(txn);
114        Ok(CrdtDoc { doc })
115    }
116}
117
118/// Merge two concurrent text versions against a common base using CRDT.
119///
120/// Creates three CRDT actors: base, ours, theirs.
121/// Applies each side's edits as diffs from the base, then merges updates.
122/// Returns the merged text (conflict-free).
123///
124/// **Stale base detection:** If the CRDT base text doesn't match either ours
125/// or theirs as a prefix/substring, the base is stale. In that case, we use
126/// `ours_text` as the base to prevent duplicate insertions.
127pub fn merge(base_state: Option<&[u8]>, ours_text: &str, theirs_text: &str) -> Result<String> {
128    // Short-circuit: if both sides are identical, no merge needed
129    if ours_text == theirs_text {
130        eprintln!("[crdt] ours == theirs, skipping merge");
131        return Ok(ours_text.to_string());
132    }
133
134    // Bootstrap base doc from state or empty
135    let base_doc = if let Some(bytes) = base_state {
136        CrdtDoc::decode_state(bytes)
137            .context("failed to decode base CRDT state")?
138    } else {
139        CrdtDoc::from_text("")
140    };
141    let mut base_text = base_doc.to_text();
142
143    eprintln!(
144        "[crdt] merge: base_len={} ours_len={} theirs_len={}",
145        base_text.len(),
146        ours_text.len(),
147        theirs_text.len()
148    );
149
150    // Stale base detection: if the base text doesn't share a common prefix
151    // with both sides, it's stale. Use ours as the base instead.
152    // This prevents duplicate insertions when both sides contain text
153    // that the stale base doesn't have.
154    let ours_common = common_prefix_len(&base_text, ours_text);
155    let theirs_common = common_prefix_len(&base_text, theirs_text);
156    let base_len = base_text.len();
157
158    if base_len > 0
159        && (ours_common as f64 / base_len as f64) < 0.5
160        && (theirs_common as f64 / base_len as f64) < 0.5
161    {
162        eprintln!(
163            "[crdt] Stale CRDT base detected (common prefix: ours={}%, theirs={}%). Using ours as base.",
164            (ours_common * 100) / base_len,
165            (theirs_common * 100) / base_len
166        );
167        base_text = ours_text.to_string();
168    }
169
170    // Advance base to the common prefix of ours and theirs when it extends
171    // beyond the current base.
172    //
173    // When both ours and theirs independently added the same text beyond the
174    // stale base (e.g., both contain a user prompt that the base doesn't have),
175    // the CRDT treats each insertion as independent and includes both, causing
176    // duplication. Fix: use the common prefix of ours and theirs as the effective
177    // base, so shared additions are not treated as independent insertions.
178    //
179    // This handles the common pattern where:
180    //   base   = "old content"
181    //   ours   = "old content + user prompt + agent response"
182    //   theirs = "old content + user prompt + small edit"
183    // Without fix: user prompt appears twice (from both sides).
184    // With fix: base advances to "old content + user prompt", ours' diff is
185    //           just the agent response, theirs' diff is just the small edit.
186    let mutual_prefix = common_prefix_len(ours_text, theirs_text);
187    if mutual_prefix > base_text.len() {
188        // Snap to a line boundary to avoid splitting mid-line/mid-word.
189        // Without this, the shared prefix can include partial formatting
190        // sequences (e.g., a leading `*` from `**bold**`), causing the
191        // CRDT merge to separate that character from the rest of the
192        // formatting, producing garbled text like `*Soft-bristle brush only**`
193        // instead of `**Soft-bristle brush only**`.
194        let snap = &ours_text[..mutual_prefix];
195        let snapped = match snap.rfind('\n') {
196            Some(pos) if pos >= base_text.len() => pos + 1,
197            _ => base_text.len(), // no suitable line boundary — don't advance
198        };
199        if snapped > base_text.len() {
200            eprintln!(
201                "[crdt] Advancing base to shared prefix (base_len={} → {})",
202                base_text.len(),
203                snapped
204            );
205            base_text = ours_text[..snapped].to_string();
206        }
207    }
208
209    // Compute diffs from base to each side
210    let ours_ops = compute_edit_ops(&base_text, ours_text);
211    let theirs_ops = compute_edit_ops(&base_text, theirs_text);
212
213    // Create two independent docs from the base state.
214    // If base was overridden (stale detection), rebuild from the new base_text.
215    let base_encoded = if base_text == base_doc.to_text() {
216        base_doc.encode_state()
217    } else {
218        CrdtDoc::from_text(&base_text).encode_state()
219    };
220
221    // Agent gets lower client ID (1) so Yrs natively places agent content
222    // BEFORE human content when both insert at the same position.
223    // Yrs orders concurrent inserts by client ID: lower client ID goes first.
224    let ours_doc = Doc::with_client_id(1);
225    {
226        let update = Update::decode_v1(&base_encoded)
227            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
228        let mut txn = ours_doc.transact_mut();
229        txn.apply_update(update)
230            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
231    }
232
233    let theirs_doc = Doc::with_client_id(2);
234    {
235        let update = Update::decode_v1(&base_encoded)
236            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
237        let mut txn = theirs_doc.transact_mut();
238        txn.apply_update(update)
239            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
240    }
241
242    // Apply ours edits
243    {
244        let text = ours_doc.get_or_insert_text(TEXT_KEY);
245        let mut txn = ours_doc.transact_mut();
246        apply_ops(&text, &mut txn, &ours_ops);
247    }
248
249    // Apply theirs edits
250    {
251        let text = theirs_doc.get_or_insert_text(TEXT_KEY);
252        let mut txn = theirs_doc.transact_mut();
253        apply_ops(&text, &mut txn, &theirs_ops);
254    }
255
256    // Merge: apply theirs' changes into ours
257    let ours_sv = {
258        let txn = ours_doc.transact();
259        txn.state_vector()
260    };
261    let theirs_update = {
262        let txn = theirs_doc.transact();
263        txn.encode_state_as_update_v1(&ours_sv)
264    };
265    {
266        let update = Update::decode_v1(&theirs_update)
267            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
268        let mut txn = ours_doc.transact_mut();
269        txn.apply_update(update)
270            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
271    }
272
273    // Read merged result. With agent=client_id(2) and human=client_id(1),
274    // Yrs natively places agent content before human content at the same
275    // insertion point. No post-merge reorder needed.
276    let merged = {
277        let text = ours_doc.get_or_insert_text(TEXT_KEY);
278        let txn = ours_doc.transact();
279        text.get_string(&txn)
280    };
281
282    // Post-merge dedup: remove identical adjacent blocks (#15)
283    Ok(dedup_adjacent_blocks(&merged))
284}
285
/// Drop a block when it repeats the block kept immediately before it.
///
/// Blocks are the pieces of `text` between `"\n\n"` separators, compared
/// after trimming surrounding whitespace. A block is only eligible for removal
/// when it has at least two non-empty lines, so short repeated content such as
/// `---` is left alone. Kept blocks are re-joined with `"\n\n"` in their
/// original (untrimmed) form.
pub fn dedup_adjacent_blocks(text: &str) -> String {
    let mut kept: Vec<&str> = Vec::new();

    for candidate in text.split("\n\n") {
        let body = candidate.trim();
        let line_count = body
            .lines()
            .filter(|line| !line.trim().is_empty())
            .count();
        let repeats_previous = matches!(kept.last(), Some(prev) if prev.trim() == body);

        if line_count >= 2 && repeats_previous {
            eprintln!("[crdt] dedup: removed duplicate block ({} lines)", line_count);
        } else {
            kept.push(candidate);
        }
    }

    kept.join("\n\n")
}
316
317/// Compact a CRDT state by re-encoding (GC tombstones where possible).
318pub fn compact(state: &[u8]) -> Result<Vec<u8>> {
319    let doc = CrdtDoc::decode_state(state)?;
320    Ok(doc.encode_state())
321}
322
/// Length, in bytes, of the longest prefix shared by `a` and `b`.
///
/// The comparison is byte-wise, so the result is not guaranteed to fall on a
/// UTF-8 character boundary.
fn common_prefix_len(a: &str, b: &str) -> usize {
    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
    lhs.iter()
        .zip(rhs)
        .position(|(x, y)| x != y)
        .unwrap_or_else(|| lhs.len().min(rhs.len()))
}
327
/// Edit operation for replaying diffs onto a CRDT text.
///
/// Produced by `compute_edit_ops` and consumed in order by `apply_ops`, which
/// keeps a running cursor into the text.
#[derive(Debug)]
enum EditOp {
    /// Advance the cursor by this many units without changing the text.
    Retain(u32),
    /// Remove this many units starting at the cursor; the cursor does not move.
    Delete(u32),
    /// Insert the string at the cursor, then advance past it.
    Insert(String),
}
335
336/// Compute edit operations to transform `from` into `to` using `similar` diff.
337fn compute_edit_ops(from: &str, to: &str) -> Vec<EditOp> {
338    use similar::{ChangeTag, TextDiff};
339
340    let diff = TextDiff::from_lines(from, to);
341    let mut ops = Vec::new();
342
343    for change in diff.iter_all_changes() {
344        match change.tag() {
345            ChangeTag::Equal => {
346                let len = change.value().len() as u32;
347                if let Some(EditOp::Retain(n)) = ops.last_mut() {
348                    *n += len;
349                } else {
350                    ops.push(EditOp::Retain(len));
351                }
352            }
353            ChangeTag::Delete => {
354                let len = change.value().len() as u32;
355                if let Some(EditOp::Delete(n)) = ops.last_mut() {
356                    *n += len;
357                } else {
358                    ops.push(EditOp::Delete(len));
359                }
360            }
361            ChangeTag::Insert => {
362                let s = change.value().to_string();
363                if let Some(EditOp::Insert(existing)) = ops.last_mut() {
364                    existing.push_str(&s);
365                } else {
366                    ops.push(EditOp::Insert(s));
367                }
368            }
369        }
370    }
371
372    ops
373}
374
375/// Apply edit operations to a Yrs text type within a transaction.
376fn apply_ops(text: &TextRef, txn: &mut yrs::TransactionMut<'_>, ops: &[EditOp]) {
377    let mut cursor: u32 = 0;
378    for op in ops {
379        match op {
380            EditOp::Retain(n) => cursor += n,
381            EditOp::Delete(n) => {
382                text.remove_range(txn, cursor, *n);
383                // cursor stays — content shifted left
384            }
385            EditOp::Insert(s) => {
386                text.insert(txn, cursor, s);
387                cursor += s.len() as u32;
388            }
389        }
390    }
391}
392
393#[cfg(test)]
394mod tests {
395    use super::*;
396
397    #[test]
398    fn roundtrip_text() {
399        let content = "Hello, world!\nLine two.\n";
400        let doc = CrdtDoc::from_text(content);
401        assert_eq!(doc.to_text(), content);
402    }
403
404    #[test]
405    fn roundtrip_encode_decode() {
406        let content = "Some document content.\n";
407        let doc = CrdtDoc::from_text(content);
408        let encoded = doc.encode_state();
409        let decoded = CrdtDoc::decode_state(&encoded).unwrap();
410        assert_eq!(decoded.to_text(), content);
411    }
412
413    #[test]
414    fn apply_edit_insert() {
415        let doc = CrdtDoc::from_text("Hello world");
416        doc.apply_edit(5, 0, ",");
417        assert_eq!(doc.to_text(), "Hello, world");
418    }
419
420    #[test]
421    fn apply_edit_delete() {
422        let doc = CrdtDoc::from_text("Hello, world");
423        doc.apply_edit(5, 1, "");
424        assert_eq!(doc.to_text(), "Hello world");
425    }
426
427    #[test]
428    fn apply_edit_replace() {
429        let doc = CrdtDoc::from_text("Hello world");
430        doc.apply_edit(6, 5, "Rust");
431        assert_eq!(doc.to_text(), "Hello Rust");
432    }
433
434    #[test]
435    fn concurrent_append_merge_no_conflict() {
436        let base = "# Document\n\nBase content.\n";
437        let base_doc = CrdtDoc::from_text(base);
438        let base_state = base_doc.encode_state();
439
440        let ours = format!("{base}## Agent\n\nAgent response.\n");
441        let theirs = format!("{base}## User\n\nUser addition.\n");
442
443        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
444
445        // Both additions should be present
446        assert!(merged.contains("Agent response."), "missing agent text");
447        assert!(merged.contains("User addition."), "missing user text");
448        assert!(merged.contains("Base content."), "missing base text");
449        // No conflict markers
450        assert!(!merged.contains("<<<<<<<"));
451        assert!(!merged.contains(">>>>>>>"));
452    }
453
454    #[test]
455    fn concurrent_insert_same_position() {
456        let base = "Line 1\nLine 3\n";
457        let base_doc = CrdtDoc::from_text(base);
458        let base_state = base_doc.encode_state();
459
460        let ours = "Line 1\nAgent line\nLine 3\n";
461        let theirs = "Line 1\nUser line\nLine 3\n";
462
463        let merged = merge(Some(&base_state), ours, theirs).unwrap();
464
465        // Both insertions preserved, no conflict
466        assert!(merged.contains("Agent line"), "missing agent insertion");
467        assert!(merged.contains("User line"), "missing user insertion");
468        assert!(merged.contains("Line 1"), "missing line 1");
469        assert!(merged.contains("Line 3"), "missing line 3");
470    }
471
472    #[test]
473    fn merge_no_base_state() {
474        // When no base state exists, bootstrap from empty
475        let ours = "Agent wrote this.\n";
476        let theirs = "User wrote this.\n";
477
478        let merged = merge(None, ours, theirs).unwrap();
479
480        assert!(merged.contains("Agent wrote this."));
481        assert!(merged.contains("User wrote this."));
482    }
483
484    #[test]
485    fn compact_preserves_content() {
486        let doc = CrdtDoc::from_text("Hello");
487        doc.apply_edit(5, 0, " world");
488        doc.apply_edit(11, 0, "!");
489
490        let state = doc.encode_state();
491        let compacted = compact(&state).unwrap();
492        let restored = CrdtDoc::decode_state(&compacted).unwrap();
493
494        assert_eq!(restored.to_text(), "Hello world!");
495        assert!(compacted.len() <= state.len());
496    }
497
498    #[test]
499    fn compact_reduces_size_after_edits() {
500        let doc = CrdtDoc::from_text("aaaa");
501        // Many small edits to build up tombstones
502        for i in 0..20 {
503            let c = ((b'a' + (i % 26)) as char).to_string();
504            doc.apply_edit(0, 1, &c);
505        }
506        let state = doc.encode_state();
507        let compacted = compact(&state).unwrap();
508        let restored = CrdtDoc::decode_state(&compacted).unwrap();
509        assert_eq!(restored.to_text(), doc.to_text());
510    }
511
512    #[test]
513    fn empty_document() {
514        let doc = CrdtDoc::from_text("");
515        assert_eq!(doc.to_text(), "");
516
517        let encoded = doc.encode_state();
518        let decoded = CrdtDoc::decode_state(&encoded).unwrap();
519        assert_eq!(decoded.to_text(), "");
520    }
521
522    #[test]
523    fn decode_invalid_bytes_errors() {
524        let result = CrdtDoc::decode_state(&[0xff, 0xfe, 0xfd]);
525        assert!(result.is_err());
526    }
527
528    #[test]
529    fn merge_identical_texts() {
530        let base = "Same content.\n";
531        let base_doc = CrdtDoc::from_text(base);
532        let state = base_doc.encode_state();
533
534        let merged = merge(Some(&state), base, base).unwrap();
535        assert_eq!(merged, base);
536    }
537
538    #[test]
539    fn merge_one_side_unchanged() {
540        let base = "Original.\n";
541        let base_doc = CrdtDoc::from_text(base);
542        let state = base_doc.encode_state();
543
544        let ours = "Original.\nAgent added.\n";
545        let merged = merge(Some(&state), ours, base).unwrap();
546        assert_eq!(merged, ours);
547    }
548
    /// Regression test: CRDT merge should not duplicate user prompt when both
    /// ours and theirs contain the same text added since the base state.
    ///
    /// Scenario (brookebrodack-dev.md duplication bug):
    /// 1. CRDT base = exchange content from a previous cycle (no user prompt)
    /// 2. User adds prompt to exchange → saved as baseline
    /// 3. Agent generates response, content_ours = baseline + response (has user prompt)
    /// 4. User makes a small edit during response generation → content_current (has user prompt too)
    /// 5. CRDT merge: both ours and theirs have the user prompt relative to stale base
    /// 6. BUG: user prompt appears twice in merged output
    #[test]
    fn merge_stale_base_no_duplicate_user_prompt() {
        // CRDT base from a previous cycle — does NOT have the user's current prompt
        // (the leading `\` line continuation keeps the literal starting at `## Assistant`)
        let base_content = "\
## Assistant

Previous response content.

Committed and pushed.

";
        let base_doc = CrdtDoc::from_text(base_content);
        let base_state = base_doc.encode_state();

        // User adds prompt after base was saved
        let user_prompt = "\
Opening a video a shows video a.
Closing video a then opening video b start video b but video b is hidden.
Closing video b then reopening video b starts and shows video b. video b is visible.
";

        // content_ours: base + user prompt + agent response (from run_stream with full exchange)
        let ours = format!("\
{}{}### Re: Close A → Open B still hidden

Added explicit height and visibility reset.

Committed and pushed.

", base_content, user_prompt);

        // content_current: base + user prompt + minor user edit (e.g., added a blank line)
        let theirs = format!("\
{}{}
", base_content, user_prompt);

        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();

        // User prompt should appear exactly ONCE
        // (counted via its first line, which occurs once in each input)
        let prompt_count = merged.matches("Opening a video a shows video a.").count();
        assert_eq!(
            prompt_count, 1,
            "User prompt duplicated! Appeared {} times in:\n{}",
            prompt_count, merged
        );

        // Agent response should be present
        assert!(
            merged.contains("### Re: Close A → Open B still hidden"),
            "Agent response missing from merge:\n{}", merged
        );
    }
611
612    /// Regression test: When CRDT base is stale and both sides added the same text
613    /// at the same position, the merge should not duplicate it.
614    #[test]
615    fn merge_stale_base_same_insertion_both_sides() {
616        let base_content = "Line 1\nLine 2\n";
617        let base_doc = CrdtDoc::from_text(base_content);
618        let base_state = base_doc.encode_state();
619
620        // Both sides added the same text (user prompt) + ours adds more
621        let shared_addition = "User typed this.\n";
622        let ours = format!("{}{}Agent response.\n", base_content, shared_addition);
623        let theirs = format!("{}{}", base_content, shared_addition);
624
625        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
626
627        let count = merged.matches("User typed this.").count();
628        assert_eq!(
629            count, 1,
630            "Shared text duplicated! Appeared {} times in:\n{}",
631            count, merged
632        );
633        assert!(merged.contains("Agent response."), "Agent text missing:\n{}", merged);
634    }
635
636    /// Regression test: Character-level interleaving bug.
637    ///
638    /// When the user types in their editor while the agent is streaming,
639    /// both sides insert text at the same position relative to the base.
640    /// The CRDT base advancement logic used to snap to the shared prefix
641    /// of ours/theirs, which could land mid-line on a shared formatting
642    /// character (e.g., `*` from `*bold*` and `**bold**`). This caused
643    /// the formatting character to be absorbed into the base, splitting
644    /// it from the rest of the formatting sequence and producing garbled
645    /// text like `*Soft-bristle brush only**` instead of
646    /// `**Soft-bristle brush only**`.
647    ///
648    /// The fix: always snap the advanced base to a line boundary. If no
649    /// suitable line boundary exists after the current base length, don't
650    /// advance at all.
651    #[test]
652    fn merge_no_character_interleaving() {
653        // Base: a document with some existing content
654        let base = "# Doc\n\nPrevious content.\n\n";
655        let base_doc = CrdtDoc::from_text(base);
656        let base_state = base_doc.encode_state();
657
658        // Agent adds a response
659        let ours = "# Doc\n\nPrevious content.\n\n*Compacted. Content archived to*\n";
660        // User types something in their editor at the same position
661        let theirs = "# Doc\n\nPrevious content.\n\n**Soft-bristle brush only**\n";
662
663        let merged = merge(Some(&base_state), ours, theirs).unwrap();
664
665        // Both texts should be present as contiguous blocks, not interleaved
666        assert!(
667            merged.contains("*Compacted. Content archived to*"),
668            "Agent text should be contiguous (not interleaved). Got:\n{}",
669            merged
670        );
671        assert!(
672            merged.contains("**Soft-bristle brush only**"),
673            "User text should be contiguous (not interleaved). Got:\n{}",
674            merged
675        );
676    }
677
678    /// Regression test: Concurrent edits within the same line should not
679    /// produce character-level interleaving.
680    #[test]
681    fn merge_concurrent_same_line_no_garbling() {
682        let base = "Some base text\n";
683        let base_doc = CrdtDoc::from_text(base);
684        let base_state = base_doc.encode_state();
685
686        // Both sides replace the line with different content
687        let ours = "Agent wrote this line\n";
688        let theirs = "User wrote different text\n";
689
690        let merged = merge(Some(&base_state), ours, theirs).unwrap();
691
692        // At least one side's text should appear contiguously
693        let has_agent_contiguous = merged.contains("Agent wrote this line");
694        let has_user_contiguous = merged.contains("User wrote different text");
695
696        assert!(
697            has_agent_contiguous || has_user_contiguous,
698            "At least one side should have contiguous text (no char interleaving). Got:\n{}",
699            merged
700        );
701    }
702
    /// Regression test: Replace-vs-append corruption (lazily-rs.md bug).
    ///
    /// Pattern:
    /// - CRDT base is from a previous cycle (old exchange content)
    /// - Agent replaces exchange content entirely (template replace mode)
    /// - User appends new prompt text to exchange during response generation
    /// - CRDT interleaves agent's new content with user's old + new text,
    ///   causing mid-word splits like "key de" + [user text] + "cisions"
    ///
    /// Root cause: stale CRDT base doesn't match either side well enough
    /// for prefix advancement, so the CRDT does a raw character-level merge
    /// of the exchange section, interleaving replace and append operations.
    ///
    /// Fix: use baseline (not stored CRDT state) as merge base, so both
    /// sides' diffs are computed from the exact content they diverged from.
    #[test]
    fn merge_replace_vs_append_no_interleaving() {
        // Full document structure (template mode)
        // `header` and `footer` wrap the exchange section identically in all three versions.
        let header = "---\nagent_doc_format: template\n---\n\n# Title\n\n<!-- agent:exchange -->\n";
        let footer = "\n<!-- /agent:exchange -->\n";

        // Previous cycle's exchange content (what the CRDT state contains)
        let old_exchange = "\
### Committed, Pushed & Released

**project (v0.1.0):**
- Committed initial implementation
- Tagged v0.1.0 and pushed

Add a README.md to the project.
Also add AGENTS.md with a symlink CLAUDE.md

**sub-project:**
- Committed fix + SPEC.md
- Pushed to remote
";
        let stale_base = format!("{header}{old_exchange}{footer}");
        let stale_state = CrdtDoc::from_text(&stale_base).encode_state();

        // Baseline (what the file looked like when response generation started)
        // Same as stale_base in this case — no user edits between cycles
        // (kept for documentation only; the leading underscore marks it as unused here)
        let _baseline = stale_base.clone();

        // Ours: agent replaces exchange content (template replace mode applied)
        let agent_exchange = "\
### Done

Added to project and pushed:

- **README.md** — overview, usage, design notes
- **AGENTS.md** — architecture, key decisions, commands, related projects
- **CLAUDE.md** → symlink to AGENTS.md

All committed and pushed.
";
        let ours = format!("{header}{agent_exchange}{footer}");

        // Theirs: user inserted new prompt IN THE MIDDLE of the exchange section
        // (after the existing user prompt, before the sub-project sections)
        // This is the critical difference — insertion within the range that ours deletes
        let theirs_exchange = "\
### Committed, Pushed & Released

**project (v0.1.0):**
- Committed initial implementation
- Tagged v0.1.0 and pushed

Add a README.md to the project.
Also add AGENTS.md with a symlink CLAUDE.md

Please add tests.
Please comprehensively test adherence to the spec.

**sub-project:**
- Committed fix + SPEC.md
- Pushed to remote
";
        let theirs = format!("{header}{theirs_exchange}{footer}");

        // Using stale CRDT state (previous cycle) — this is what triggers the bug
        let merged = merge(Some(&stale_state), &ours, &theirs).unwrap();

        // Agent's replacement text should be contiguous (no interleaving)
        assert!(
            merged.contains("- **AGENTS.md** — architecture, key decisions, commands, related projects"),
            "Agent text garbled (mid-word split). Got:\n{}", merged
        );

        // User's addition should be preserved
        assert!(
            merged.contains("Please add tests."),
            "User addition missing. Got:\n{}", merged
        );

        // No fragments of old content mixed into agent's new content
        // (both patterns are mid-word splits of "key decisions")
        assert!(
            !merged.contains("key deAdd") && !merged.contains("key de\n"),
            "Old content interleaved into agent text. Got:\n{}", merged
        );
    }
803
/// Fix verification for the scenario in
/// `merge_replace_vs_append_no_interleaving`: the merge base is encoded
/// from the baseline document instead of coming from a stale state.
#[test]
fn merge_replace_vs_append_with_baseline_base() {
    let doc_head = "---\nagent_doc_format: template\n---\n\n# Title\n\n<!-- agent:exchange -->\n";
    let doc_tail = "\n<!-- /agent:exchange -->\n";

    // Exchange body present in the baseline and kept by the user's side.
    let previous_exchange = concat!(
        "### Previous Response\n",
        "\n",
        "Old content here.\n",
        "\n",
        "Add a README.md to the project.\n",
        "Also add AGENTS.md with a symlink CLAUDE.md\n",
    );
    let baseline_text = [doc_head, previous_exchange, doc_tail].concat();

    // Agent side: the exchange body is swapped out wholesale.
    let replacement_exchange = concat!(
        "### Done\n",
        "\n",
        "- **README.md** — overview, usage, design notes\n",
        "- **AGENTS.md** — architecture, key decisions, commands, related projects\n",
        "- **CLAUDE.md** → symlink to AGENTS.md\n",
        "\n",
        "All committed and pushed.\n",
    );
    let ours = [doc_head, replacement_exchange, doc_tail].concat();

    // User side: a new prompt is appended after the old exchange body.
    let appended_prompt = "\nPlease add tests.\n";
    let theirs = [doc_head, previous_exchange, appended_prompt, doc_tail].concat();

    // The fix under test: the baseline itself supplies the CRDT base state.
    let base_bytes = CrdtDoc::from_text(&baseline_text).encode_state();
    let merged = merge(Some(&base_bytes), &ours, &theirs).unwrap();

    // The agent's line must survive in one piece.
    assert!(
        merged.contains("key decisions, commands, related projects"),
        "Agent text garbled. Got:\n{}",
        merged
    );

    // The user's appended prompt must still be there.
    assert!(
        merged.contains("Please add tests."),
        "User addition missing. Got:\n{}",
        merged
    );
}
853
/// Regression test reproducing the scenario from the bug report.
///
/// While the agent streams its response into the exchange component, the
/// user keeps typing in their editor. The two sides share a common prefix
/// and both add markdown-formatted text; the merge has to leave the
/// formatting of each side intact.
#[test]
fn merge_streaming_concurrent_edit_preserves_formatting() {
    // State of the exchange component right after the user's first prompt.
    let base = "commit and push all rappstack packages.\n\n";
    let base_state = CrdtDoc::from_text(base).encode_state();

    // Agent side: the user's prompt followed by the agent's response.
    let ours = concat!(
        "commit and push all rappstack packages.\n",
        "\n",
        "### Re: commit and push\n",
        "\n",
        "*Compacted. Content archived to `docs/`*\n",
        "\n",
        "Done — all packages pushed.\n",
    );

    // User side: a note added at the bottom while the agent was responding.
    let theirs = concat!(
        "commit and push all rappstack packages.\n",
        "\n",
        "**Soft-bristle brush only**\n",
    );

    let merged = merge(Some(&base_state), ours, theirs).unwrap();

    // The agent's emphasis markers must still wrap the same sentence.
    assert!(
        merged.contains("*Compacted. Content archived to `docs/`*"),
        "Agent formatting broken. Got:\n{}",
        merged
    );
    // The user's bold markers must still wrap the same phrase.
    assert!(
        merged.contains("**Soft-bristle brush only**"),
        "User formatting broken. Got:\n{}",
        merged
    );
    // Neither side's characters may be woven into the other's.
    assert!(
        !merged.contains("*C*C") && !merged.contains("**Sot"),
        "Character interleaving detected. Got:\n{}",
        merged
    );
}
906
/// Regression test: the agent replaces a multi-line block while the user
/// inserts a line inside that same block.
///
/// With from_chars this produced ~20 scattered character-level ops that
/// interleaved with the user's edit; with from_lines the ops are contiguous
/// line-level blocks.
///
/// The document follows the template structure so the test matches the real
/// workflow, where the baseline (common ancestor) holds the exchange
/// component with its original content.
#[test]
fn merge_replace_vs_insert_no_interleaving() {
    let doc_head = concat!(
        "---\nagent_doc_format: template\nagent_doc_write: crdt\n---\n\n",
        "# Document Title\n\n",
        "Some preamble text that both sides share.\n",
        "This provides enough common prefix to avoid stale detection.\n\n",
        "<!-- agent:exchange -->\n",
    );
    let doc_tail = "<!-- /agent:exchange -->\n";

    // Common ancestor: three original lines inside the exchange component.
    let original_lines = concat!(
        "Line one of old content\n",
        "Line two of old content\n",
        "Line three of old content\n",
    );
    let baseline_text = [doc_head, original_lines, doc_tail].concat();
    let base_bytes = CrdtDoc::from_text(&baseline_text).encode_state();

    // Agent side: the exchange is replaced with completely new content.
    let replacement_lines = concat!(
        "Completely new line one\n",
        "Completely new line two\n",
        "Completely new line three\n",
        "Completely new line four\n",
    );
    let ours = [doc_head, replacement_lines, doc_tail].concat();

    // User side: one line inserted in the middle of the original exchange.
    let user_lines = concat!(
        "Line one of old content\n",
        "User inserted this line\n",
        "Line two of old content\n",
        "Line three of old content\n",
    );
    let theirs = [doc_head, user_lines, doc_tail].concat();

    let merged = merge(Some(&base_bytes), &ours, &theirs).unwrap();

    // The agent's lines must be contiguous — no mid-word splits.
    assert!(
        merged.contains("Completely new line one"),
        "Agent line 1 missing or garbled. Got:\n{}",
        merged
    );
    assert!(
        merged.contains("Completely new line two"),
        "Agent line 2 missing or garbled. Got:\n{}",
        merged
    );

    // The user's inserted line must survive.
    assert!(
        merged.contains("User inserted this line"),
        "User insertion missing. Got:\n{}",
        merged
    );

    // Guard against interleaving such as "Complete" + user text + "ly".
    assert!(
        !merged.contains("CompleteUser") && !merged.contains("Complete\nUser"),
        "Character interleaving detected. Got:\n{}",
        merged
    );
}
954
/// When the agent and the human both append at the same position, the
/// agent's content must come first in the merged text.
#[test]
fn reorder_agent_before_human_at_append_boundary() {
    let base = "# Document\n\nBase content.\n";
    let base_state = CrdtDoc::from_text(base).encode_state();

    // Agent side appends a response; human side appends a single line.
    let ours = [base, "### Agent Response\n\nAgent wrote this.\n"].concat();
    let theirs = [base, "User added this line.\n"].concat();

    let merged = merge(Some(&base_state), &ours, &theirs).unwrap();

    // Nothing may be lost from either side or from the base.
    assert!(merged.contains("Agent wrote this."), "missing agent text");
    assert!(merged.contains("User added this line."), "missing user text");
    assert!(merged.contains("Base content."), "missing base text");

    // Ordering check: the agent's text precedes the human's text.
    let agent_pos = merged.find("Agent wrote this.").unwrap();
    let human_pos = merged.find("User added this line.").unwrap();
    assert!(
        agent_pos < human_pos,
        "Agent content should appear before human content.\nAgent pos: {}, Human pos: {}\nMerged:\n{}",
        agent_pos,
        human_pos,
        merged
    );
}
984
985    // -----------------------------------------------------------------------
986    // dedup_adjacent_blocks tests (#15)
987    // -----------------------------------------------------------------------
988
/// Two identical adjacent two-line blocks collapse to one; the distinct
/// block that follows is kept.
#[test]
fn dedup_removes_identical_adjacent_blocks() {
    let input = concat!(
        "### Re: Question\nAnswer here.\n\n",
        "### Re: Question\nAnswer here.\n\n",
        "Different block.",
    );
    let deduped = dedup_adjacent_blocks(input);
    let heading_count = deduped.matches("### Re: Question").count();
    assert_eq!(heading_count, 1);
    assert!(deduped.contains("Different block."));
}
996
/// Adjacent blocks with different content must both remain in the output.
#[test]
fn dedup_preserves_different_adjacent_blocks() {
    let input = concat!(
        "### Re: First\nAnswer one.\n\n",
        "### Re: Second\nAnswer two.",
    );
    let deduped = dedup_adjacent_blocks(input);
    assert!(deduped.contains("### Re: First"));
    assert!(deduped.contains("### Re: Second"));
}
1004
/// Repeated single-line blocks (for example a `---` rule) are left alone.
#[test]
fn dedup_ignores_short_repeated_lines() {
    // Each "---" is a one-line block, so the text must come back unchanged.
    let input = "---\n\n---\n\nContent.";
    let deduped = dedup_adjacent_blocks(input);
    assert_eq!(deduped, input);
}
1012
/// Empty input yields empty output.
#[test]
fn dedup_handles_empty_text() {
    let deduped = dedup_adjacent_blocks("");
    assert_eq!(deduped, "");
}
1017
/// Text whose adjacent blocks differ is returned unchanged.
#[test]
fn dedup_no_change_when_no_duplicates() {
    let input = concat!("Block A\nLine 2.\n\n", "Block B\nLine 2.");
    let deduped = dedup_adjacent_blocks(input);
    assert_eq!(deduped, input);
}
1024}