Skip to main content

agent_doc/
crdt.rs

1use anyhow::{Context, Result};
2use yrs::updates::decoder::Decode;
3use yrs::{Doc, GetString, ReadTxn, Text, TextRef, Transact, Update};
4
5const TEXT_KEY: &str = "content";
6
/// CRDT document wrapping a Yjs `Doc` for conflict-free merging.
///
/// All text handled by this type lives in a single shared text stored under
/// `TEXT_KEY` inside the wrapped document.
pub struct CrdtDoc {
    // Underlying yrs document; every read and write below goes through one of
    // its transactions.
    doc: Doc,
}
11
12impl CrdtDoc {
13    /// Create a new CRDT document initialized with the given text content.
14    pub fn from_text(content: &str) -> Self {
15        let doc = Doc::new();
16        let text = doc.get_or_insert_text(TEXT_KEY);
17        let mut txn = doc.transact_mut();
18        text.insert(&mut txn, 0, content);
19        drop(txn);
20        CrdtDoc { doc }
21    }
22
23    /// Extract the current text content from the CRDT document.
24    pub fn to_text(&self) -> String {
25        let text = self.doc.get_or_insert_text(TEXT_KEY);
26        let txn = self.doc.transact();
27        text.get_string(&txn)
28    }
29
30    /// Apply a local edit: delete `delete_len` chars at `offset`, then insert `insert` there.
31    #[allow(dead_code)] // Used in tests and Phase 4 stream write-back
32    pub fn apply_edit(&self, offset: u32, delete_len: u32, insert: &str) {
33        let text = self.doc.get_or_insert_text(TEXT_KEY);
34        let mut txn = self.doc.transact_mut();
35        if delete_len > 0 {
36            text.remove_range(&mut txn, offset, delete_len);
37        }
38        if !insert.is_empty() {
39            text.insert(&mut txn, offset, insert);
40        }
41    }
42
43    /// Encode the full document state (for persistence).
44    pub fn encode_state(&self) -> Vec<u8> {
45        let txn = self.doc.transact();
46        txn.encode_state_as_update_v1(&yrs::StateVector::default())
47    }
48
49    /// Decode a previously encoded state into a new CrdtDoc.
50    pub fn decode_state(bytes: &[u8]) -> Result<Self> {
51        let doc = Doc::new();
52        let update = Update::decode_v1(bytes)
53            .map_err(|e| anyhow::anyhow!("failed to decode CRDT state: {}", e))?;
54        let mut txn = doc.transact_mut();
55        txn.apply_update(update)
56            .map_err(|e| anyhow::anyhow!("failed to apply CRDT update: {}", e))?;
57        drop(txn);
58        Ok(CrdtDoc { doc })
59    }
60}
61
62/// Merge two concurrent text versions against a common base using CRDT.
63///
64/// Creates three CRDT actors: base, ours, theirs.
65/// Applies each side's edits as diffs from the base, then merges updates.
66/// Returns the merged text (conflict-free).
67///
68/// **Stale base detection:** If the CRDT base text doesn't match either ours
69/// or theirs as a prefix/substring, the base is stale. In that case, we use
70/// `ours_text` as the base to prevent duplicate insertions.
71pub fn merge(base_state: Option<&[u8]>, ours_text: &str, theirs_text: &str) -> Result<String> {
72    // Short-circuit: if both sides are identical, no merge needed
73    if ours_text == theirs_text {
74        eprintln!("[crdt] ours == theirs, skipping merge");
75        return Ok(ours_text.to_string());
76    }
77
78    // Bootstrap base doc from state or empty
79    let base_doc = if let Some(bytes) = base_state {
80        CrdtDoc::decode_state(bytes)
81            .context("failed to decode base CRDT state")?
82    } else {
83        CrdtDoc::from_text("")
84    };
85    let mut base_text = base_doc.to_text();
86
87    eprintln!(
88        "[crdt] merge: base_len={} ours_len={} theirs_len={}",
89        base_text.len(),
90        ours_text.len(),
91        theirs_text.len()
92    );
93
94    // Stale base detection: if the base text doesn't share a common prefix
95    // with both sides, it's stale. Use ours as the base instead.
96    // This prevents duplicate insertions when both sides contain text
97    // that the stale base doesn't have.
98    let ours_common = common_prefix_len(&base_text, ours_text);
99    let theirs_common = common_prefix_len(&base_text, theirs_text);
100    let base_len = base_text.len();
101
102    if base_len > 0
103        && (ours_common as f64 / base_len as f64) < 0.5
104        && (theirs_common as f64 / base_len as f64) < 0.5
105    {
106        eprintln!(
107            "[crdt] Stale CRDT base detected (common prefix: ours={}%, theirs={}%). Using ours as base.",
108            (ours_common * 100) / base_len,
109            (theirs_common * 100) / base_len
110        );
111        base_text = ours_text.to_string();
112    }
113
114    // Advance base to the common prefix of ours and theirs when it extends
115    // beyond the current base.
116    //
117    // When both ours and theirs independently added the same text beyond the
118    // stale base (e.g., both contain a user prompt that the base doesn't have),
119    // the CRDT treats each insertion as independent and includes both, causing
120    // duplication. Fix: use the common prefix of ours and theirs as the effective
121    // base, so shared additions are not treated as independent insertions.
122    //
123    // This handles the common pattern where:
124    //   base   = "old content"
125    //   ours   = "old content + user prompt + agent response"
126    //   theirs = "old content + user prompt + small edit"
127    // Without fix: user prompt appears twice (from both sides).
128    // With fix: base advances to "old content + user prompt", ours' diff is
129    //           just the agent response, theirs' diff is just the small edit.
130    let mutual_prefix = common_prefix_len(ours_text, theirs_text);
131    if mutual_prefix > base_text.len() {
132        // Snap to a line boundary to avoid splitting mid-line/mid-word.
133        // Without this, the shared prefix can include partial formatting
134        // sequences (e.g., a leading `*` from `**bold**`), causing the
135        // CRDT merge to separate that character from the rest of the
136        // formatting, producing garbled text like `*Soft-bristle brush only**`
137        // instead of `**Soft-bristle brush only**`.
138        let snap = &ours_text[..mutual_prefix];
139        let snapped = match snap.rfind('\n') {
140            Some(pos) if pos >= base_text.len() => pos + 1,
141            _ => base_text.len(), // no suitable line boundary — don't advance
142        };
143        if snapped > base_text.len() {
144            eprintln!(
145                "[crdt] Advancing base to shared prefix (base_len={} → {})",
146                base_text.len(),
147                snapped
148            );
149            base_text = ours_text[..snapped].to_string();
150        }
151    }
152
153    // Compute diffs from base to each side
154    let ours_ops = compute_edit_ops(&base_text, ours_text);
155    let theirs_ops = compute_edit_ops(&base_text, theirs_text);
156
157    // Create two independent docs from the base state.
158    // If base was overridden (stale detection), rebuild from the new base_text.
159    let base_encoded = if base_text == base_doc.to_text() {
160        base_doc.encode_state()
161    } else {
162        CrdtDoc::from_text(&base_text).encode_state()
163    };
164
165    // Agent gets lower client ID (1) so Yrs natively places agent content
166    // BEFORE human content when both insert at the same position.
167    // Yrs orders concurrent inserts by client ID: lower client ID goes first.
168    let ours_doc = Doc::with_client_id(1);
169    {
170        let update = Update::decode_v1(&base_encoded)
171            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
172        let mut txn = ours_doc.transact_mut();
173        txn.apply_update(update)
174            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
175    }
176
177    let theirs_doc = Doc::with_client_id(2);
178    {
179        let update = Update::decode_v1(&base_encoded)
180            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
181        let mut txn = theirs_doc.transact_mut();
182        txn.apply_update(update)
183            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
184    }
185
186    // Apply ours edits
187    {
188        let text = ours_doc.get_or_insert_text(TEXT_KEY);
189        let mut txn = ours_doc.transact_mut();
190        apply_ops(&text, &mut txn, &ours_ops);
191    }
192
193    // Apply theirs edits
194    {
195        let text = theirs_doc.get_or_insert_text(TEXT_KEY);
196        let mut txn = theirs_doc.transact_mut();
197        apply_ops(&text, &mut txn, &theirs_ops);
198    }
199
200    // Merge: apply theirs' changes into ours
201    let ours_sv = {
202        let txn = ours_doc.transact();
203        txn.state_vector()
204    };
205    let theirs_update = {
206        let txn = theirs_doc.transact();
207        txn.encode_state_as_update_v1(&ours_sv)
208    };
209    {
210        let update = Update::decode_v1(&theirs_update)
211            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
212        let mut txn = ours_doc.transact_mut();
213        txn.apply_update(update)
214            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
215    }
216
217    // Read merged result. With agent=client_id(2) and human=client_id(1),
218    // Yrs natively places agent content before human content at the same
219    // insertion point. No post-merge reorder needed.
220    let merged = {
221        let text = ours_doc.get_or_insert_text(TEXT_KEY);
222        let txn = ours_doc.transact();
223        text.get_string(&txn)
224    };
225
226    // Post-merge dedup: remove identical adjacent blocks (#15)
227    Ok(dedup_adjacent_blocks(&merged))
228}
229
/// Remove identical adjacent text blocks separated by blank lines.
///
/// After a CRDT merge, both sides may independently append the same content
/// (e.g., a `### Re:` section), resulting in duplicate adjacent blocks.
/// This pass identifies and removes duplicates while preserving intentionally
/// repeated content (only dedup blocks >= 2 non-empty lines to avoid
/// false positives on short repeated lines like `---` or blank lines).
///
/// Blocks are the pieces of `text` between `"\n\n"` separators and are compared
/// after trimming surrounding whitespace. A block counts as a duplicate when it
/// equals the closest preceding *non-blank* kept block, so duplicates are found
/// no matter how many blank lines separate them. Blank pieces themselves are
/// always kept, so the surrounding spacing of the surviving text is unchanged.
///
/// Returns `text` unchanged when it contains no `"\n\n"` separator.
pub fn dedup_adjacent_blocks(text: &str) -> String {
    let blocks: Vec<&str> = text.split("\n\n").collect();
    if blocks.len() < 2 {
        return text.to_string();
    }

    let mut kept: Vec<&str> = Vec::with_capacity(blocks.len());
    // Trimmed content of the most recent kept block that was not blank. Runs of
    // 2+ blank lines show up as empty pieces between real blocks; comparing
    // against those would hide a duplicate sitting right after them.
    let mut last_content: Option<&str> = None;

    for block in blocks {
        let trimmed = block.trim();
        // Only dedup substantial blocks (>= 2 non-empty lines)
        let non_empty_lines = trimmed.lines().filter(|l| !l.trim().is_empty()).count();
        if non_empty_lines >= 2 && last_content == Some(trimmed) {
            eprintln!("[crdt] dedup: removed duplicate block ({} lines)", non_empty_lines);
            continue;
        }
        if !trimmed.is_empty() {
            last_content = Some(trimmed);
        }
        kept.push(block);
    }

    kept.join("\n\n")
}
260
261/// Compact a CRDT state by re-encoding (GC tombstones where possible).
262pub fn compact(state: &[u8]) -> Result<Vec<u8>> {
263    let doc = CrdtDoc::decode_state(state)?;
264    Ok(doc.encode_state())
265}
266
/// Count the number of bytes in the common prefix of two strings.
///
/// The result is always a UTF-8 character boundary in both `a` and `b`, so it
/// is safe to use as a slice index (`&a[..n]` / `&b[..n]`). Two different
/// characters can share leading bytes (e.g. `é` and `è`); such a partial match
/// is not counted.
fn common_prefix_len(a: &str, b: &str) -> usize {
    let mut len = a.bytes().zip(b.bytes()).take_while(|(x, y)| x == y).count();
    // Back off if the byte-wise match stopped inside a multi-byte character.
    // The bytes before `len` are identical in both strings, so a boundary in
    // `a` is a boundary in `b` too. Index 0 is always a boundary, so this
    // terminates.
    while !a.is_char_boundary(len) {
        len -= 1;
    }
    len
}
271
/// Edit operation for replaying diffs onto a CRDT text.
///
/// A list of these ops is consumed left to right by `apply_ops`, which keeps a
/// cursor into the text. Lengths produced by `compute_edit_ops` are byte
/// lengths (`str::len`).
#[derive(Debug)]
enum EditOp {
    /// Leave this many units untouched and move the cursor past them.
    Retain(u32),
    /// Remove this many units at the cursor; the cursor does not move.
    Delete(u32),
    /// Insert this text at the cursor and move the cursor past it.
    Insert(String),
}
279
280/// Compute edit operations to transform `from` into `to` using `similar` diff.
281fn compute_edit_ops(from: &str, to: &str) -> Vec<EditOp> {
282    use similar::{ChangeTag, TextDiff};
283
284    let diff = TextDiff::from_lines(from, to);
285    let mut ops = Vec::new();
286
287    for change in diff.iter_all_changes() {
288        match change.tag() {
289            ChangeTag::Equal => {
290                let len = change.value().len() as u32;
291                if let Some(EditOp::Retain(n)) = ops.last_mut() {
292                    *n += len;
293                } else {
294                    ops.push(EditOp::Retain(len));
295                }
296            }
297            ChangeTag::Delete => {
298                let len = change.value().len() as u32;
299                if let Some(EditOp::Delete(n)) = ops.last_mut() {
300                    *n += len;
301                } else {
302                    ops.push(EditOp::Delete(len));
303                }
304            }
305            ChangeTag::Insert => {
306                let s = change.value().to_string();
307                if let Some(EditOp::Insert(existing)) = ops.last_mut() {
308                    existing.push_str(&s);
309                } else {
310                    ops.push(EditOp::Insert(s));
311                }
312            }
313        }
314    }
315
316    ops
317}
318
319/// Apply edit operations to a Yrs text type within a transaction.
320fn apply_ops(text: &TextRef, txn: &mut yrs::TransactionMut<'_>, ops: &[EditOp]) {
321    let mut cursor: u32 = 0;
322    for op in ops {
323        match op {
324            EditOp::Retain(n) => cursor += n,
325            EditOp::Delete(n) => {
326                text.remove_range(txn, cursor, *n);
327                // cursor stays — content shifted left
328            }
329            EditOp::Insert(s) => {
330                text.insert(txn, cursor, s);
331                cursor += s.len() as u32;
332            }
333        }
334    }
335}
336
337#[cfg(test)]
338mod tests {
339    use super::*;
340
341    #[test]
342    fn roundtrip_text() {
343        let content = "Hello, world!\nLine two.\n";
344        let doc = CrdtDoc::from_text(content);
345        assert_eq!(doc.to_text(), content);
346    }
347
348    #[test]
349    fn roundtrip_encode_decode() {
350        let content = "Some document content.\n";
351        let doc = CrdtDoc::from_text(content);
352        let encoded = doc.encode_state();
353        let decoded = CrdtDoc::decode_state(&encoded).unwrap();
354        assert_eq!(decoded.to_text(), content);
355    }
356
357    #[test]
358    fn apply_edit_insert() {
359        let doc = CrdtDoc::from_text("Hello world");
360        doc.apply_edit(5, 0, ",");
361        assert_eq!(doc.to_text(), "Hello, world");
362    }
363
364    #[test]
365    fn apply_edit_delete() {
366        let doc = CrdtDoc::from_text("Hello, world");
367        doc.apply_edit(5, 1, "");
368        assert_eq!(doc.to_text(), "Hello world");
369    }
370
371    #[test]
372    fn apply_edit_replace() {
373        let doc = CrdtDoc::from_text("Hello world");
374        doc.apply_edit(6, 5, "Rust");
375        assert_eq!(doc.to_text(), "Hello Rust");
376    }
377
378    #[test]
379    fn concurrent_append_merge_no_conflict() {
380        let base = "# Document\n\nBase content.\n";
381        let base_doc = CrdtDoc::from_text(base);
382        let base_state = base_doc.encode_state();
383
384        let ours = format!("{base}## Agent\n\nAgent response.\n");
385        let theirs = format!("{base}## User\n\nUser addition.\n");
386
387        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
388
389        // Both additions should be present
390        assert!(merged.contains("Agent response."), "missing agent text");
391        assert!(merged.contains("User addition."), "missing user text");
392        assert!(merged.contains("Base content."), "missing base text");
393        // No conflict markers
394        assert!(!merged.contains("<<<<<<<"));
395        assert!(!merged.contains(">>>>>>>"));
396    }
397
398    #[test]
399    fn concurrent_insert_same_position() {
400        let base = "Line 1\nLine 3\n";
401        let base_doc = CrdtDoc::from_text(base);
402        let base_state = base_doc.encode_state();
403
404        let ours = "Line 1\nAgent line\nLine 3\n";
405        let theirs = "Line 1\nUser line\nLine 3\n";
406
407        let merged = merge(Some(&base_state), ours, theirs).unwrap();
408
409        // Both insertions preserved, no conflict
410        assert!(merged.contains("Agent line"), "missing agent insertion");
411        assert!(merged.contains("User line"), "missing user insertion");
412        assert!(merged.contains("Line 1"), "missing line 1");
413        assert!(merged.contains("Line 3"), "missing line 3");
414    }
415
416    #[test]
417    fn merge_no_base_state() {
418        // When no base state exists, bootstrap from empty
419        let ours = "Agent wrote this.\n";
420        let theirs = "User wrote this.\n";
421
422        let merged = merge(None, ours, theirs).unwrap();
423
424        assert!(merged.contains("Agent wrote this."));
425        assert!(merged.contains("User wrote this."));
426    }
427
428    #[test]
429    fn compact_preserves_content() {
430        let doc = CrdtDoc::from_text("Hello");
431        doc.apply_edit(5, 0, " world");
432        doc.apply_edit(11, 0, "!");
433
434        let state = doc.encode_state();
435        let compacted = compact(&state).unwrap();
436        let restored = CrdtDoc::decode_state(&compacted).unwrap();
437
438        assert_eq!(restored.to_text(), "Hello world!");
439        assert!(compacted.len() <= state.len());
440    }
441
442    #[test]
443    fn compact_reduces_size_after_edits() {
444        let doc = CrdtDoc::from_text("aaaa");
445        // Many small edits to build up tombstones
446        for i in 0..20 {
447            let c = ((b'a' + (i % 26)) as char).to_string();
448            doc.apply_edit(0, 1, &c);
449        }
450        let state = doc.encode_state();
451        let compacted = compact(&state).unwrap();
452        let restored = CrdtDoc::decode_state(&compacted).unwrap();
453        assert_eq!(restored.to_text(), doc.to_text());
454    }
455
456    #[test]
457    fn empty_document() {
458        let doc = CrdtDoc::from_text("");
459        assert_eq!(doc.to_text(), "");
460
461        let encoded = doc.encode_state();
462        let decoded = CrdtDoc::decode_state(&encoded).unwrap();
463        assert_eq!(decoded.to_text(), "");
464    }
465
466    #[test]
467    fn decode_invalid_bytes_errors() {
468        let result = CrdtDoc::decode_state(&[0xff, 0xfe, 0xfd]);
469        assert!(result.is_err());
470    }
471
472    #[test]
473    fn merge_identical_texts() {
474        let base = "Same content.\n";
475        let base_doc = CrdtDoc::from_text(base);
476        let state = base_doc.encode_state();
477
478        let merged = merge(Some(&state), base, base).unwrap();
479        assert_eq!(merged, base);
480    }
481
482    #[test]
483    fn merge_one_side_unchanged() {
484        let base = "Original.\n";
485        let base_doc = CrdtDoc::from_text(base);
486        let state = base_doc.encode_state();
487
488        let ours = "Original.\nAgent added.\n";
489        let merged = merge(Some(&state), ours, base).unwrap();
490        assert_eq!(merged, ours);
491    }
492
493    /// Regression test: CRDT merge should not duplicate user prompt when both
494    /// ours and theirs contain the same text added since the base state.
495    ///
496    /// Scenario (brookebrodack-dev.md duplication bug):
497    /// 1. CRDT base = exchange content from a previous cycle (no user prompt)
498    /// 2. User adds prompt to exchange → saved as baseline
499    /// 3. Agent generates response, content_ours = baseline + response (has user prompt)
500    /// 4. User makes a small edit during response generation → content_current (has user prompt too)
501    /// 5. CRDT merge: both ours and theirs have the user prompt relative to stale base
502    /// 6. BUG: user prompt appears twice in merged output
503    #[test]
504    fn merge_stale_base_no_duplicate_user_prompt() {
505        // CRDT base from a previous cycle — does NOT have the user's current prompt
506        let base_content = "\
507## Assistant
508
509Previous response content.
510
511Committed and pushed.
512
513";
514        let base_doc = CrdtDoc::from_text(base_content);
515        let base_state = base_doc.encode_state();
516
517        // User adds prompt after base was saved
518        let user_prompt = "\
519Opening a video a shows video a.
520Closing video a then opening video b start video b but video b is hidden.
521Closing video b then reopening video b starts and shows video b. video b is visible.
522";
523
524        // content_ours: base + user prompt + agent response (from run_stream with full exchange)
525        let ours = format!("\
526{}{}### Re: Close A → Open B still hidden
527
528Added explicit height and visibility reset.
529
530Committed and pushed.
531
532", base_content, user_prompt);
533
534        // content_current: base + user prompt + minor user edit (e.g., added a blank line)
535        let theirs = format!("\
536{}{}
537", base_content, user_prompt);
538
539        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
540
541        // User prompt should appear exactly ONCE
542        let prompt_count = merged.matches("Opening a video a shows video a.").count();
543        assert_eq!(
544            prompt_count, 1,
545            "User prompt duplicated! Appeared {} times in:\n{}",
546            prompt_count, merged
547        );
548
549        // Agent response should be present
550        assert!(
551            merged.contains("### Re: Close A → Open B still hidden"),
552            "Agent response missing from merge:\n{}", merged
553        );
554    }
555
556    /// Regression test: When CRDT base is stale and both sides added the same text
557    /// at the same position, the merge should not duplicate it.
558    #[test]
559    fn merge_stale_base_same_insertion_both_sides() {
560        let base_content = "Line 1\nLine 2\n";
561        let base_doc = CrdtDoc::from_text(base_content);
562        let base_state = base_doc.encode_state();
563
564        // Both sides added the same text (user prompt) + ours adds more
565        let shared_addition = "User typed this.\n";
566        let ours = format!("{}{}Agent response.\n", base_content, shared_addition);
567        let theirs = format!("{}{}", base_content, shared_addition);
568
569        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
570
571        let count = merged.matches("User typed this.").count();
572        assert_eq!(
573            count, 1,
574            "Shared text duplicated! Appeared {} times in:\n{}",
575            count, merged
576        );
577        assert!(merged.contains("Agent response."), "Agent text missing:\n{}", merged);
578    }
579
580    /// Regression test: Character-level interleaving bug.
581    ///
582    /// When the user types in their editor while the agent is streaming,
583    /// both sides insert text at the same position relative to the base.
584    /// The CRDT base advancement logic used to snap to the shared prefix
585    /// of ours/theirs, which could land mid-line on a shared formatting
586    /// character (e.g., `*` from `*bold*` and `**bold**`). This caused
587    /// the formatting character to be absorbed into the base, splitting
588    /// it from the rest of the formatting sequence and producing garbled
589    /// text like `*Soft-bristle brush only**` instead of
590    /// `**Soft-bristle brush only**`.
591    ///
592    /// The fix: always snap the advanced base to a line boundary. If no
593    /// suitable line boundary exists after the current base length, don't
594    /// advance at all.
595    #[test]
596    fn merge_no_character_interleaving() {
597        // Base: a document with some existing content
598        let base = "# Doc\n\nPrevious content.\n\n";
599        let base_doc = CrdtDoc::from_text(base);
600        let base_state = base_doc.encode_state();
601
602        // Agent adds a response
603        let ours = "# Doc\n\nPrevious content.\n\n*Compacted. Content archived to*\n";
604        // User types something in their editor at the same position
605        let theirs = "# Doc\n\nPrevious content.\n\n**Soft-bristle brush only**\n";
606
607        let merged = merge(Some(&base_state), ours, theirs).unwrap();
608
609        // Both texts should be present as contiguous blocks, not interleaved
610        assert!(
611            merged.contains("*Compacted. Content archived to*"),
612            "Agent text should be contiguous (not interleaved). Got:\n{}",
613            merged
614        );
615        assert!(
616            merged.contains("**Soft-bristle brush only**"),
617            "User text should be contiguous (not interleaved). Got:\n{}",
618            merged
619        );
620    }
621
622    /// Regression test: Concurrent edits within the same line should not
623    /// produce character-level interleaving.
624    #[test]
625    fn merge_concurrent_same_line_no_garbling() {
626        let base = "Some base text\n";
627        let base_doc = CrdtDoc::from_text(base);
628        let base_state = base_doc.encode_state();
629
630        // Both sides replace the line with different content
631        let ours = "Agent wrote this line\n";
632        let theirs = "User wrote different text\n";
633
634        let merged = merge(Some(&base_state), ours, theirs).unwrap();
635
636        // At least one side's text should appear contiguously
637        let has_agent_contiguous = merged.contains("Agent wrote this line");
638        let has_user_contiguous = merged.contains("User wrote different text");
639
640        assert!(
641            has_agent_contiguous || has_user_contiguous,
642            "At least one side should have contiguous text (no char interleaving). Got:\n{}",
643            merged
644        );
645    }
646
647    /// Regression test: Replace-vs-append corruption (lazily-rs.md bug).
648    ///
649    /// Pattern:
650    /// - CRDT base is from a previous cycle (old exchange content)
651    /// - Agent replaces exchange content entirely (template replace mode)
652    /// - User appends new prompt text to exchange during response generation
653    /// - CRDT interleaves agent's new content with user's old + new text,
654    ///   causing mid-word splits like "key de" + [user text] + "cisions"
655    ///
656    /// Root cause: stale CRDT base doesn't match either side well enough
657    /// for prefix advancement, so the CRDT does a raw character-level merge
658    /// of the exchange section, interleaving replace and append operations.
659    ///
660    /// Fix: use baseline (not stored CRDT state) as merge base, so both
661    /// sides' diffs are computed from the exact content they diverged from.
662    #[test]
663    fn merge_replace_vs_append_no_interleaving() {
664        // Full document structure (template mode)
665        let header = "---\nagent_doc_format: template\n---\n\n# Title\n\n<!-- agent:exchange -->\n";
666        let footer = "\n<!-- /agent:exchange -->\n";
667
668        // Previous cycle's exchange content (what the CRDT state contains)
669        let old_exchange = "\
670### Committed, Pushed & Released
671
672**project (v0.1.0):**
673- Committed initial implementation
674- Tagged v0.1.0 and pushed
675
676Add a README.md to the project.
677Also add AGENTS.md with a symlink CLAUDE.md
678
679**sub-project:**
680- Committed fix + SPEC.md
681- Pushed to remote
682";
683        let stale_base = format!("{header}{old_exchange}{footer}");
684        let stale_state = CrdtDoc::from_text(&stale_base).encode_state();
685
686        // Baseline (what the file looked like when response generation started)
687        // Same as stale_base in this case — no user edits between cycles
688        let _baseline = stale_base.clone();
689
690        // Ours: agent replaces exchange content (template replace mode applied)
691        let agent_exchange = "\
692### Done
693
694Added to project and pushed:
695
696- **README.md** — overview, usage, design notes
697- **AGENTS.md** — architecture, key decisions, commands, related projects
698- **CLAUDE.md** → symlink to AGENTS.md
699
700All committed and pushed.
701";
702        let ours = format!("{header}{agent_exchange}{footer}");
703
704        // Theirs: user inserted new prompt IN THE MIDDLE of the exchange section
705        // (after the existing user prompt, before the sub-project sections)
706        // This is the critical difference — insertion within the range that ours deletes
707        let theirs_exchange = "\
708### Committed, Pushed & Released
709
710**project (v0.1.0):**
711- Committed initial implementation
712- Tagged v0.1.0 and pushed
713
714Add a README.md to the project.
715Also add AGENTS.md with a symlink CLAUDE.md
716
717Please add tests.
718Please comprehensively test adherence to the spec.
719
720**sub-project:**
721- Committed fix + SPEC.md
722- Pushed to remote
723";
724        let theirs = format!("{header}{theirs_exchange}{footer}");
725
726        // Using stale CRDT state (previous cycle) — this is what triggers the bug
727        let merged = merge(Some(&stale_state), &ours, &theirs).unwrap();
728
729        // Agent's replacement text should be contiguous (no interleaving)
730        assert!(
731            merged.contains("- **AGENTS.md** — architecture, key decisions, commands, related projects"),
732            "Agent text garbled (mid-word split). Got:\n{}", merged
733        );
734
735        // User's addition should be preserved
736        assert!(
737            merged.contains("Please add tests."),
738            "User addition missing. Got:\n{}", merged
739        );
740
741        // No fragments of old content mixed into agent's new content
742        assert!(
743            !merged.contains("key deAdd") && !merged.contains("key de\n"),
744            "Old content interleaved into agent text. Got:\n{}", merged
745        );
746    }
747
748    /// Same as merge_replace_vs_append_no_interleaving but using baseline
749    /// as CRDT base instead of stale state. This is the fix verification.
750    #[test]
751    fn merge_replace_vs_append_with_baseline_base() {
752        let header = "---\nagent_doc_format: template\n---\n\n# Title\n\n<!-- agent:exchange -->\n";
753        let footer = "\n<!-- /agent:exchange -->\n";
754
755        let old_exchange = "\
756### Previous Response
757
758Old content here.
759
760Add a README.md to the project.
761Also add AGENTS.md with a symlink CLAUDE.md
762";
763        let baseline = format!("{header}{old_exchange}{footer}");
764
765        // Ours: agent replaces exchange
766        let agent_exchange = "\
767### Done
768
769- **README.md** — overview, usage, design notes
770- **AGENTS.md** — architecture, key decisions, commands, related projects
771- **CLAUDE.md** → symlink to AGENTS.md
772
773All committed and pushed.
774";
775        let ours = format!("{header}{agent_exchange}{footer}");
776
777        // Theirs: user appended new prompt
778        let user_addition = "\nPlease add tests.\n";
779        let theirs = format!("{header}{old_exchange}{user_addition}{footer}");
780
781        // Use baseline as CRDT base (the fix)
782        let baseline_state = CrdtDoc::from_text(&baseline).encode_state();
783        let merged = merge(Some(&baseline_state), &ours, &theirs).unwrap();
784
785        // Agent text should be contiguous
786        assert!(
787            merged.contains("key decisions, commands, related projects"),
788            "Agent text garbled. Got:\n{}", merged
789        );
790
791        // User addition preserved
792        assert!(
793            merged.contains("Please add tests."),
794            "User addition missing. Got:\n{}", merged
795        );
796    }
797
/// Regression test reproducing the scenario from the bug report.
///
/// While the agent streams its response into the exchange component, the
/// user is typing in their editor. Both sides start from the same prefix and
/// add markdown-formatted text after it; the merged document must keep the
/// formatting of each side intact.
#[test]
fn merge_streaming_concurrent_edit_preserves_formatting() {
    // Common ancestor: the exchange component right after the user's initial prompt.
    let ancestor = "commit and push all rappstack packages.\n\n";
    let ancestor_state = CrdtDoc::from_text(ancestor).encode_state();

    // Agent side (content_ours): the user prompt followed by the agent response.
    let agent_side = concat!(
        "commit and push all rappstack packages.\n",
        "\n",
        "### Re: commit and push\n",
        "\n",
        "*Compacted. Content archived to `docs/`*\n",
        "\n",
        "Done — all packages pushed.\n",
    );

    // User side: a concurrent edit that adds a bold note below the prompt.
    let user_side = concat!(
        "commit and push all rappstack packages.\n",
        "\n",
        "**Soft-bristle brush only**\n",
    );

    let result = merge(Some(&ancestor_state), agent_side, user_side).unwrap();

    // The agent's emphasis markers must come through untouched.
    let agent_line = "*Compacted. Content archived to `docs/`*";
    assert!(
        result.contains(agent_line),
        "Agent formatting broken. Got:\n{}",
        result
    );

    // So must the user's bold markers.
    let user_line = "**Soft-bristle brush only**";
    assert!(
        result.contains(user_line),
        "User formatting broken. Got:\n{}",
        result
    );

    // Neither known interleaving signature may appear in the output.
    let interleaved = result.contains("*C*C") || result.contains("**Sot");
    assert!(
        !interleaved,
        "Character interleaving detected. Got:\n{}",
        result
    );
}
850
/// Regression test: the agent replaces a multi-line block while the user
/// inserts a line inside that same block.
///
/// With from_chars the replacement turns into ~20 scattered character-level
/// ops that interleave with the user's edit; with from_lines the ops are
/// contiguous line-level blocks.
///
/// The fixture is a template document so that, as in the real workflow, the
/// baseline (common ancestor) already contains the exchange component with
/// its original content.
#[test]
fn merge_replace_vs_insert_no_interleaving() {
    let header = "---\nagent_doc_format: template\nagent_doc_write: crdt\n---\n\n# Document Title\n\nSome preamble text that both sides share.\nThis provides enough common prefix to avoid stale detection.\n\n<!-- agent:exchange -->\n";
    let footer = "<!-- /agent:exchange -->\n";

    // Common ancestor: the template wrapped around the original exchange body.
    let original_body =
        "Line one of old content\nLine two of old content\nLine three of old content\n";
    let ancestor = [header, original_body, footer].concat();
    let ancestor_state = CrdtDoc::from_text(&ancestor).encode_state();

    // Agent side: the whole exchange body is swapped for new content.
    let agent_body = "Completely new line one\nCompletely new line two\nCompletely new line three\nCompletely new line four\n";
    let agent_side = [header, agent_body, footer].concat();

    // User side: one extra line inserted in the middle of the original body.
    let user_body = "Line one of old content\nUser inserted this line\nLine two of old content\nLine three of old content\n";
    let user_side = [header, user_body, footer].concat();

    let result = merge(Some(&ancestor_state), &agent_side, &user_side).unwrap();

    // Agent lines must come through whole, with no mid-word splits.
    let agent_first = "Completely new line one";
    assert!(
        result.contains(agent_first),
        "Agent line 1 missing or garbled. Got:\n{}", result
    );
    let agent_second = "Completely new line two";
    assert!(
        result.contains(agent_second),
        "Agent line 2 missing or garbled. Got:\n{}", result
    );

    // The user's inserted line must still be there.
    let user_line = "User inserted this line";
    assert!(
        result.contains(user_line),
        "User insertion missing. Got:\n{}", result
    );

    // Guard against interleaving such as "Complete" + user text + "ly".
    let interleaved = result.contains("CompleteUser") || result.contains("Complete\nUser");
    assert!(
        !interleaved,
        "Character interleaving detected. Got:\n{}", result
    );
}
898
/// When the agent and the human both append at the same position, the
/// agent's content must come first in the merged document.
#[test]
fn reorder_agent_before_human_at_append_boundary() {
    let base = "# Document\n\nBase content.\n";
    let ancestor_state = CrdtDoc::from_text(base).encode_state();

    // Agent side: a response appended after the shared base.
    let agent_side = [base, "### Agent Response\n\nAgent wrote this.\n"].concat();
    // Human side: their own line appended after the same base.
    let human_side = [base, "User added this line.\n"].concat();

    let result = merge(Some(&ancestor_state), &agent_side, &human_side).unwrap();

    // Nothing from either side, or from the base, may be lost.
    assert!(result.contains("Agent wrote this."), "missing agent text");
    assert!(result.contains("User added this line."), "missing user text");
    assert!(result.contains("Base content."), "missing base text");

    // Ordering: the agent's text must start at a lower byte offset than the human's.
    let agent_at = result.find("Agent wrote this.").unwrap();
    let human_at = result.find("User added this line.").unwrap();
    let agent_is_first = agent_at < human_at;
    assert!(
        agent_is_first,
        "Agent content should appear before human content.\nAgent pos: {}, Human pos: {}\nMerged:\n{}",
        agent_at, human_at, result
    );
}
928
929    // -----------------------------------------------------------------------
930    // dedup_adjacent_blocks tests (#15)
931    // -----------------------------------------------------------------------
932
/// Two identical adjacent blocks collapse to a single occurrence, while the
/// unrelated block that follows them is kept.
#[test]
fn dedup_removes_identical_adjacent_blocks() {
    let input = "### Re: Question\nAnswer here.\n\n### Re: Question\nAnswer here.\n\nDifferent block.";
    let deduped = dedup_adjacent_blocks(input);

    let heading_count = deduped.matches("### Re: Question").count();
    assert_eq!(heading_count, 1);
    assert!(deduped.contains("Different block."));
}
940
/// Adjacent blocks with different content must both remain in the output.
#[test]
fn dedup_preserves_different_adjacent_blocks() {
    let input = "### Re: First\nAnswer one.\n\n### Re: Second\nAnswer two.";
    let deduped = dedup_adjacent_blocks(input);

    let has_first = deduped.contains("### Re: First");
    assert!(has_first);
    let has_second = deduped.contains("### Re: Second");
    assert!(has_second);
}
948
/// Repeated single-line blocks such as "---" are not treated as duplicates:
/// the text must come back unchanged.
#[test]
fn dedup_ignores_short_repeated_lines() {
    let input = "---\n\n---\n\nContent.";
    assert_eq!(dedup_adjacent_blocks(input), input);
}
956
/// Empty input yields empty output.
#[test]
fn dedup_handles_empty_text() {
    let deduped = dedup_adjacent_blocks("");
    assert_eq!(deduped, "");
}
961
/// Text without duplicated adjacent blocks is returned as-is, even when two
/// blocks share an identical second line.
#[test]
fn dedup_no_change_when_no_duplicates() {
    let input = "Block A\nLine 2.\n\nBlock B\nLine 2.";
    assert_eq!(dedup_adjacent_blocks(input), input);
}
968}