Skip to main content

agent_doc/
crdt.rs

1use anyhow::{Context, Result};
2use yrs::updates::decoder::Decode;
3use yrs::{Doc, GetString, ReadTxn, Text, TextRef, Transact, Update};
4
/// Name under which the document's single shared text type is registered.
/// Every read and write in this module goes through this key.
const TEXT_KEY: &str = "content";
6
/// CRDT document wrapping a Yjs `Doc` for conflict-free merging.
///
/// The document's content is kept in one shared text type registered under
/// `TEXT_KEY`; the methods on this type read and edit that text and
/// encode/decode the whole document state as a v1 update.
pub struct CrdtDoc {
    // Underlying yrs document; private so all access goes through the methods.
    doc: Doc,
}
11
12impl CrdtDoc {
13    /// Create a new CRDT document initialized with the given text content.
14    pub fn from_text(content: &str) -> Self {
15        let doc = Doc::new();
16        let text = doc.get_or_insert_text(TEXT_KEY);
17        let mut txn = doc.transact_mut();
18        text.insert(&mut txn, 0, content);
19        drop(txn);
20        CrdtDoc { doc }
21    }
22
23    /// Extract the current text content from the CRDT document.
24    pub fn to_text(&self) -> String {
25        let text = self.doc.get_or_insert_text(TEXT_KEY);
26        let txn = self.doc.transact();
27        text.get_string(&txn)
28    }
29
30    /// Apply a local edit: delete `delete_len` chars at `offset`, then insert `insert` there.
31    #[allow(dead_code)] // Used in tests and Phase 4 stream write-back
32    pub fn apply_edit(&self, offset: u32, delete_len: u32, insert: &str) {
33        let text = self.doc.get_or_insert_text(TEXT_KEY);
34        let mut txn = self.doc.transact_mut();
35        if delete_len > 0 {
36            text.remove_range(&mut txn, offset, delete_len);
37        }
38        if !insert.is_empty() {
39            text.insert(&mut txn, offset, insert);
40        }
41    }
42
43    /// Encode the full document state (for persistence).
44    pub fn encode_state(&self) -> Vec<u8> {
45        let txn = self.doc.transact();
46        txn.encode_state_as_update_v1(&yrs::StateVector::default())
47    }
48
49    /// Decode a previously encoded state into a new CrdtDoc.
50    pub fn decode_state(bytes: &[u8]) -> Result<Self> {
51        let doc = Doc::new();
52        let update = Update::decode_v1(bytes)
53            .map_err(|e| anyhow::anyhow!("failed to decode CRDT state: {}", e))?;
54        let mut txn = doc.transact_mut();
55        txn.apply_update(update)
56            .map_err(|e| anyhow::anyhow!("failed to apply CRDT update: {}", e))?;
57        drop(txn);
58        Ok(CrdtDoc { doc })
59    }
60}
61
62/// Merge two concurrent text versions against a common base using CRDT.
63///
64/// Creates three CRDT actors: base, ours, theirs.
65/// Applies each side's edits as diffs from the base, then merges updates.
66/// Returns the merged text (conflict-free).
67///
68/// **Stale base detection:** If the CRDT base text doesn't match either ours
69/// or theirs as a prefix/substring, the base is stale. In that case, we use
70/// `ours_text` as the base to prevent duplicate insertions.
71pub fn merge(base_state: Option<&[u8]>, ours_text: &str, theirs_text: &str) -> Result<String> {
72    // Short-circuit: if both sides are identical, no merge needed
73    if ours_text == theirs_text {
74        eprintln!("[crdt] ours == theirs, skipping merge");
75        return Ok(ours_text.to_string());
76    }
77
78    // Bootstrap base doc from state or empty
79    let base_doc = if let Some(bytes) = base_state {
80        CrdtDoc::decode_state(bytes)
81            .context("failed to decode base CRDT state")?
82    } else {
83        CrdtDoc::from_text("")
84    };
85    let mut base_text = base_doc.to_text();
86
87    eprintln!(
88        "[crdt] merge: base_len={} ours_len={} theirs_len={}",
89        base_text.len(),
90        ours_text.len(),
91        theirs_text.len()
92    );
93
94    // Stale base detection: if the base text doesn't share a common prefix
95    // with both sides, it's stale. Use ours as the base instead.
96    // This prevents duplicate insertions when both sides contain text
97    // that the stale base doesn't have.
98    let ours_common = common_prefix_len(&base_text, ours_text);
99    let theirs_common = common_prefix_len(&base_text, theirs_text);
100    let base_len = base_text.len();
101
102    if base_len > 0
103        && (ours_common as f64 / base_len as f64) < 0.5
104        && (theirs_common as f64 / base_len as f64) < 0.5
105    {
106        eprintln!(
107            "[crdt] Stale CRDT base detected (common prefix: ours={}%, theirs={}%). Using ours as base.",
108            (ours_common * 100) / base_len,
109            (theirs_common * 100) / base_len
110        );
111        base_text = ours_text.to_string();
112    }
113
114    // Advance base to the common prefix of ours and theirs when it extends
115    // beyond the current base.
116    //
117    // When both ours and theirs independently added the same text beyond the
118    // stale base (e.g., both contain a user prompt that the base doesn't have),
119    // the CRDT treats each insertion as independent and includes both, causing
120    // duplication. Fix: use the common prefix of ours and theirs as the effective
121    // base, so shared additions are not treated as independent insertions.
122    //
123    // This handles the common pattern where:
124    //   base   = "old content"
125    //   ours   = "old content + user prompt + agent response"
126    //   theirs = "old content + user prompt + small edit"
127    // Without fix: user prompt appears twice (from both sides).
128    // With fix: base advances to "old content + user prompt", ours' diff is
129    //           just the agent response, theirs' diff is just the small edit.
130    let mutual_prefix = common_prefix_len(ours_text, theirs_text);
131    if mutual_prefix > base_text.len() {
132        // Snap to a line boundary to avoid splitting mid-line/mid-word.
133        // Without this, the shared prefix can include partial formatting
134        // sequences (e.g., a leading `*` from `**bold**`), causing the
135        // CRDT merge to separate that character from the rest of the
136        // formatting, producing garbled text like `*Soft-bristle brush only**`
137        // instead of `**Soft-bristle brush only**`.
138        let snap = &ours_text[..mutual_prefix];
139        let snapped = match snap.rfind('\n') {
140            Some(pos) if pos >= base_text.len() => pos + 1,
141            _ => base_text.len(), // no suitable line boundary — don't advance
142        };
143        if snapped > base_text.len() {
144            eprintln!(
145                "[crdt] Advancing base to shared prefix (base_len={} → {})",
146                base_text.len(),
147                snapped
148            );
149            base_text = ours_text[..snapped].to_string();
150        }
151    }
152
153    // Compute diffs from base to each side
154    let ours_ops = compute_edit_ops(&base_text, ours_text);
155    let theirs_ops = compute_edit_ops(&base_text, theirs_text);
156
157    // Create two independent docs from the base state.
158    // If base was overridden (stale detection), rebuild from the new base_text.
159    let base_encoded = if base_text == base_doc.to_text() {
160        base_doc.encode_state()
161    } else {
162        CrdtDoc::from_text(&base_text).encode_state()
163    };
164
165    let ours_doc = Doc::with_client_id(1);
166    {
167        let update = Update::decode_v1(&base_encoded)
168            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
169        let mut txn = ours_doc.transact_mut();
170        txn.apply_update(update)
171            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
172    }
173
174    let theirs_doc = Doc::with_client_id(2);
175    {
176        let update = Update::decode_v1(&base_encoded)
177            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
178        let mut txn = theirs_doc.transact_mut();
179        txn.apply_update(update)
180            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
181    }
182
183    // Apply ours edits
184    {
185        let text = ours_doc.get_or_insert_text(TEXT_KEY);
186        let mut txn = ours_doc.transact_mut();
187        apply_ops(&text, &mut txn, &ours_ops);
188    }
189
190    // Apply theirs edits
191    {
192        let text = theirs_doc.get_or_insert_text(TEXT_KEY);
193        let mut txn = theirs_doc.transact_mut();
194        apply_ops(&text, &mut txn, &theirs_ops);
195    }
196
197    // Merge: apply theirs' changes into ours
198    let ours_sv = {
199        let txn = ours_doc.transact();
200        txn.state_vector()
201    };
202    let theirs_update = {
203        let txn = theirs_doc.transact();
204        txn.encode_state_as_update_v1(&ours_sv)
205    };
206    {
207        let update = Update::decode_v1(&theirs_update)
208            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
209        let mut txn = ours_doc.transact_mut();
210        txn.apply_update(update)
211            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
212    }
213
214    // Read merged result
215    let text = ours_doc.get_or_insert_text(TEXT_KEY);
216    let txn = ours_doc.transact();
217    let merged = text.get_string(&txn);
218
219    // Post-merge dedup: remove identical adjacent blocks (#15)
220    Ok(dedup_adjacent_blocks(&merged))
221}
222
/// Drop a text block when it repeats the block kept immediately before it.
///
/// Blocks are the pieces obtained by splitting `text` on a blank line
/// (`"\n\n"`). After a CRDT merge both sides may have appended the same
/// content (e.g., a `### Re:` section), leaving two identical neighbours.
/// Blocks are compared after trimming surrounding whitespace, and only
/// blocks with at least two non-empty lines are candidates, so short
/// repeated lines such as `---` are left alone.
///
/// The surviving blocks are re-joined with `"\n\n"`. Text with fewer than
/// two blocks is returned unchanged.
///
/// NOTE(review): a dropped block is discarded whole, so when the final block
/// is the duplicate, its trailing whitespace (including the text's final
/// newline) is discarded with it.
pub fn dedup_adjacent_blocks(text: &str) -> String {
    let blocks: Vec<&str> = text.split("\n\n").collect();
    if blocks.len() < 2 {
        return text.to_string();
    }

    let mut kept: Vec<&str> = Vec::with_capacity(blocks.len());
    for &block in &blocks {
        let body = block.trim();
        let non_empty_lines = body
            .lines()
            .filter(|line| !line.trim().is_empty())
            .count();

        // Only substantial blocks (>= 2 non-empty lines) are compared against
        // the most recently kept block.
        let repeats_previous = non_empty_lines >= 2
            && kept.last().map_or(false, |prev| prev.trim() == body);

        if repeats_previous {
            eprintln!("[crdt] dedup: removed duplicate block ({} lines)", non_empty_lines);
        } else {
            kept.push(block);
        }
    }

    kept.join("\n\n")
}
253
254/// Compact a CRDT state by re-encoding (GC tombstones where possible).
255pub fn compact(state: &[u8]) -> Result<Vec<u8>> {
256    let doc = CrdtDoc::decode_state(state)?;
257    Ok(doc.encode_state())
258}
259
/// Length in bytes of the longest common prefix of `a` and `b`.
///
/// The result is always a valid char boundary in both strings, so it can be
/// used directly as a slice index (`&a[..n]`, `&b[..n]`). Two different
/// multi-byte characters may share their leading UTF-8 bytes (e.g. `é` and
/// `è`); those partially matching bytes are not counted.
fn common_prefix_len(a: &str, b: &str) -> usize {
    let mut len = a.bytes().zip(b.bytes()).take_while(|(x, y)| x == y).count();
    // Back off to the start of the character the raw byte match ended in.
    // Index 0 is always a boundary, so this terminates. Because the bytes
    // before `len` are identical in both strings, a boundary in `a` is also
    // a boundary in `b`.
    while !a.is_char_boundary(len) {
        len -= 1;
    }
    len
}
264
/// Edit operation for replaying diffs onto a CRDT text.
///
/// A sequence of these is produced by `compute_edit_ops` and consumed in
/// order by `apply_ops`, which tracks a cursor into the text. Lengths are
/// the UTF-8 byte lengths (`str::len`) of the diffed pieces.
#[derive(Debug)]
enum EditOp {
    /// Leave the next `n` units untouched; the cursor moves forward by `n`.
    Retain(u32),
    /// Remove `n` units at the cursor; the cursor does not move.
    Delete(u32),
    /// Insert the string at the cursor; the cursor moves past the insertion.
    Insert(String),
}
272
273/// Compute edit operations to transform `from` into `to` using `similar` diff.
274fn compute_edit_ops(from: &str, to: &str) -> Vec<EditOp> {
275    use similar::{ChangeTag, TextDiff};
276
277    let diff = TextDiff::from_lines(from, to);
278    let mut ops = Vec::new();
279
280    for change in diff.iter_all_changes() {
281        match change.tag() {
282            ChangeTag::Equal => {
283                let len = change.value().len() as u32;
284                if let Some(EditOp::Retain(n)) = ops.last_mut() {
285                    *n += len;
286                } else {
287                    ops.push(EditOp::Retain(len));
288                }
289            }
290            ChangeTag::Delete => {
291                let len = change.value().len() as u32;
292                if let Some(EditOp::Delete(n)) = ops.last_mut() {
293                    *n += len;
294                } else {
295                    ops.push(EditOp::Delete(len));
296                }
297            }
298            ChangeTag::Insert => {
299                let s = change.value().to_string();
300                if let Some(EditOp::Insert(existing)) = ops.last_mut() {
301                    existing.push_str(&s);
302                } else {
303                    ops.push(EditOp::Insert(s));
304                }
305            }
306        }
307    }
308
309    ops
310}
311
312/// Apply edit operations to a Yrs text type within a transaction.
313fn apply_ops(text: &TextRef, txn: &mut yrs::TransactionMut<'_>, ops: &[EditOp]) {
314    let mut cursor: u32 = 0;
315    for op in ops {
316        match op {
317            EditOp::Retain(n) => cursor += n,
318            EditOp::Delete(n) => {
319                text.remove_range(txn, cursor, *n);
320                // cursor stays — content shifted left
321            }
322            EditOp::Insert(s) => {
323                text.insert(txn, cursor, s);
324                cursor += s.len() as u32;
325            }
326        }
327    }
328}
329
330#[cfg(test)]
331mod tests {
332    use super::*;
333
334    #[test]
335    fn roundtrip_text() {
336        let content = "Hello, world!\nLine two.\n";
337        let doc = CrdtDoc::from_text(content);
338        assert_eq!(doc.to_text(), content);
339    }
340
341    #[test]
342    fn roundtrip_encode_decode() {
343        let content = "Some document content.\n";
344        let doc = CrdtDoc::from_text(content);
345        let encoded = doc.encode_state();
346        let decoded = CrdtDoc::decode_state(&encoded).unwrap();
347        assert_eq!(decoded.to_text(), content);
348    }
349
350    #[test]
351    fn apply_edit_insert() {
352        let doc = CrdtDoc::from_text("Hello world");
353        doc.apply_edit(5, 0, ",");
354        assert_eq!(doc.to_text(), "Hello, world");
355    }
356
357    #[test]
358    fn apply_edit_delete() {
359        let doc = CrdtDoc::from_text("Hello, world");
360        doc.apply_edit(5, 1, "");
361        assert_eq!(doc.to_text(), "Hello world");
362    }
363
364    #[test]
365    fn apply_edit_replace() {
366        let doc = CrdtDoc::from_text("Hello world");
367        doc.apply_edit(6, 5, "Rust");
368        assert_eq!(doc.to_text(), "Hello Rust");
369    }
370
371    #[test]
372    fn concurrent_append_merge_no_conflict() {
373        let base = "# Document\n\nBase content.\n";
374        let base_doc = CrdtDoc::from_text(base);
375        let base_state = base_doc.encode_state();
376
377        let ours = format!("{base}## Agent\n\nAgent response.\n");
378        let theirs = format!("{base}## User\n\nUser addition.\n");
379
380        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
381
382        // Both additions should be present
383        assert!(merged.contains("Agent response."), "missing agent text");
384        assert!(merged.contains("User addition."), "missing user text");
385        assert!(merged.contains("Base content."), "missing base text");
386        // No conflict markers
387        assert!(!merged.contains("<<<<<<<"));
388        assert!(!merged.contains(">>>>>>>"));
389    }
390
391    #[test]
392    fn concurrent_insert_same_position() {
393        let base = "Line 1\nLine 3\n";
394        let base_doc = CrdtDoc::from_text(base);
395        let base_state = base_doc.encode_state();
396
397        let ours = "Line 1\nAgent line\nLine 3\n";
398        let theirs = "Line 1\nUser line\nLine 3\n";
399
400        let merged = merge(Some(&base_state), ours, theirs).unwrap();
401
402        // Both insertions preserved, no conflict
403        assert!(merged.contains("Agent line"), "missing agent insertion");
404        assert!(merged.contains("User line"), "missing user insertion");
405        assert!(merged.contains("Line 1"), "missing line 1");
406        assert!(merged.contains("Line 3"), "missing line 3");
407    }
408
409    #[test]
410    fn merge_no_base_state() {
411        // When no base state exists, bootstrap from empty
412        let ours = "Agent wrote this.\n";
413        let theirs = "User wrote this.\n";
414
415        let merged = merge(None, ours, theirs).unwrap();
416
417        assert!(merged.contains("Agent wrote this."));
418        assert!(merged.contains("User wrote this."));
419    }
420
421    #[test]
422    fn compact_preserves_content() {
423        let doc = CrdtDoc::from_text("Hello");
424        doc.apply_edit(5, 0, " world");
425        doc.apply_edit(11, 0, "!");
426
427        let state = doc.encode_state();
428        let compacted = compact(&state).unwrap();
429        let restored = CrdtDoc::decode_state(&compacted).unwrap();
430
431        assert_eq!(restored.to_text(), "Hello world!");
432        assert!(compacted.len() <= state.len());
433    }
434
435    #[test]
436    fn compact_reduces_size_after_edits() {
437        let doc = CrdtDoc::from_text("aaaa");
438        // Many small edits to build up tombstones
439        for i in 0..20 {
440            let c = ((b'a' + (i % 26)) as char).to_string();
441            doc.apply_edit(0, 1, &c);
442        }
443        let state = doc.encode_state();
444        let compacted = compact(&state).unwrap();
445        let restored = CrdtDoc::decode_state(&compacted).unwrap();
446        assert_eq!(restored.to_text(), doc.to_text());
447    }
448
449    #[test]
450    fn empty_document() {
451        let doc = CrdtDoc::from_text("");
452        assert_eq!(doc.to_text(), "");
453
454        let encoded = doc.encode_state();
455        let decoded = CrdtDoc::decode_state(&encoded).unwrap();
456        assert_eq!(decoded.to_text(), "");
457    }
458
459    #[test]
460    fn decode_invalid_bytes_errors() {
461        let result = CrdtDoc::decode_state(&[0xff, 0xfe, 0xfd]);
462        assert!(result.is_err());
463    }
464
465    #[test]
466    fn merge_identical_texts() {
467        let base = "Same content.\n";
468        let base_doc = CrdtDoc::from_text(base);
469        let state = base_doc.encode_state();
470
471        let merged = merge(Some(&state), base, base).unwrap();
472        assert_eq!(merged, base);
473    }
474
475    #[test]
476    fn merge_one_side_unchanged() {
477        let base = "Original.\n";
478        let base_doc = CrdtDoc::from_text(base);
479        let state = base_doc.encode_state();
480
481        let ours = "Original.\nAgent added.\n";
482        let merged = merge(Some(&state), ours, base).unwrap();
483        assert_eq!(merged, ours);
484    }
485
486    /// Regression test: CRDT merge should not duplicate user prompt when both
487    /// ours and theirs contain the same text added since the base state.
488    ///
489    /// Scenario (brookebrodack-dev.md duplication bug):
490    /// 1. CRDT base = exchange content from a previous cycle (no user prompt)
491    /// 2. User adds prompt to exchange → saved as baseline
492    /// 3. Agent generates response, content_ours = baseline + response (has user prompt)
493    /// 4. User makes a small edit during response generation → content_current (has user prompt too)
494    /// 5. CRDT merge: both ours and theirs have the user prompt relative to stale base
495    /// 6. BUG: user prompt appears twice in merged output
496    #[test]
497    fn merge_stale_base_no_duplicate_user_prompt() {
498        // CRDT base from a previous cycle — does NOT have the user's current prompt
499        let base_content = "\
500## Assistant
501
502Previous response content.
503
504Committed and pushed.
505
506";
507        let base_doc = CrdtDoc::from_text(base_content);
508        let base_state = base_doc.encode_state();
509
510        // User adds prompt after base was saved
511        let user_prompt = "\
512Opening a video a shows video a.
513Closing video a then opening video b start video b but video b is hidden.
514Closing video b then reopening video b starts and shows video b. video b is visible.
515";
516
517        // content_ours: base + user prompt + agent response (from run_stream with full exchange)
518        let ours = format!("\
519{}{}### Re: Close A → Open B still hidden
520
521Added explicit height and visibility reset.
522
523Committed and pushed.
524
525", base_content, user_prompt);
526
527        // content_current: base + user prompt + minor user edit (e.g., added a blank line)
528        let theirs = format!("\
529{}{}
530", base_content, user_prompt);
531
532        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
533
534        // User prompt should appear exactly ONCE
535        let prompt_count = merged.matches("Opening a video a shows video a.").count();
536        assert_eq!(
537            prompt_count, 1,
538            "User prompt duplicated! Appeared {} times in:\n{}",
539            prompt_count, merged
540        );
541
542        // Agent response should be present
543        assert!(
544            merged.contains("### Re: Close A → Open B still hidden"),
545            "Agent response missing from merge:\n{}", merged
546        );
547    }
548
549    /// Regression test: When CRDT base is stale and both sides added the same text
550    /// at the same position, the merge should not duplicate it.
551    #[test]
552    fn merge_stale_base_same_insertion_both_sides() {
553        let base_content = "Line 1\nLine 2\n";
554        let base_doc = CrdtDoc::from_text(base_content);
555        let base_state = base_doc.encode_state();
556
557        // Both sides added the same text (user prompt) + ours adds more
558        let shared_addition = "User typed this.\n";
559        let ours = format!("{}{}Agent response.\n", base_content, shared_addition);
560        let theirs = format!("{}{}", base_content, shared_addition);
561
562        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
563
564        let count = merged.matches("User typed this.").count();
565        assert_eq!(
566            count, 1,
567            "Shared text duplicated! Appeared {} times in:\n{}",
568            count, merged
569        );
570        assert!(merged.contains("Agent response."), "Agent text missing:\n{}", merged);
571    }
572
573    /// Regression test: Character-level interleaving bug.
574    ///
575    /// When the user types in their editor while the agent is streaming,
576    /// both sides insert text at the same position relative to the base.
577    /// The CRDT base advancement logic used to snap to the shared prefix
578    /// of ours/theirs, which could land mid-line on a shared formatting
579    /// character (e.g., `*` from `*bold*` and `**bold**`). This caused
580    /// the formatting character to be absorbed into the base, splitting
581    /// it from the rest of the formatting sequence and producing garbled
582    /// text like `*Soft-bristle brush only**` instead of
583    /// `**Soft-bristle brush only**`.
584    ///
585    /// The fix: always snap the advanced base to a line boundary. If no
586    /// suitable line boundary exists after the current base length, don't
587    /// advance at all.
588    #[test]
589    fn merge_no_character_interleaving() {
590        // Base: a document with some existing content
591        let base = "# Doc\n\nPrevious content.\n\n";
592        let base_doc = CrdtDoc::from_text(base);
593        let base_state = base_doc.encode_state();
594
595        // Agent adds a response
596        let ours = "# Doc\n\nPrevious content.\n\n*Compacted. Content archived to*\n";
597        // User types something in their editor at the same position
598        let theirs = "# Doc\n\nPrevious content.\n\n**Soft-bristle brush only**\n";
599
600        let merged = merge(Some(&base_state), ours, theirs).unwrap();
601
602        // Both texts should be present as contiguous blocks, not interleaved
603        assert!(
604            merged.contains("*Compacted. Content archived to*"),
605            "Agent text should be contiguous (not interleaved). Got:\n{}",
606            merged
607        );
608        assert!(
609            merged.contains("**Soft-bristle brush only**"),
610            "User text should be contiguous (not interleaved). Got:\n{}",
611            merged
612        );
613    }
614
615    /// Regression test: Concurrent edits within the same line should not
616    /// produce character-level interleaving.
617    #[test]
618    fn merge_concurrent_same_line_no_garbling() {
619        let base = "Some base text\n";
620        let base_doc = CrdtDoc::from_text(base);
621        let base_state = base_doc.encode_state();
622
623        // Both sides replace the line with different content
624        let ours = "Agent wrote this line\n";
625        let theirs = "User wrote different text\n";
626
627        let merged = merge(Some(&base_state), ours, theirs).unwrap();
628
629        // At least one side's text should appear contiguously
630        let has_agent_contiguous = merged.contains("Agent wrote this line");
631        let has_user_contiguous = merged.contains("User wrote different text");
632
633        assert!(
634            has_agent_contiguous || has_user_contiguous,
635            "At least one side should have contiguous text (no char interleaving). Got:\n{}",
636            merged
637        );
638    }
639
640    /// Regression test: Replace-vs-append corruption (lazily-rs.md bug).
641    ///
642    /// Pattern:
643    /// - CRDT base is from a previous cycle (old exchange content)
644    /// - Agent replaces exchange content entirely (template replace mode)
645    /// - User appends new prompt text to exchange during response generation
646    /// - CRDT interleaves agent's new content with user's old + new text,
647    ///   causing mid-word splits like "key de" + [user text] + "cisions"
648    ///
649    /// Root cause: stale CRDT base doesn't match either side well enough
650    /// for prefix advancement, so the CRDT does a raw character-level merge
651    /// of the exchange section, interleaving replace and append operations.
652    ///
653    /// Fix: use baseline (not stored CRDT state) as merge base, so both
654    /// sides' diffs are computed from the exact content they diverged from.
655    #[test]
656    fn merge_replace_vs_append_no_interleaving() {
657        // Full document structure (template mode)
658        let header = "---\nagent_doc_format: template\n---\n\n# Title\n\n<!-- agent:exchange -->\n";
659        let footer = "\n<!-- /agent:exchange -->\n";
660
661        // Previous cycle's exchange content (what the CRDT state contains)
662        let old_exchange = "\
663### Committed, Pushed & Released
664
665**project (v0.1.0):**
666- Committed initial implementation
667- Tagged v0.1.0 and pushed
668
669Add a README.md to the project.
670Also add AGENTS.md with a symlink CLAUDE.md
671
672**sub-project:**
673- Committed fix + SPEC.md
674- Pushed to remote
675";
676        let stale_base = format!("{header}{old_exchange}{footer}");
677        let stale_state = CrdtDoc::from_text(&stale_base).encode_state();
678
679        // Baseline (what the file looked like when response generation started)
680        // Same as stale_base in this case — no user edits between cycles
681        let _baseline = stale_base.clone();
682
683        // Ours: agent replaces exchange content (template replace mode applied)
684        let agent_exchange = "\
685### Done
686
687Added to project and pushed:
688
689- **README.md** — overview, usage, design notes
690- **AGENTS.md** — architecture, key decisions, commands, related projects
691- **CLAUDE.md** → symlink to AGENTS.md
692
693All committed and pushed.
694";
695        let ours = format!("{header}{agent_exchange}{footer}");
696
697        // Theirs: user inserted new prompt IN THE MIDDLE of the exchange section
698        // (after the existing user prompt, before the sub-project sections)
699        // This is the critical difference — insertion within the range that ours deletes
700        let theirs_exchange = "\
701### Committed, Pushed & Released
702
703**project (v0.1.0):**
704- Committed initial implementation
705- Tagged v0.1.0 and pushed
706
707Add a README.md to the project.
708Also add AGENTS.md with a symlink CLAUDE.md
709
710Please add tests.
711Please comprehensively test adherence to the spec.
712
713**sub-project:**
714- Committed fix + SPEC.md
715- Pushed to remote
716";
717        let theirs = format!("{header}{theirs_exchange}{footer}");
718
719        // Using stale CRDT state (previous cycle) — this is what triggers the bug
720        let merged = merge(Some(&stale_state), &ours, &theirs).unwrap();
721
722        // Agent's replacement text should be contiguous (no interleaving)
723        assert!(
724            merged.contains("- **AGENTS.md** — architecture, key decisions, commands, related projects"),
725            "Agent text garbled (mid-word split). Got:\n{}", merged
726        );
727
728        // User's addition should be preserved
729        assert!(
730            merged.contains("Please add tests."),
731            "User addition missing. Got:\n{}", merged
732        );
733
734        // No fragments of old content mixed into agent's new content
735        assert!(
736            !merged.contains("key deAdd") && !merged.contains("key de\n"),
737            "Old content interleaved into agent text. Got:\n{}", merged
738        );
739    }
740
741    /// Same as merge_replace_vs_append_no_interleaving but using baseline
742    /// as CRDT base instead of stale state. This is the fix verification.
743    #[test]
744    fn merge_replace_vs_append_with_baseline_base() {
745        let header = "---\nagent_doc_format: template\n---\n\n# Title\n\n<!-- agent:exchange -->\n";
746        let footer = "\n<!-- /agent:exchange -->\n";
747
748        let old_exchange = "\
749### Previous Response
750
751Old content here.
752
753Add a README.md to the project.
754Also add AGENTS.md with a symlink CLAUDE.md
755";
756        let baseline = format!("{header}{old_exchange}{footer}");
757
758        // Ours: agent replaces exchange
759        let agent_exchange = "\
760### Done
761
762- **README.md** — overview, usage, design notes
763- **AGENTS.md** — architecture, key decisions, commands, related projects
764- **CLAUDE.md** → symlink to AGENTS.md
765
766All committed and pushed.
767";
768        let ours = format!("{header}{agent_exchange}{footer}");
769
770        // Theirs: user appended new prompt
771        let user_addition = "\nPlease add tests.\n";
772        let theirs = format!("{header}{old_exchange}{user_addition}{footer}");
773
774        // Use baseline as CRDT base (the fix)
775        let baseline_state = CrdtDoc::from_text(&baseline).encode_state();
776        let merged = merge(Some(&baseline_state), &ours, &theirs).unwrap();
777
778        // Agent text should be contiguous
779        assert!(
780            merged.contains("key decisions, commands, related projects"),
781            "Agent text garbled. Got:\n{}", merged
782        );
783
784        // User addition preserved
785        assert!(
786            merged.contains("Please add tests."),
787            "User addition missing. Got:\n{}", merged
788        );
789    }
790
791    /// Regression test: Simulates the exact scenario from the bug report.
792    ///
793    /// The agent streams a response into the exchange component while
794    /// the user types in their editor. Both sides share a common prefix
795    /// that includes markdown formatting characters. The CRDT merge must
796    /// preserve formatting integrity for both sides.
797    #[test]
798    fn merge_streaming_concurrent_edit_preserves_formatting() {
799        // Exchange component content after user's initial prompt
800        let base = "commit and push all rappstack packages.\n\n";
801        let base_doc = CrdtDoc::from_text(base);
802        let base_state = base_doc.encode_state();
803
804        // Agent's response (content_ours = user prompt + agent response)
805        let ours = "\
806commit and push all rappstack packages.
807
808### Re: commit and push
809
810*Compacted. Content archived to `docs/`*
811
812Done — all packages pushed.
813";
814
815        // User's concurrent edit (added a note at the bottom)
816        let theirs = "\
817commit and push all rappstack packages.
818
819**Soft-bristle brush only**
820";
821
822        let merged = merge(Some(&base_state), ours, theirs).unwrap();
823
824        // Agent formatting must be intact
825        assert!(
826            merged.contains("*Compacted. Content archived to `docs/`*"),
827            "Agent formatting broken. Got:\n{}",
828            merged
829        );
830        // User formatting must be intact
831        assert!(
832            merged.contains("**Soft-bristle brush only**"),
833            "User formatting broken. Got:\n{}",
834            merged
835        );
836        // No character-level interleaving
837        assert!(
838            !merged.contains("*C*C") && !merged.contains("**Sot"),
839            "Character interleaving detected. Got:\n{}",
840            merged
841        );
842    }
843
844    /// Regression test: Agent replaces multi-line block while user inserts within it.
845    /// With from_chars, this produces ~20 scattered character-level ops that interleave
846    /// with user edits. With from_lines, ops are contiguous line-level blocks.
847    ///
848    /// Uses a template document structure to match the real workflow where the baseline
849    /// (common ancestor) contains the exchange component with original content.
850    #[test]
851    fn merge_replace_vs_insert_no_interleaving() {
852        let header = "---\nagent_doc_format: template\nagent_doc_write: crdt\n---\n\n# Document Title\n\nSome preamble text that both sides share.\nThis provides enough common prefix to avoid stale detection.\n\n<!-- agent:exchange -->\n";
853        let footer = "<!-- /agent:exchange -->\n";
854
855        let old_exchange = "Line one of old content\nLine two of old content\nLine three of old content\n";
856        let baseline = format!("{header}{old_exchange}{footer}");
857        let baseline_doc = CrdtDoc::from_text(&baseline);
858        let baseline_state = baseline_doc.encode_state();
859
860        // Agent replaces exchange with completely new content
861        let agent_exchange = "Completely new line one\nCompletely new line two\nCompletely new line three\nCompletely new line four\n";
862        let ours = format!("{header}{agent_exchange}{footer}");
863
864        // User inserts a line in the middle of the original exchange
865        let theirs = format!("{header}Line one of old content\nUser inserted this line\nLine two of old content\nLine three of old content\n{footer}");
866
867        let merged = merge(Some(&baseline_state), &ours, &theirs).unwrap();
868
869        // Agent text should be contiguous — no mid-word splits
870        assert!(
871            merged.contains("Completely new line one"),
872            "Agent line 1 missing or garbled. Got:\n{}", merged
873        );
874        assert!(
875            merged.contains("Completely new line two"),
876            "Agent line 2 missing or garbled. Got:\n{}", merged
877        );
878
879        // User text should be preserved
880        assert!(
881            merged.contains("User inserted this line"),
882            "User insertion missing. Got:\n{}", merged
883        );
884
885        // No character interleaving (e.g., "Complete" + user text + "ly")
886        assert!(
887            !merged.contains("CompleteUser") && !merged.contains("Complete\nUser"),
888            "Character interleaving detected. Got:\n{}", merged
889        );
890    }
891
892    // -----------------------------------------------------------------------
893    // dedup_adjacent_blocks tests (#15)
894    // -----------------------------------------------------------------------
895
896    #[test]
897    fn dedup_removes_identical_adjacent_blocks() {
898        let text = "### Re: Question\nAnswer here.\n\n### Re: Question\nAnswer here.\n\nDifferent block.";
899        let result = dedup_adjacent_blocks(text);
900        assert_eq!(result.matches("### Re: Question").count(), 1);
901        assert!(result.contains("Different block."));
902    }
903
904    #[test]
905    fn dedup_preserves_different_adjacent_blocks() {
906        let text = "### Re: First\nAnswer one.\n\n### Re: Second\nAnswer two.";
907        let result = dedup_adjacent_blocks(text);
908        assert!(result.contains("### Re: First"));
909        assert!(result.contains("### Re: Second"));
910    }
911
912    #[test]
913    fn dedup_ignores_short_repeated_lines() {
914        // Single-line blocks like "---" should not be deduped
915        let text = "---\n\n---\n\nContent.";
916        let result = dedup_adjacent_blocks(text);
917        assert_eq!(result, text);
918    }
919
920    #[test]
921    fn dedup_handles_empty_text() {
922        assert_eq!(dedup_adjacent_blocks(""), "");
923    }
924
925    #[test]
926    fn dedup_no_change_when_no_duplicates() {
927        let text = "Block A\nLine 2.\n\nBlock B\nLine 2.";
928        let result = dedup_adjacent_blocks(text);
929        assert_eq!(result, text);
930    }
931}