Skip to main content

agent_doc/
crdt.rs

1use anyhow::{Context, Result};
2use yrs::types::text::{ChangeKind, YChange};
3use yrs::updates::decoder::Decode;
4use yrs::{Doc, GetString, ReadTxn, Text, TextRef, Transact, Update};
5
/// Name of the root text type inside the Yjs document; every read and write
/// in this module goes through `get_or_insert_text(TEXT_KEY)`.
const TEXT_KEY: &str = "content";
7
8/// CRDT document wrapping a Yjs `Doc` for conflict-free merging.
9pub struct CrdtDoc {
10    doc: Doc,
11}
12
13impl CrdtDoc {
14    /// Create a new CRDT document initialized with the given text content.
15    pub fn from_text(content: &str) -> Self {
16        let doc = Doc::new();
17        let text = doc.get_or_insert_text(TEXT_KEY);
18        let mut txn = doc.transact_mut();
19        text.insert(&mut txn, 0, content);
20        drop(txn);
21        CrdtDoc { doc }
22    }
23
24    /// Extract the current text content from the CRDT document.
25    pub fn to_text(&self) -> String {
26        let text = self.doc.get_or_insert_text(TEXT_KEY);
27        let txn = self.doc.transact();
28        text.get_string(&txn)
29    }
30
31    /// Apply a local edit: delete `delete_len` chars at `offset`, then insert `insert` there.
32    #[allow(dead_code)] // Used in tests and Phase 4 stream write-back
33    pub fn apply_edit(&self, offset: u32, delete_len: u32, insert: &str) {
34        let text = self.doc.get_or_insert_text(TEXT_KEY);
35        let mut txn = self.doc.transact_mut();
36        if delete_len > 0 {
37            text.remove_range(&mut txn, offset, delete_len);
38        }
39        if !insert.is_empty() {
40            text.insert(&mut txn, offset, insert);
41        }
42    }
43
44    /// Encode the full document state (for persistence).
45    pub fn encode_state(&self) -> Vec<u8> {
46        let txn = self.doc.transact();
47        txn.encode_state_as_update_v1(&yrs::StateVector::default())
48    }
49
50    /// Decode a previously encoded state into a new CrdtDoc.
51    pub fn decode_state(bytes: &[u8]) -> Result<Self> {
52        let doc = Doc::new();
53        let update = Update::decode_v1(bytes)
54            .map_err(|e| anyhow::anyhow!("failed to decode CRDT state: {}", e))?;
55        let mut txn = doc.transact_mut();
56        txn.apply_update(update)
57            .map_err(|e| anyhow::anyhow!("failed to apply CRDT update: {}", e))?;
58        drop(txn);
59        Ok(CrdtDoc { doc })
60    }
61}
62
63/// Reorder agent content (client_id=1) before human content (client_id=2)
64/// at the append boundary after a CRDT merge.
65///
66/// After merging, when both sides append to the same position, the CRDT may
67/// place human content before agent content. This function detects that case
68/// and swaps the groups so agent content appears first at the boundary.
69fn reorder_agent_before_human(doc: &Doc) -> String {
70    let text = doc.get_or_insert_text(TEXT_KEY);
71    let txn = doc.transact();
72    let chunks = text.diff(&txn, YChange::identity);
73
74    if chunks.is_empty() {
75        return text.get_string(&txn);
76    }
77
78    // Extract text content from each chunk along with its attribution
79    #[derive(Debug)]
80    struct Chunk {
81        text: String,
82        is_added: bool,
83        client: u64,
84    }
85
86    let mut parts: Vec<Chunk> = Vec::new();
87    for chunk in &chunks {
88        let s = chunk.insert.clone().to_string(&txn);
89        let (is_added, client) = match &chunk.ychange {
90            Some(yc) if yc.kind == ChangeKind::Added => (true, yc.id.client),
91            _ => (false, 0),
92        };
93        parts.push(Chunk {
94            text: s,
95            is_added,
96            client,
97        });
98    }
99
100    // Find the append boundary: contiguous Added chunks at the end
101    let boundary_start = {
102        let mut i = parts.len();
103        while i > 0 && parts[i - 1].is_added {
104            i -= 1;
105        }
106        i
107    };
108
109    // If no boundary or only one chunk in the boundary, nothing to reorder
110    if boundary_start >= parts.len() || (parts.len() - boundary_start) < 2 {
111        return text.get_string(&txn);
112    }
113
114    let boundary = &parts[boundary_start..];
115
116    // Check if there's a human chunk (client=2) before an agent chunk (client=1)
117    // within the boundary. If so, we need to reorder.
118    let mut found_human_before_agent = false;
119    let mut seen_human = false;
120    for chunk in boundary {
121        if chunk.client == 2 {
122            seen_human = true;
123        } else if chunk.client == 1 && seen_human {
124            found_human_before_agent = true;
125            break;
126        }
127    }
128
129    if !found_human_before_agent {
130        return text.get_string(&txn);
131    }
132
133    // Separate boundary chunks into agent group and human group
134    let mut agent_text = String::new();
135    let mut human_text = String::new();
136    for chunk in boundary {
137        if chunk.client == 1 {
138            agent_text.push_str(&chunk.text);
139        } else if chunk.client == 2 {
140            human_text.push_str(&chunk.text);
141        } else {
142            // Other clients — append to human group as a fallback
143            human_text.push_str(&chunk.text);
144        }
145    }
146
147    eprintln!(
148        "[crdt] reorder: moving agent content ({} bytes) before human content ({} bytes) at append boundary",
149        agent_text.len(),
150        human_text.len()
151    );
152
153    // Reconstruct: prefix + agent + human
154    let mut result = String::new();
155    for chunk in &parts[..boundary_start] {
156        result.push_str(&chunk.text);
157    }
158    result.push_str(&agent_text);
159    result.push_str(&human_text);
160
161    result
162}
163
164/// Merge two concurrent text versions against a common base using CRDT.
165///
166/// Creates three CRDT actors: base, ours, theirs.
167/// Applies each side's edits as diffs from the base, then merges updates.
168/// Returns the merged text (conflict-free).
169///
170/// **Stale base detection:** If the CRDT base text doesn't match either ours
171/// or theirs as a prefix/substring, the base is stale. In that case, we use
172/// `ours_text` as the base to prevent duplicate insertions.
173pub fn merge(base_state: Option<&[u8]>, ours_text: &str, theirs_text: &str) -> Result<String> {
174    // Short-circuit: if both sides are identical, no merge needed
175    if ours_text == theirs_text {
176        eprintln!("[crdt] ours == theirs, skipping merge");
177        return Ok(ours_text.to_string());
178    }
179
180    // Bootstrap base doc from state or empty
181    let base_doc = if let Some(bytes) = base_state {
182        CrdtDoc::decode_state(bytes)
183            .context("failed to decode base CRDT state")?
184    } else {
185        CrdtDoc::from_text("")
186    };
187    let mut base_text = base_doc.to_text();
188
189    eprintln!(
190        "[crdt] merge: base_len={} ours_len={} theirs_len={}",
191        base_text.len(),
192        ours_text.len(),
193        theirs_text.len()
194    );
195
196    // Stale base detection: if the base text doesn't share a common prefix
197    // with both sides, it's stale. Use ours as the base instead.
198    // This prevents duplicate insertions when both sides contain text
199    // that the stale base doesn't have.
200    let ours_common = common_prefix_len(&base_text, ours_text);
201    let theirs_common = common_prefix_len(&base_text, theirs_text);
202    let base_len = base_text.len();
203
204    if base_len > 0
205        && (ours_common as f64 / base_len as f64) < 0.5
206        && (theirs_common as f64 / base_len as f64) < 0.5
207    {
208        eprintln!(
209            "[crdt] Stale CRDT base detected (common prefix: ours={}%, theirs={}%). Using ours as base.",
210            (ours_common * 100) / base_len,
211            (theirs_common * 100) / base_len
212        );
213        base_text = ours_text.to_string();
214    }
215
216    // Advance base to the common prefix of ours and theirs when it extends
217    // beyond the current base.
218    //
219    // When both ours and theirs independently added the same text beyond the
220    // stale base (e.g., both contain a user prompt that the base doesn't have),
221    // the CRDT treats each insertion as independent and includes both, causing
222    // duplication. Fix: use the common prefix of ours and theirs as the effective
223    // base, so shared additions are not treated as independent insertions.
224    //
225    // This handles the common pattern where:
226    //   base   = "old content"
227    //   ours   = "old content + user prompt + agent response"
228    //   theirs = "old content + user prompt + small edit"
229    // Without fix: user prompt appears twice (from both sides).
230    // With fix: base advances to "old content + user prompt", ours' diff is
231    //           just the agent response, theirs' diff is just the small edit.
232    let mutual_prefix = common_prefix_len(ours_text, theirs_text);
233    if mutual_prefix > base_text.len() {
234        // Snap to a line boundary to avoid splitting mid-line/mid-word.
235        // Without this, the shared prefix can include partial formatting
236        // sequences (e.g., a leading `*` from `**bold**`), causing the
237        // CRDT merge to separate that character from the rest of the
238        // formatting, producing garbled text like `*Soft-bristle brush only**`
239        // instead of `**Soft-bristle brush only**`.
240        let snap = &ours_text[..mutual_prefix];
241        let snapped = match snap.rfind('\n') {
242            Some(pos) if pos >= base_text.len() => pos + 1,
243            _ => base_text.len(), // no suitable line boundary — don't advance
244        };
245        if snapped > base_text.len() {
246            eprintln!(
247                "[crdt] Advancing base to shared prefix (base_len={} → {})",
248                base_text.len(),
249                snapped
250            );
251            base_text = ours_text[..snapped].to_string();
252        }
253    }
254
255    // Compute diffs from base to each side
256    let ours_ops = compute_edit_ops(&base_text, ours_text);
257    let theirs_ops = compute_edit_ops(&base_text, theirs_text);
258
259    // Create two independent docs from the base state.
260    // If base was overridden (stale detection), rebuild from the new base_text.
261    let base_encoded = if base_text == base_doc.to_text() {
262        base_doc.encode_state()
263    } else {
264        CrdtDoc::from_text(&base_text).encode_state()
265    };
266
267    let ours_doc = Doc::with_client_id(1);
268    {
269        let update = Update::decode_v1(&base_encoded)
270            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
271        let mut txn = ours_doc.transact_mut();
272        txn.apply_update(update)
273            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
274    }
275
276    let theirs_doc = Doc::with_client_id(2);
277    {
278        let update = Update::decode_v1(&base_encoded)
279            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
280        let mut txn = theirs_doc.transact_mut();
281        txn.apply_update(update)
282            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
283    }
284
285    // Apply ours edits
286    {
287        let text = ours_doc.get_or_insert_text(TEXT_KEY);
288        let mut txn = ours_doc.transact_mut();
289        apply_ops(&text, &mut txn, &ours_ops);
290    }
291
292    // Apply theirs edits
293    {
294        let text = theirs_doc.get_or_insert_text(TEXT_KEY);
295        let mut txn = theirs_doc.transact_mut();
296        apply_ops(&text, &mut txn, &theirs_ops);
297    }
298
299    // Merge: apply theirs' changes into ours
300    let ours_sv = {
301        let txn = ours_doc.transact();
302        txn.state_vector()
303    };
304    let theirs_update = {
305        let txn = theirs_doc.transact();
306        txn.encode_state_as_update_v1(&ours_sv)
307    };
308    {
309        let update = Update::decode_v1(&theirs_update)
310            .map_err(|e| anyhow::anyhow!("decode error: {}", e))?;
311        let mut txn = ours_doc.transact_mut();
312        txn.apply_update(update)
313            .map_err(|e| anyhow::anyhow!("apply error: {}", e))?;
314    }
315
316    // Read merged result with agent-before-human reordering at append boundary
317    let merged = reorder_agent_before_human(&ours_doc);
318
319    // Post-merge dedup: remove identical adjacent blocks (#15)
320    Ok(dedup_adjacent_blocks(&merged))
321}
322
/// Drop a block when it repeats the block kept immediately before it.
///
/// Blocks are the pieces of `text` separated by a blank line (`"\n\n"`).
/// Two blocks count as equal when they match after trimming surrounding
/// whitespace. Only blocks with at least two non-empty lines are eligible,
/// so short repeated lines such as `---` are preserved. The first copy is
/// kept verbatim; each removal is logged to stderr.
pub fn dedup_adjacent_blocks(text: &str) -> String {
    // Without a blank-line separator there is only one block.
    if !text.contains("\n\n") {
        return text.to_string();
    }

    let mut kept: Vec<&str> = Vec::new();
    for block in text.split("\n\n") {
        let body = block.trim();
        let line_count = body
            .lines()
            .filter(|line| !line.trim().is_empty())
            .count();

        let repeats_previous = match kept.last() {
            Some(previous) => line_count >= 2 && previous.trim() == body,
            None => false,
        };

        if repeats_previous {
            eprintln!("[crdt] dedup: removed duplicate block ({} lines)", line_count);
        } else {
            kept.push(block);
        }
    }

    kept.join("\n\n")
}
353
354/// Compact a CRDT state by re-encoding (GC tombstones where possible).
355pub fn compact(state: &[u8]) -> Result<Vec<u8>> {
356    let doc = CrdtDoc::decode_state(state)?;
357    Ok(doc.encode_state())
358}
359
/// Length in bytes of the longest common prefix of `a` and `b` that ends on a
/// UTF-8 character boundary.
///
/// A plain byte-wise comparison can stop in the middle of a multi-byte
/// character (two different characters may share their leading bytes). The
/// result is used as a slice index, so it is backed off to the nearest
/// preceding boundary; `&a[..n]` and `&b[..n]` are then always valid.
fn common_prefix_len(a: &str, b: &str) -> usize {
    let mut len = a
        .bytes()
        .zip(b.bytes())
        .take_while(|(x, y)| x == y)
        .count();
    // The first `len` bytes are identical in both strings, so a boundary in
    // `a` is a boundary in `b` as well. Index 0 is always a boundary.
    while !a.is_char_boundary(len) {
        len -= 1;
    }
    len
}
364
/// Edit operation for replaying diffs onto a CRDT text.
///
/// A sequence of these, applied left to right with a moving cursor, turns one
/// text into another. Lengths are the `str::len` (byte) lengths of the diffed
/// pieces, as computed in `compute_edit_ops`.
#[derive(Debug)]
enum EditOp {
    /// Keep the next `n` units unchanged and move the cursor past them.
    Retain(u32),
    /// Remove `n` units at the cursor; the cursor does not move.
    Delete(u32),
    /// Insert this string at the cursor and move the cursor past it.
    Insert(String),
}
372
373/// Compute edit operations to transform `from` into `to` using `similar` diff.
374fn compute_edit_ops(from: &str, to: &str) -> Vec<EditOp> {
375    use similar::{ChangeTag, TextDiff};
376
377    let diff = TextDiff::from_lines(from, to);
378    let mut ops = Vec::new();
379
380    for change in diff.iter_all_changes() {
381        match change.tag() {
382            ChangeTag::Equal => {
383                let len = change.value().len() as u32;
384                if let Some(EditOp::Retain(n)) = ops.last_mut() {
385                    *n += len;
386                } else {
387                    ops.push(EditOp::Retain(len));
388                }
389            }
390            ChangeTag::Delete => {
391                let len = change.value().len() as u32;
392                if let Some(EditOp::Delete(n)) = ops.last_mut() {
393                    *n += len;
394                } else {
395                    ops.push(EditOp::Delete(len));
396                }
397            }
398            ChangeTag::Insert => {
399                let s = change.value().to_string();
400                if let Some(EditOp::Insert(existing)) = ops.last_mut() {
401                    existing.push_str(&s);
402                } else {
403                    ops.push(EditOp::Insert(s));
404                }
405            }
406        }
407    }
408
409    ops
410}
411
412/// Apply edit operations to a Yrs text type within a transaction.
413fn apply_ops(text: &TextRef, txn: &mut yrs::TransactionMut<'_>, ops: &[EditOp]) {
414    let mut cursor: u32 = 0;
415    for op in ops {
416        match op {
417            EditOp::Retain(n) => cursor += n,
418            EditOp::Delete(n) => {
419                text.remove_range(txn, cursor, *n);
420                // cursor stays — content shifted left
421            }
422            EditOp::Insert(s) => {
423                text.insert(txn, cursor, s);
424                cursor += s.len() as u32;
425            }
426        }
427    }
428}
429
430#[cfg(test)]
431mod tests {
432    use super::*;
433
434    #[test]
435    fn roundtrip_text() {
436        let content = "Hello, world!\nLine two.\n";
437        let doc = CrdtDoc::from_text(content);
438        assert_eq!(doc.to_text(), content);
439    }
440
441    #[test]
442    fn roundtrip_encode_decode() {
443        let content = "Some document content.\n";
444        let doc = CrdtDoc::from_text(content);
445        let encoded = doc.encode_state();
446        let decoded = CrdtDoc::decode_state(&encoded).unwrap();
447        assert_eq!(decoded.to_text(), content);
448    }
449
450    #[test]
451    fn apply_edit_insert() {
452        let doc = CrdtDoc::from_text("Hello world");
453        doc.apply_edit(5, 0, ",");
454        assert_eq!(doc.to_text(), "Hello, world");
455    }
456
457    #[test]
458    fn apply_edit_delete() {
459        let doc = CrdtDoc::from_text("Hello, world");
460        doc.apply_edit(5, 1, "");
461        assert_eq!(doc.to_text(), "Hello world");
462    }
463
464    #[test]
465    fn apply_edit_replace() {
466        let doc = CrdtDoc::from_text("Hello world");
467        doc.apply_edit(6, 5, "Rust");
468        assert_eq!(doc.to_text(), "Hello Rust");
469    }
470
471    #[test]
472    fn concurrent_append_merge_no_conflict() {
473        let base = "# Document\n\nBase content.\n";
474        let base_doc = CrdtDoc::from_text(base);
475        let base_state = base_doc.encode_state();
476
477        let ours = format!("{base}## Agent\n\nAgent response.\n");
478        let theirs = format!("{base}## User\n\nUser addition.\n");
479
480        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
481
482        // Both additions should be present
483        assert!(merged.contains("Agent response."), "missing agent text");
484        assert!(merged.contains("User addition."), "missing user text");
485        assert!(merged.contains("Base content."), "missing base text");
486        // No conflict markers
487        assert!(!merged.contains("<<<<<<<"));
488        assert!(!merged.contains(">>>>>>>"));
489    }
490
491    #[test]
492    fn concurrent_insert_same_position() {
493        let base = "Line 1\nLine 3\n";
494        let base_doc = CrdtDoc::from_text(base);
495        let base_state = base_doc.encode_state();
496
497        let ours = "Line 1\nAgent line\nLine 3\n";
498        let theirs = "Line 1\nUser line\nLine 3\n";
499
500        let merged = merge(Some(&base_state), ours, theirs).unwrap();
501
502        // Both insertions preserved, no conflict
503        assert!(merged.contains("Agent line"), "missing agent insertion");
504        assert!(merged.contains("User line"), "missing user insertion");
505        assert!(merged.contains("Line 1"), "missing line 1");
506        assert!(merged.contains("Line 3"), "missing line 3");
507    }
508
509    #[test]
510    fn merge_no_base_state() {
511        // When no base state exists, bootstrap from empty
512        let ours = "Agent wrote this.\n";
513        let theirs = "User wrote this.\n";
514
515        let merged = merge(None, ours, theirs).unwrap();
516
517        assert!(merged.contains("Agent wrote this."));
518        assert!(merged.contains("User wrote this."));
519    }
520
521    #[test]
522    fn compact_preserves_content() {
523        let doc = CrdtDoc::from_text("Hello");
524        doc.apply_edit(5, 0, " world");
525        doc.apply_edit(11, 0, "!");
526
527        let state = doc.encode_state();
528        let compacted = compact(&state).unwrap();
529        let restored = CrdtDoc::decode_state(&compacted).unwrap();
530
531        assert_eq!(restored.to_text(), "Hello world!");
532        assert!(compacted.len() <= state.len());
533    }
534
535    #[test]
536    fn compact_reduces_size_after_edits() {
537        let doc = CrdtDoc::from_text("aaaa");
538        // Many small edits to build up tombstones
539        for i in 0..20 {
540            let c = ((b'a' + (i % 26)) as char).to_string();
541            doc.apply_edit(0, 1, &c);
542        }
543        let state = doc.encode_state();
544        let compacted = compact(&state).unwrap();
545        let restored = CrdtDoc::decode_state(&compacted).unwrap();
546        assert_eq!(restored.to_text(), doc.to_text());
547    }
548
549    #[test]
550    fn empty_document() {
551        let doc = CrdtDoc::from_text("");
552        assert_eq!(doc.to_text(), "");
553
554        let encoded = doc.encode_state();
555        let decoded = CrdtDoc::decode_state(&encoded).unwrap();
556        assert_eq!(decoded.to_text(), "");
557    }
558
559    #[test]
560    fn decode_invalid_bytes_errors() {
561        let result = CrdtDoc::decode_state(&[0xff, 0xfe, 0xfd]);
562        assert!(result.is_err());
563    }
564
565    #[test]
566    fn merge_identical_texts() {
567        let base = "Same content.\n";
568        let base_doc = CrdtDoc::from_text(base);
569        let state = base_doc.encode_state();
570
571        let merged = merge(Some(&state), base, base).unwrap();
572        assert_eq!(merged, base);
573    }
574
575    #[test]
576    fn merge_one_side_unchanged() {
577        let base = "Original.\n";
578        let base_doc = CrdtDoc::from_text(base);
579        let state = base_doc.encode_state();
580
581        let ours = "Original.\nAgent added.\n";
582        let merged = merge(Some(&state), ours, base).unwrap();
583        assert_eq!(merged, ours);
584    }
585
586    /// Regression test: CRDT merge should not duplicate user prompt when both
587    /// ours and theirs contain the same text added since the base state.
588    ///
589    /// Scenario (brookebrodack-dev.md duplication bug):
590    /// 1. CRDT base = exchange content from a previous cycle (no user prompt)
591    /// 2. User adds prompt to exchange → saved as baseline
592    /// 3. Agent generates response, content_ours = baseline + response (has user prompt)
593    /// 4. User makes a small edit during response generation → content_current (has user prompt too)
594    /// 5. CRDT merge: both ours and theirs have the user prompt relative to stale base
595    /// 6. BUG: user prompt appears twice in merged output
596    #[test]
597    fn merge_stale_base_no_duplicate_user_prompt() {
598        // CRDT base from a previous cycle — does NOT have the user's current prompt
599        let base_content = "\
600## Assistant
601
602Previous response content.
603
604Committed and pushed.
605
606";
607        let base_doc = CrdtDoc::from_text(base_content);
608        let base_state = base_doc.encode_state();
609
610        // User adds prompt after base was saved
611        let user_prompt = "\
612Opening a video a shows video a.
613Closing video a then opening video b start video b but video b is hidden.
614Closing video b then reopening video b starts and shows video b. video b is visible.
615";
616
617        // content_ours: base + user prompt + agent response (from run_stream with full exchange)
618        let ours = format!("\
619{}{}### Re: Close A → Open B still hidden
620
621Added explicit height and visibility reset.
622
623Committed and pushed.
624
625", base_content, user_prompt);
626
627        // content_current: base + user prompt + minor user edit (e.g., added a blank line)
628        let theirs = format!("\
629{}{}
630", base_content, user_prompt);
631
632        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
633
634        // User prompt should appear exactly ONCE
635        let prompt_count = merged.matches("Opening a video a shows video a.").count();
636        assert_eq!(
637            prompt_count, 1,
638            "User prompt duplicated! Appeared {} times in:\n{}",
639            prompt_count, merged
640        );
641
642        // Agent response should be present
643        assert!(
644            merged.contains("### Re: Close A → Open B still hidden"),
645            "Agent response missing from merge:\n{}", merged
646        );
647    }
648
649    /// Regression test: When CRDT base is stale and both sides added the same text
650    /// at the same position, the merge should not duplicate it.
651    #[test]
652    fn merge_stale_base_same_insertion_both_sides() {
653        let base_content = "Line 1\nLine 2\n";
654        let base_doc = CrdtDoc::from_text(base_content);
655        let base_state = base_doc.encode_state();
656
657        // Both sides added the same text (user prompt) + ours adds more
658        let shared_addition = "User typed this.\n";
659        let ours = format!("{}{}Agent response.\n", base_content, shared_addition);
660        let theirs = format!("{}{}", base_content, shared_addition);
661
662        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
663
664        let count = merged.matches("User typed this.").count();
665        assert_eq!(
666            count, 1,
667            "Shared text duplicated! Appeared {} times in:\n{}",
668            count, merged
669        );
670        assert!(merged.contains("Agent response."), "Agent text missing:\n{}", merged);
671    }
672
673    /// Regression test: Character-level interleaving bug.
674    ///
675    /// When the user types in their editor while the agent is streaming,
676    /// both sides insert text at the same position relative to the base.
677    /// The CRDT base advancement logic used to snap to the shared prefix
678    /// of ours/theirs, which could land mid-line on a shared formatting
679    /// character (e.g., `*` from `*bold*` and `**bold**`). This caused
680    /// the formatting character to be absorbed into the base, splitting
681    /// it from the rest of the formatting sequence and producing garbled
682    /// text like `*Soft-bristle brush only**` instead of
683    /// `**Soft-bristle brush only**`.
684    ///
685    /// The fix: always snap the advanced base to a line boundary. If no
686    /// suitable line boundary exists after the current base length, don't
687    /// advance at all.
688    #[test]
689    fn merge_no_character_interleaving() {
690        // Base: a document with some existing content
691        let base = "# Doc\n\nPrevious content.\n\n";
692        let base_doc = CrdtDoc::from_text(base);
693        let base_state = base_doc.encode_state();
694
695        // Agent adds a response
696        let ours = "# Doc\n\nPrevious content.\n\n*Compacted. Content archived to*\n";
697        // User types something in their editor at the same position
698        let theirs = "# Doc\n\nPrevious content.\n\n**Soft-bristle brush only**\n";
699
700        let merged = merge(Some(&base_state), ours, theirs).unwrap();
701
702        // Both texts should be present as contiguous blocks, not interleaved
703        assert!(
704            merged.contains("*Compacted. Content archived to*"),
705            "Agent text should be contiguous (not interleaved). Got:\n{}",
706            merged
707        );
708        assert!(
709            merged.contains("**Soft-bristle brush only**"),
710            "User text should be contiguous (not interleaved). Got:\n{}",
711            merged
712        );
713    }
714
715    /// Regression test: Concurrent edits within the same line should not
716    /// produce character-level interleaving.
717    #[test]
718    fn merge_concurrent_same_line_no_garbling() {
719        let base = "Some base text\n";
720        let base_doc = CrdtDoc::from_text(base);
721        let base_state = base_doc.encode_state();
722
723        // Both sides replace the line with different content
724        let ours = "Agent wrote this line\n";
725        let theirs = "User wrote different text\n";
726
727        let merged = merge(Some(&base_state), ours, theirs).unwrap();
728
729        // At least one side's text should appear contiguously
730        let has_agent_contiguous = merged.contains("Agent wrote this line");
731        let has_user_contiguous = merged.contains("User wrote different text");
732
733        assert!(
734            has_agent_contiguous || has_user_contiguous,
735            "At least one side should have contiguous text (no char interleaving). Got:\n{}",
736            merged
737        );
738    }
739
740    /// Regression test: Replace-vs-append corruption (lazily-rs.md bug).
741    ///
742    /// Pattern:
743    /// - CRDT base is from a previous cycle (old exchange content)
744    /// - Agent replaces exchange content entirely (template replace mode)
745    /// - User appends new prompt text to exchange during response generation
746    /// - CRDT interleaves agent's new content with user's old + new text,
747    ///   causing mid-word splits like "key de" + [user text] + "cisions"
748    ///
749    /// Root cause: stale CRDT base doesn't match either side well enough
750    /// for prefix advancement, so the CRDT does a raw character-level merge
751    /// of the exchange section, interleaving replace and append operations.
752    ///
753    /// Fix: use baseline (not stored CRDT state) as merge base, so both
754    /// sides' diffs are computed from the exact content they diverged from.
755    #[test]
756    fn merge_replace_vs_append_no_interleaving() {
757        // Full document structure (template mode)
758        let header = "---\nagent_doc_format: template\n---\n\n# Title\n\n<!-- agent:exchange -->\n";
759        let footer = "\n<!-- /agent:exchange -->\n";
760
761        // Previous cycle's exchange content (what the CRDT state contains)
762        let old_exchange = "\
763### Committed, Pushed & Released
764
765**project (v0.1.0):**
766- Committed initial implementation
767- Tagged v0.1.0 and pushed
768
769Add a README.md to the project.
770Also add AGENTS.md with a symlink CLAUDE.md
771
772**sub-project:**
773- Committed fix + SPEC.md
774- Pushed to remote
775";
776        let stale_base = format!("{header}{old_exchange}{footer}");
777        let stale_state = CrdtDoc::from_text(&stale_base).encode_state();
778
779        // Baseline (what the file looked like when response generation started)
780        // Same as stale_base in this case — no user edits between cycles
781        let _baseline = stale_base.clone();
782
783        // Ours: agent replaces exchange content (template replace mode applied)
784        let agent_exchange = "\
785### Done
786
787Added to project and pushed:
788
789- **README.md** — overview, usage, design notes
790- **AGENTS.md** — architecture, key decisions, commands, related projects
791- **CLAUDE.md** → symlink to AGENTS.md
792
793All committed and pushed.
794";
795        let ours = format!("{header}{agent_exchange}{footer}");
796
797        // Theirs: user inserted new prompt IN THE MIDDLE of the exchange section
798        // (after the existing user prompt, before the sub-project sections)
799        // This is the critical difference — insertion within the range that ours deletes
800        let theirs_exchange = "\
801### Committed, Pushed & Released
802
803**project (v0.1.0):**
804- Committed initial implementation
805- Tagged v0.1.0 and pushed
806
807Add a README.md to the project.
808Also add AGENTS.md with a symlink CLAUDE.md
809
810Please add tests.
811Please comprehensively test adherence to the spec.
812
813**sub-project:**
814- Committed fix + SPEC.md
815- Pushed to remote
816";
817        let theirs = format!("{header}{theirs_exchange}{footer}");
818
819        // Using stale CRDT state (previous cycle) — this is what triggers the bug
820        let merged = merge(Some(&stale_state), &ours, &theirs).unwrap();
821
822        // Agent's replacement text should be contiguous (no interleaving)
823        assert!(
824            merged.contains("- **AGENTS.md** — architecture, key decisions, commands, related projects"),
825            "Agent text garbled (mid-word split). Got:\n{}", merged
826        );
827
828        // User's addition should be preserved
829        assert!(
830            merged.contains("Please add tests."),
831            "User addition missing. Got:\n{}", merged
832        );
833
834        // No fragments of old content mixed into agent's new content
835        assert!(
836            !merged.contains("key deAdd") && !merged.contains("key de\n"),
837            "Old content interleaved into agent text. Got:\n{}", merged
838        );
839    }
840
841    /// Same as merge_replace_vs_append_no_interleaving but using baseline
842    /// as CRDT base instead of stale state. This is the fix verification.
843    #[test]
844    fn merge_replace_vs_append_with_baseline_base() {
845        let header = "---\nagent_doc_format: template\n---\n\n# Title\n\n<!-- agent:exchange -->\n";
846        let footer = "\n<!-- /agent:exchange -->\n";
847
848        let old_exchange = "\
849### Previous Response
850
851Old content here.
852
853Add a README.md to the project.
854Also add AGENTS.md with a symlink CLAUDE.md
855";
856        let baseline = format!("{header}{old_exchange}{footer}");
857
858        // Ours: agent replaces exchange
859        let agent_exchange = "\
860### Done
861
862- **README.md** — overview, usage, design notes
863- **AGENTS.md** — architecture, key decisions, commands, related projects
864- **CLAUDE.md** → symlink to AGENTS.md
865
866All committed and pushed.
867";
868        let ours = format!("{header}{agent_exchange}{footer}");
869
870        // Theirs: user appended new prompt
871        let user_addition = "\nPlease add tests.\n";
872        let theirs = format!("{header}{old_exchange}{user_addition}{footer}");
873
874        // Use baseline as CRDT base (the fix)
875        let baseline_state = CrdtDoc::from_text(&baseline).encode_state();
876        let merged = merge(Some(&baseline_state), &ours, &theirs).unwrap();
877
878        // Agent text should be contiguous
879        assert!(
880            merged.contains("key decisions, commands, related projects"),
881            "Agent text garbled. Got:\n{}", merged
882        );
883
884        // User addition preserved
885        assert!(
886            merged.contains("Please add tests."),
887            "User addition missing. Got:\n{}", merged
888        );
889    }
890
891    /// Regression test: Simulates the exact scenario from the bug report.
892    ///
893    /// The agent streams a response into the exchange component while
894    /// the user types in their editor. Both sides share a common prefix
895    /// that includes markdown formatting characters. The CRDT merge must
896    /// preserve formatting integrity for both sides.
897    #[test]
898    fn merge_streaming_concurrent_edit_preserves_formatting() {
899        // Exchange component content after user's initial prompt
900        let base = "commit and push all rappstack packages.\n\n";
901        let base_doc = CrdtDoc::from_text(base);
902        let base_state = base_doc.encode_state();
903
904        // Agent's response (content_ours = user prompt + agent response)
905        let ours = "\
906commit and push all rappstack packages.
907
908### Re: commit and push
909
910*Compacted. Content archived to `docs/`*
911
912Done — all packages pushed.
913";
914
915        // User's concurrent edit (added a note at the bottom)
916        let theirs = "\
917commit and push all rappstack packages.
918
919**Soft-bristle brush only**
920";
921
922        let merged = merge(Some(&base_state), ours, theirs).unwrap();
923
924        // Agent formatting must be intact
925        assert!(
926            merged.contains("*Compacted. Content archived to `docs/`*"),
927            "Agent formatting broken. Got:\n{}",
928            merged
929        );
930        // User formatting must be intact
931        assert!(
932            merged.contains("**Soft-bristle brush only**"),
933            "User formatting broken. Got:\n{}",
934            merged
935        );
936        // No character-level interleaving
937        assert!(
938            !merged.contains("*C*C") && !merged.contains("**Sot"),
939            "Character interleaving detected. Got:\n{}",
940            merged
941        );
942    }
943
944    /// Regression test: Agent replaces multi-line block while user inserts within it.
945    /// With from_chars, this produces ~20 scattered character-level ops that interleave
946    /// with user edits. With from_lines, ops are contiguous line-level blocks.
947    ///
948    /// Uses a template document structure to match the real workflow where the baseline
949    /// (common ancestor) contains the exchange component with original content.
950    #[test]
951    fn merge_replace_vs_insert_no_interleaving() {
952        let header = "---\nagent_doc_format: template\nagent_doc_write: crdt\n---\n\n# Document Title\n\nSome preamble text that both sides share.\nThis provides enough common prefix to avoid stale detection.\n\n<!-- agent:exchange -->\n";
953        let footer = "<!-- /agent:exchange -->\n";
954
955        let old_exchange = "Line one of old content\nLine two of old content\nLine three of old content\n";
956        let baseline = format!("{header}{old_exchange}{footer}");
957        let baseline_doc = CrdtDoc::from_text(&baseline);
958        let baseline_state = baseline_doc.encode_state();
959
960        // Agent replaces exchange with completely new content
961        let agent_exchange = "Completely new line one\nCompletely new line two\nCompletely new line three\nCompletely new line four\n";
962        let ours = format!("{header}{agent_exchange}{footer}");
963
964        // User inserts a line in the middle of the original exchange
965        let theirs = format!("{header}Line one of old content\nUser inserted this line\nLine two of old content\nLine three of old content\n{footer}");
966
967        let merged = merge(Some(&baseline_state), &ours, &theirs).unwrap();
968
969        // Agent text should be contiguous — no mid-word splits
970        assert!(
971            merged.contains("Completely new line one"),
972            "Agent line 1 missing or garbled. Got:\n{}", merged
973        );
974        assert!(
975            merged.contains("Completely new line two"),
976            "Agent line 2 missing or garbled. Got:\n{}", merged
977        );
978
979        // User text should be preserved
980        assert!(
981            merged.contains("User inserted this line"),
982            "User insertion missing. Got:\n{}", merged
983        );
984
985        // No character interleaving (e.g., "Complete" + user text + "ly")
986        assert!(
987            !merged.contains("CompleteUser") && !merged.contains("Complete\nUser"),
988            "Character interleaving detected. Got:\n{}", merged
989        );
990    }
991
992    /// Test: agent content appears before human content when both append
993    /// to the same position.
994    #[test]
995    fn reorder_agent_before_human_at_append_boundary() {
996        let base = "# Document\n\nBase content.\n";
997        let base_doc = CrdtDoc::from_text(base);
998        let base_state = base_doc.encode_state();
999
1000        // Agent appends response
1001        let ours = format!("{base}### Agent Response\n\nAgent wrote this.\n");
1002        // Human appends their own text
1003        let theirs = format!("{base}User added this line.\n");
1004
1005        let merged = merge(Some(&base_state), &ours, &theirs).unwrap();
1006
1007        // Both should be present
1008        assert!(merged.contains("Agent wrote this."), "missing agent text");
1009        assert!(merged.contains("User added this line."), "missing user text");
1010        assert!(merged.contains("Base content."), "missing base text");
1011
1012        // Agent content should appear before human content
1013        let agent_pos = merged.find("Agent wrote this.").unwrap();
1014        let human_pos = merged.find("User added this line.").unwrap();
1015        assert!(
1016            agent_pos < human_pos,
1017            "Agent content should appear before human content.\nAgent pos: {}, Human pos: {}\nMerged:\n{}",
1018            agent_pos, human_pos, merged
1019        );
1020    }
1021
1022    // -----------------------------------------------------------------------
1023    // dedup_adjacent_blocks tests (#15)
1024    // -----------------------------------------------------------------------
1025
1026    #[test]
1027    fn dedup_removes_identical_adjacent_blocks() {
1028        let text = "### Re: Question\nAnswer here.\n\n### Re: Question\nAnswer here.\n\nDifferent block.";
1029        let result = dedup_adjacent_blocks(text);
1030        assert_eq!(result.matches("### Re: Question").count(), 1);
1031        assert!(result.contains("Different block."));
1032    }
1033
1034    #[test]
1035    fn dedup_preserves_different_adjacent_blocks() {
1036        let text = "### Re: First\nAnswer one.\n\n### Re: Second\nAnswer two.";
1037        let result = dedup_adjacent_blocks(text);
1038        assert!(result.contains("### Re: First"));
1039        assert!(result.contains("### Re: Second"));
1040    }
1041
1042    #[test]
1043    fn dedup_ignores_short_repeated_lines() {
1044        // Single-line blocks like "---" should not be deduped
1045        let text = "---\n\n---\n\nContent.";
1046        let result = dedup_adjacent_blocks(text);
1047        assert_eq!(result, text);
1048    }
1049
1050    #[test]
1051    fn dedup_handles_empty_text() {
1052        assert_eq!(dedup_adjacent_blocks(""), "");
1053    }
1054
1055    #[test]
1056    fn dedup_no_change_when_no_duplicates() {
1057        let text = "Block A\nLine 2.\n\nBlock B\nLine 2.";
1058        let result = dedup_adjacent_blocks(text);
1059        assert_eq!(result, text);
1060    }
1061}