Skip to main content

omni_dev/atlassian/
diff.rs

//! Structural diff over [`AdfDocument`].
//!
//! Produces an in-memory IR (`Diff`) that the `diff_format` module renders
//! into the YAML output for `confluence_compare`. The diff is structurally
//! aware: it walks the ADF tree, splits documents into heading-delimited
//! sections, and emits per-block change records rather than character-level
//! deltas over a serialization. See the design notes in issue #706.
//!
//! Node identity uses a three-tier matcher:
//!
//! 1. **Natural-key**: the top-level `localId` when a node carries one;
//!    otherwise `attrs.localId` for `table` / `tableRow` / `tableCell` /
//!    `tableHeader` / `expand` / `nestedExpand`, `attrs.id` for `media` /
//!    `mention`, and `attrs.url` for `inlineCard` / `blockCard`.
//! 2. **Content-hash**: hash of the subtree's node types and text, bucketed
//!    by node type. Catches "moved without edit" cases.
//! 3. **Positional**: in-order pairing of the residual, restricted to nodes
//!    of the same type.
17
18use std::collections::{HashMap, HashSet};
19use std::hash::{Hash, Hasher};
20
21use serde::Serialize;
22use similar::{ChangeTag, TextDiff};
23
24use crate::atlassian::adf::{AdfDocument, AdfNode};
25
26// ── Public IR ────────────────────────────────────────────────────────
27
28/// Diff between two ADF documents.
29#[derive(Debug, Clone, Serialize)]
30pub struct Diff {
31    /// Sections present in either document, in `to` order with `Removed`
32    /// sections appended at the end.
33    pub sections: Vec<SectionDiff>,
34    /// Aggregate change statistics.
35    pub stats: DiffStats,
36}
37
38/// Aggregate counts across the diff.
39#[derive(Debug, Clone, Default, Serialize, PartialEq, Eq)]
40pub struct DiffStats {
41    /// Sections that exist only in `to`.
42    pub sections_added: u32,
43    /// Sections that exist only in `from`.
44    pub sections_removed: u32,
45    /// Sections present on both sides with at least one delta.
46    pub sections_modified: u32,
47    /// Sections present on both sides at different positions.
48    pub sections_moved: u32,
49    /// Paragraph-shaped block edits (paragraph, blockquote leaves, etc.).
50    pub paragraphs_modified: u32,
51    /// Table edits (one or more cell changes).
52    pub tables_modified: u32,
53    /// Total characters added across all prose deltas.
54    pub chars_added: u32,
55    /// Total characters removed across all prose deltas.
56    pub chars_removed: u32,
57    /// Total words added across all prose deltas.
58    pub words_added: u32,
59    /// Total words removed across all prose deltas.
60    pub words_removed: u32,
61}
62
63/// A single section-level diff entry.
64#[derive(Debug, Clone, Serialize)]
65pub struct SectionDiff {
66    /// Heading text (empty for the document preamble preceding the first heading).
67    pub heading: String,
68    /// Heading-anchor path, e.g. `/h2#background`. Empty path for the preamble.
69    pub path: String,
70    /// Coarse change classification.
71    pub change: ChangeKind,
72    /// Per-block deltas inside the section.
73    #[serde(skip_serializing_if = "Vec::is_empty")]
74    pub deltas: Vec<NodeDelta>,
75}
76
77/// Coarse change classification.
78#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
79#[serde(rename_all = "snake_case")]
80pub enum ChangeKind {
81    /// Section exists only in `to`.
82    Added,
83    /// Section exists only in `from`.
84    Removed,
85    /// Section exists on both sides with content edits.
86    Modified,
87    /// Section exists on both sides at the same position with no content edits.
88    Unchanged,
89    /// Section exists on both sides but at different positions.
90    Moved,
91}
92
93/// A per-block delta inside a section.
94#[derive(Debug, Clone, Serialize)]
95#[serde(tag = "kind", rename_all = "snake_case")]
96pub enum NodeDelta {
97    /// A whole block was added.
98    Added(NodeSnapshot),
99    /// A whole block was removed.
100    Removed(NodeSnapshot),
101    /// A paragraph or other prose-leaf block was modified.
102    Paragraph(ParagraphDelta),
103    /// A code block was modified (line-level).
104    CodeBlock(CodeBlockDelta),
105    /// A table was modified (cell-level).
106    Table(TableDelta),
107    /// A list was modified (item-level).
108    List(ListDelta),
109    /// A block changed but no specialized renderer is wired up.
110    Opaque(OpaqueDelta),
111}
112
113/// Snapshot of a block as plain text, used for added/removed entries.
114#[derive(Debug, Clone, Serialize)]
115pub struct NodeSnapshot {
116    /// ADF node type (`paragraph`, `codeBlock`, ...).
117    pub node_type: String,
118    /// Plain-text rendering of the node.
119    pub text: String,
120}
121
122/// Paragraph-level prose change.
123#[derive(Debug, Clone, Serialize)]
124pub struct ParagraphDelta {
125    /// Plain-text content before.
126    pub from_text: String,
127    /// Plain-text content after.
128    pub to_text: String,
129    /// Words added in this delta (for stats roll-up).
130    pub words_added: u32,
131    /// Words removed in this delta.
132    pub words_removed: u32,
133}
134
135/// Code-block change.
136#[derive(Debug, Clone, Serialize)]
137pub struct CodeBlockDelta {
138    /// Code language attribute, if any.
139    #[serde(skip_serializing_if = "Option::is_none")]
140    pub language: Option<String>,
141    /// Code text before.
142    pub from_text: String,
143    /// Code text after.
144    pub to_text: String,
145}
146
147/// Table change (one or more cells modified).
148#[derive(Debug, Clone, Serialize)]
149pub struct TableDelta {
150    /// Modified cells.
151    pub cells: Vec<CellDelta>,
152}
153
154/// A single modified cell.
155#[derive(Debug, Clone, Serialize)]
156pub struct CellDelta {
157    /// 0-based row.
158    pub row: usize,
159    /// 0-based column.
160    pub col: usize,
161    /// Cell text before.
162    pub from_text: String,
163    /// Cell text after.
164    pub to_text: String,
165}
166
167/// List change.
168#[derive(Debug, Clone, Default, Serialize)]
169pub struct ListDelta {
170    /// New list items, as plain text.
171    #[serde(skip_serializing_if = "Vec::is_empty")]
172    pub items_added: Vec<String>,
173    /// Removed list items, as plain text.
174    #[serde(skip_serializing_if = "Vec::is_empty")]
175    pub items_removed: Vec<String>,
176    /// Items modified in place: `(from, to)` pairs.
177    #[serde(skip_serializing_if = "Vec::is_empty")]
178    pub items_modified: Vec<(String, String)>,
179}
180
181/// Fallback delta for block types without a specialized renderer.
182#[derive(Debug, Clone, Serialize)]
183pub struct OpaqueDelta {
184    /// ADF node type that changed.
185    pub node_type: String,
186    /// Plain-text snapshot of the `from` side.
187    pub from_summary: String,
188    /// Plain-text snapshot of the `to` side.
189    pub to_summary: String,
190}
191
/// Knobs that tune how the diff is computed.
#[derive(Debug, Clone, Default)]
pub struct DiffOptions {
    /// If set, extracted text has each run of whitespace collapsed to one
    /// space (and is trimmed) before comparison, so whitespace-only edits
    /// produce no delta. Off by default.
    pub ignore_whitespace: bool,
}
199
200// ── Entry point ──────────────────────────────────────────────────────
201
202/// Computes a structural diff between two ADF documents.
203#[must_use]
204pub fn diff_documents(from: &AdfDocument, to: &AdfDocument, opts: &DiffOptions) -> Diff {
205    let from_sections = split_into_sections(&from.content, opts);
206    let to_sections = split_into_sections(&to.content, opts);
207
208    // Build path → original_index lookup tables for O(1) match.
209    let mut from_by_path: HashMap<String, usize> = HashMap::with_capacity(from_sections.len());
210    for (idx, s) in from_sections.iter().enumerate() {
211        from_by_path.insert(s.path.clone(), idx);
212    }
213
214    let mut sections: Vec<SectionDiff> = Vec::new();
215    let mut stats = DiffStats::default();
216    let mut matched_from: HashSet<usize> = HashSet::new();
217
218    for (to_idx, to_section) in to_sections.iter().enumerate() {
219        if let Some(&from_idx) = from_by_path.get(&to_section.path) {
220            matched_from.insert(from_idx);
221            let from_section = &from_sections[from_idx];
222            let deltas = diff_blocks(&from_section.blocks, &to_section.blocks, opts);
223            for delta in &deltas {
224                accumulate_delta(&mut stats, delta);
225            }
226            let change = if !deltas.is_empty() {
227                stats.sections_modified += 1;
228                ChangeKind::Modified
229            } else if from_idx != to_idx {
230                stats.sections_moved += 1;
231                ChangeKind::Moved
232            } else {
233                ChangeKind::Unchanged
234            };
235            sections.push(SectionDiff {
236                heading: to_section.heading.clone(),
237                path: to_section.path.clone(),
238                change,
239                deltas,
240            });
241        } else {
242            stats.sections_added += 1;
243            sections.push(SectionDiff {
244                heading: to_section.heading.clone(),
245                path: to_section.path.clone(),
246                change: ChangeKind::Added,
247                deltas: snapshot_blocks(&to_section.blocks),
248            });
249        }
250    }
251
252    // Append removed sections in their original order.
253    for (from_idx, from_section) in from_sections.iter().enumerate() {
254        if !matched_from.contains(&from_idx) {
255            stats.sections_removed += 1;
256            sections.push(SectionDiff {
257                heading: from_section.heading.clone(),
258                path: from_section.path.clone(),
259                change: ChangeKind::Removed,
260                deltas: snapshot_blocks(&from_section.blocks),
261            });
262        }
263    }
264
265    Diff { sections, stats }
266}
267
268// ── Section split ────────────────────────────────────────────────────
269
270#[derive(Debug, Clone)]
271struct RawSection {
272    /// Original heading node (None for the document preamble).
273    heading_node: Option<AdfNode>,
274    /// Plain heading text (empty for the preamble).
275    heading: String,
276    /// Stable section path (e.g. `/h2#background`), or empty for preamble.
277    path: String,
278    /// Body blocks of the section (excluding the heading itself).
279    blocks: Vec<AdfNode>,
280}
281
282fn split_into_sections(content: &[AdfNode], opts: &DiffOptions) -> Vec<RawSection> {
283    let mut sections: Vec<RawSection> = Vec::new();
284    let mut occurrences: HashMap<(u8, String), u32> = HashMap::new();
285    let mut current_blocks: Vec<AdfNode> = Vec::new();
286    let mut current_heading: Option<AdfNode> = None;
287    let mut current_level: u8 = 0;
288
289    for node in content {
290        if node.node_type == "heading" {
291            // Close the previous section.
292            sections.push(build_section(
293                current_heading.take(),
294                current_level,
295                std::mem::take(&mut current_blocks),
296                &mut occurrences,
297                opts,
298            ));
299            current_level = heading_level(node).unwrap_or(0);
300            current_heading = Some(node.clone());
301        } else {
302            current_blocks.push(node.clone());
303        }
304    }
305    sections.push(build_section(
306        current_heading,
307        current_level,
308        current_blocks,
309        &mut occurrences,
310        opts,
311    ));
312
313    // Drop a leading empty preamble (no heading and no blocks): common case.
314    if let Some(first) = sections.first() {
315        if first.heading_node.is_none() && first.blocks.is_empty() {
316            sections.remove(0);
317        }
318    }
319    sections
320}
321
322fn build_section(
323    heading_node: Option<AdfNode>,
324    level: u8,
325    blocks: Vec<AdfNode>,
326    occurrences: &mut HashMap<(u8, String), u32>,
327    opts: &DiffOptions,
328) -> RawSection {
329    let heading_text = heading_node
330        .as_ref()
331        .map(|n| extract_text(n, opts))
332        .unwrap_or_default();
333    let heading_text = heading_text.trim().to_string();
334    let path = if heading_node.is_some() {
335        let slug = slugify(&heading_text);
336        let key = (level, slug.clone());
337        let count = occurrences.entry(key).or_insert(0);
338        *count += 1;
339        section_path(level, &slug, *count)
340    } else {
341        String::new()
342    };
343    RawSection {
344        heading_node,
345        heading: heading_text,
346        path,
347        blocks,
348    }
349}
350
351fn heading_level(node: &AdfNode) -> Option<u8> {
352    let attrs = node.attrs.as_ref()?;
353    attrs
354        .get("level")
355        .and_then(serde_json::Value::as_u64)
356        .and_then(|n| u8::try_from(n).ok())
357}
358
/// Formats a section path as `/h{level}#{slug}`, appending `-{occurrence}`
/// for the second and later occurrences of the same heading.
fn section_path(level: u8, slug: &str, occurrence: u32) -> String {
    let mut path = format!("/h{level}#{slug}");
    if occurrence > 1 {
        path.push('-');
        path.push_str(&occurrence.to_string());
    }
    path
}
366
/// Turns heading text into a lowercase, dash-separated slug.
///
/// Alphanumeric characters are lowercased and kept; every run of other
/// characters between two alphanumeric runs becomes a single `-`. Leading
/// and trailing separators are dropped. Text without any alphanumeric
/// character yields `"section"`.
fn slugify(text: &str) -> String {
    let mut slug = String::new();
    // Set when a separator run was seen after some kept text; the dash is
    // only written once another alphanumeric character follows, so no
    // trailing dash can ever be produced.
    let mut pending_dash = false;
    for ch in text.chars() {
        if !ch.is_alphanumeric() {
            pending_dash = !slug.is_empty();
            continue;
        }
        if pending_dash {
            slug.push('-');
            pending_dash = false;
        }
        slug.extend(ch.to_lowercase());
    }
    if slug.is_empty() {
        slug.push_str("section");
    }
    slug
}
389
390// ── Text extraction & whitespace normalization ──────────────────────
391
392fn extract_text(node: &AdfNode, opts: &DiffOptions) -> String {
393    let mut out = String::new();
394    collect_text(node, &mut out);
395    if opts.ignore_whitespace {
396        normalize_whitespace(&out)
397    } else {
398        out
399    }
400}
401
402fn collect_text(node: &AdfNode, out: &mut String) {
403    if let Some(t) = &node.text {
404        out.push_str(t);
405    }
406    if let Some(children) = &node.content {
407        for child in children {
408            collect_text(child, out);
409        }
410    }
411}
412
/// Collapses every run of whitespace to a single space and strips leading
/// and trailing whitespace.
fn normalize_whitespace(s: &str) -> String {
    // Splitting on whitespace and re-joining with one space is the same as
    // collapsing runs and trimming both ends.
    s.split_whitespace().collect::<Vec<&str>>().join(" ")
}
429
430// ── Block-level diff with three-tier matching ───────────────────────
431
432fn diff_blocks(from: &[AdfNode], to: &[AdfNode], opts: &DiffOptions) -> Vec<NodeDelta> {
433    let pairs = match_nodes(from, to);
434    let mut deltas: Vec<NodeDelta> = Vec::new();
435
436    for pair in pairs {
437        match pair {
438            MatchPair::Both(fi, ti) => {
439                if let Some(delta) = diff_node(&from[fi], &to[ti], opts) {
440                    deltas.push(delta);
441                }
442            }
443            MatchPair::OnlyFrom(fi) => {
444                deltas.push(NodeDelta::Removed(snapshot_node(&from[fi], opts)));
445            }
446            MatchPair::OnlyTo(ti) => {
447                deltas.push(NodeDelta::Added(snapshot_node(&to[ti], opts)));
448            }
449        }
450    }
451    deltas
452}
453
/// Result of matching one block; payloads are indices into the `from` and
/// `to` slices handed to the matcher.
#[derive(Debug, Clone, Copy)]
enum MatchPair {
    /// Block `from[.0]` corresponds to block `to[.1]`.
    Both(usize, usize),
    /// Block `from[.0]` has no counterpart in `to`.
    OnlyFrom(usize),
    /// Block `to[.0]` has no counterpart in `from`.
    OnlyTo(usize),
}
460
461/// Three-tier matcher: natural keys first, then content hash, then position.
462fn match_nodes(from: &[AdfNode], to: &[AdfNode]) -> Vec<MatchPair> {
463    let mut from_used = vec![false; from.len()];
464    let mut to_used = vec![false; to.len()];
465    let mut pairs: Vec<MatchPair> = Vec::new();
466
467    // Tier 1: natural-key match. Pair only when the same key occurs in both
468    // and node types agree.
469    let mut from_keys: HashMap<(String, String), Vec<usize>> = HashMap::new();
470    for (i, n) in from.iter().enumerate() {
471        if let Some(k) = natural_key(n) {
472            from_keys
473                .entry((n.node_type.clone(), k))
474                .or_default()
475                .push(i);
476        }
477    }
478    for (i, n) in to.iter().enumerate() {
479        if let Some(k) = natural_key(n) {
480            if let Some(slots) = from_keys.get_mut(&(n.node_type.clone(), k)) {
481                if let Some(fi) = slots.pop() {
482                    pairs.push(MatchPair::Both(fi, i));
483                    from_used[fi] = true;
484                    to_used[i] = true;
485                }
486            }
487        }
488    }
489
490    // Tier 2: content-hash match on the residual, bucketed by node type.
491    let mut from_hashes: HashMap<(String, u64), Vec<usize>> = HashMap::new();
492    for (i, n) in from.iter().enumerate() {
493        if from_used[i] {
494            continue;
495        }
496        from_hashes
497            .entry((n.node_type.clone(), content_hash(n)))
498            .or_default()
499            .push(i);
500    }
501    for (i, n) in to.iter().enumerate() {
502        if to_used[i] {
503            continue;
504        }
505        let h = content_hash(n);
506        if let Some(slots) = from_hashes.get_mut(&(n.node_type.clone(), h)) {
507            if let Some(fi) = slots.pop() {
508                pairs.push(MatchPair::Both(fi, i));
509                from_used[fi] = true;
510                to_used[i] = true;
511            }
512        }
513    }
514
515    // Tier 3: positional pairing of the remainder (only when node types match).
516    let from_residual: Vec<usize> = (0..from.len()).filter(|&i| !from_used[i]).collect();
517    let to_residual: Vec<usize> = (0..to.len()).filter(|&i| !to_used[i]).collect();
518    let mut fi = 0;
519    let mut ti = 0;
520    while fi < from_residual.len() && ti < to_residual.len() {
521        let f = from_residual[fi];
522        let t = to_residual[ti];
523        if from[f].node_type == to[t].node_type {
524            pairs.push(MatchPair::Both(f, t));
525            from_used[f] = true;
526            to_used[t] = true;
527            fi += 1;
528            ti += 1;
529        } else {
530            // Type mismatch — emit removal of the earlier residual side.
531            // Heuristic: drop the smaller index so we keep aligning forward.
532            if from_residual[fi] <= to_residual[ti] {
533                pairs.push(MatchPair::OnlyFrom(f));
534                from_used[f] = true;
535                fi += 1;
536            } else {
537                pairs.push(MatchPair::OnlyTo(t));
538                to_used[t] = true;
539                ti += 1;
540            }
541        }
542    }
543    while fi < from_residual.len() {
544        pairs.push(MatchPair::OnlyFrom(from_residual[fi]));
545        fi += 1;
546    }
547    while ti < to_residual.len() {
548        pairs.push(MatchPair::OnlyTo(to_residual[ti]));
549        ti += 1;
550    }
551
552    // Sort `Both` pairs by `to` index for stable output ordering.
553    pairs.sort_by_key(|p| match p {
554        MatchPair::Both(_, t) | MatchPair::OnlyTo(t) => (*t, 0),
555        MatchPair::OnlyFrom(f) => (usize::MAX, *f),
556    });
557    pairs
558}
559
560fn natural_key(node: &AdfNode) -> Option<String> {
561    if let Some(id) = &node.local_id {
562        return Some(id.clone());
563    }
564    let attrs = node.attrs.as_ref()?;
565    let key_attr: Option<&str> = match node.node_type.as_str() {
566        "table" | "tableRow" | "tableCell" | "tableHeader" | "expand" | "nestedExpand" => {
567            Some("localId")
568        }
569        "media" | "mention" => Some("id"),
570        "inlineCard" | "blockCard" => Some("url"),
571        _ => None,
572    };
573    let key_attr = key_attr?;
574    attrs
575        .get(key_attr)
576        .and_then(serde_json::Value::as_str)
577        .map(str::to_string)
578}
579
580fn content_hash(node: &AdfNode) -> u64 {
581    let mut hasher = std::collections::hash_map::DefaultHasher::new();
582    hash_node(node, &mut hasher);
583    hasher.finish()
584}
585
586fn hash_node(node: &AdfNode, hasher: &mut impl Hasher) {
587    node.node_type.hash(hasher);
588    if let Some(t) = &node.text {
589        t.hash(hasher);
590    }
591    if let Some(children) = &node.content {
592        for c in children {
593            hash_node(c, hasher);
594        }
595    }
596}
597
598// ── Per-node diff dispatch ──────────────────────────────────────────
599
600fn diff_node(from: &AdfNode, to: &AdfNode, opts: &DiffOptions) -> Option<NodeDelta> {
601    match from.node_type.as_str() {
602        "paragraph" | "blockquote" => diff_paragraph(from, to, opts),
603        "codeBlock" => diff_code_block(from, to, opts),
604        "table" => diff_table(from, to, opts),
605        "bulletList" | "orderedList" | "taskList" => diff_list(from, to, opts),
606        _ => diff_opaque(from, to, opts),
607    }
608}
609
610fn diff_paragraph(from: &AdfNode, to: &AdfNode, opts: &DiffOptions) -> Option<NodeDelta> {
611    let from_text = extract_text(from, opts);
612    let to_text = extract_text(to, opts);
613    if from_text == to_text {
614        return None;
615    }
616    let (words_added, words_removed) = word_counts(&from_text, &to_text);
617    Some(NodeDelta::Paragraph(ParagraphDelta {
618        from_text,
619        to_text,
620        words_added,
621        words_removed,
622    }))
623}
624
625fn diff_code_block(from: &AdfNode, to: &AdfNode, opts: &DiffOptions) -> Option<NodeDelta> {
626    let from_text = extract_text(from, opts);
627    let to_text = extract_text(to, opts);
628    if from_text == to_text {
629        return None;
630    }
631    let language = code_language(from).or_else(|| code_language(to));
632    Some(NodeDelta::CodeBlock(CodeBlockDelta {
633        language,
634        from_text,
635        to_text,
636    }))
637}
638
639fn code_language(node: &AdfNode) -> Option<String> {
640    node.attrs
641        .as_ref()?
642        .get("language")
643        .and_then(serde_json::Value::as_str)
644        .map(str::to_string)
645}
646
647fn diff_table(from: &AdfNode, to: &AdfNode, opts: &DiffOptions) -> Option<NodeDelta> {
648    let from_rows = table_rows(from);
649    let to_rows = table_rows(to);
650    let row_count = from_rows.len().max(to_rows.len());
651    let mut cells: Vec<CellDelta> = Vec::new();
652    for r in 0..row_count {
653        let from_cells = from_rows.get(r).map_or(&[][..], Vec::as_slice);
654        let to_cells = to_rows.get(r).map_or(&[][..], Vec::as_slice);
655        let col_count = from_cells.len().max(to_cells.len());
656        for c in 0..col_count {
657            let from_text = from_cells
658                .get(c)
659                .map(|n| extract_text(n, opts))
660                .unwrap_or_default();
661            let to_text = to_cells
662                .get(c)
663                .map(|n| extract_text(n, opts))
664                .unwrap_or_default();
665            if from_text != to_text {
666                cells.push(CellDelta {
667                    row: r,
668                    col: c,
669                    from_text,
670                    to_text,
671                });
672            }
673        }
674    }
675    if cells.is_empty() {
676        None
677    } else {
678        Some(NodeDelta::Table(TableDelta { cells }))
679    }
680}
681
682fn table_rows(node: &AdfNode) -> Vec<Vec<&AdfNode>> {
683    let mut rows: Vec<Vec<&AdfNode>> = Vec::new();
684    if let Some(children) = &node.content {
685        for row in children {
686            if row.node_type == "tableRow" {
687                let mut cells: Vec<&AdfNode> = Vec::new();
688                if let Some(row_children) = &row.content {
689                    for cell in row_children {
690                        if cell.node_type == "tableCell" || cell.node_type == "tableHeader" {
691                            cells.push(cell);
692                        }
693                    }
694                }
695                rows.push(cells);
696            }
697        }
698    }
699    rows
700}
701
702fn diff_list(from: &AdfNode, to: &AdfNode, opts: &DiffOptions) -> Option<NodeDelta> {
703    let mut from_remaining = list_items(from, opts);
704    let mut to_remaining = list_items(to, opts);
705    let mut delta = ListDelta::default();
706
707    // Pair identical items by content first.
708    from_remaining.retain(|f| {
709        if let Some(pos) = to_remaining.iter().position(|t| t == f) {
710            to_remaining.remove(pos);
711            false
712        } else {
713            true
714        }
715    });
716
717    // Pair the rest by position as "modified".
718    let pair_count = from_remaining.len().min(to_remaining.len());
719    for i in 0..pair_count {
720        delta
721            .items_modified
722            .push((from_remaining[i].clone(), to_remaining[i].clone()));
723    }
724    delta
725        .items_removed
726        .extend(from_remaining.iter().skip(pair_count).cloned());
727    delta
728        .items_added
729        .extend(to_remaining.iter().skip(pair_count).cloned());
730
731    if delta.items_added.is_empty()
732        && delta.items_removed.is_empty()
733        && delta.items_modified.is_empty()
734    {
735        None
736    } else {
737        Some(NodeDelta::List(delta))
738    }
739}
740
741fn list_items(node: &AdfNode, opts: &DiffOptions) -> Vec<String> {
742    node.content
743        .as_ref()
744        .map(|children| {
745            children
746                .iter()
747                .map(|item| extract_text(item, opts))
748                .collect()
749        })
750        .unwrap_or_default()
751}
752
753fn diff_opaque(from: &AdfNode, to: &AdfNode, opts: &DiffOptions) -> Option<NodeDelta> {
754    let from_text = extract_text(from, opts);
755    let to_text = extract_text(to, opts);
756    if from_text == to_text && content_hash(from) == content_hash(to) {
757        return None;
758    }
759    Some(NodeDelta::Opaque(OpaqueDelta {
760        node_type: from.node_type.clone(),
761        from_summary: from_text,
762        to_summary: to_text,
763    }))
764}
765
766// ── Snapshot helpers (for added/removed blocks) ─────────────────────
767
768fn snapshot_node(node: &AdfNode, opts: &DiffOptions) -> NodeSnapshot {
769    NodeSnapshot {
770        node_type: node.node_type.clone(),
771        text: extract_text(node, opts),
772    }
773}
774
775fn snapshot_blocks(blocks: &[AdfNode]) -> Vec<NodeDelta> {
776    // Snapshots use no normalization — we just want plain text.
777    let opts = DiffOptions::default();
778    blocks
779        .iter()
780        .map(|n| NodeDelta::Added(snapshot_node(n, &opts)))
781        .collect()
782}
783
784// ── Word-counting helper using `similar` ────────────────────────────
785
786fn word_counts(from: &str, to: &str) -> (u32, u32) {
787    let diff = TextDiff::from_words(from, to);
788    let mut added: u32 = 0;
789    let mut removed: u32 = 0;
790    for change in diff.iter_all_changes() {
791        let val = change.value();
792        if val.trim().is_empty() {
793            continue;
794        }
795        match change.tag() {
796            ChangeTag::Insert => added += 1,
797            ChangeTag::Delete => removed += 1,
798            ChangeTag::Equal => {}
799        }
800    }
801    (added, removed)
802}
803
804// ── Stats accumulation ───────────────────────────────────────────────
805
806fn accumulate_delta(stats: &mut DiffStats, delta: &NodeDelta) {
807    match delta {
808        NodeDelta::Paragraph(p) => {
809            stats.paragraphs_modified += 1;
810            stats.words_added += p.words_added;
811            stats.words_removed += p.words_removed;
812            let (ca, cr) = char_counts(&p.from_text, &p.to_text);
813            stats.chars_added += ca;
814            stats.chars_removed += cr;
815        }
816        NodeDelta::CodeBlock(c) => {
817            let (ca, cr) = char_counts(&c.from_text, &c.to_text);
818            stats.chars_added += ca;
819            stats.chars_removed += cr;
820        }
821        NodeDelta::Table(_) => {
822            stats.tables_modified += 1;
823        }
824        NodeDelta::Added(s) => {
825            stats.chars_added += s.text.chars().count() as u32;
826            stats.words_added += s.text.split_whitespace().count() as u32;
827        }
828        NodeDelta::Removed(s) => {
829            stats.chars_removed += s.text.chars().count() as u32;
830            stats.words_removed += s.text.split_whitespace().count() as u32;
831        }
832        NodeDelta::List(_) | NodeDelta::Opaque(_) => {}
833    }
834}
835
836fn char_counts(from: &str, to: &str) -> (u32, u32) {
837    let diff = TextDiff::from_chars(from, to);
838    let mut added: u32 = 0;
839    let mut removed: u32 = 0;
840    for change in diff.iter_all_changes() {
841        match change.tag() {
842            ChangeTag::Insert => added += 1,
843            ChangeTag::Delete => removed += 1,
844            ChangeTag::Equal => {}
845        }
846    }
847    (added, removed)
848}
849
850// ── Tests ────────────────────────────────────────────────────────────
851
852#[cfg(test)]
853#[allow(clippy::unwrap_used, clippy::expect_used)]
854mod tests {
855    use super::*;
856    use serde_json::json;
857
858    fn doc(content: Vec<AdfNode>) -> AdfDocument {
859        AdfDocument {
860            version: 1,
861            doc_type: "doc".to_string(),
862            content,
863        }
864    }
865
866    fn p(text: &str) -> AdfNode {
867        AdfNode::paragraph(vec![AdfNode::text(text)])
868    }
869
870    fn h(level: u8, text: &str) -> AdfNode {
871        AdfNode::heading(level, vec![AdfNode::text(text)])
872    }
873
874    #[test]
875    fn slugify_basic() {
876        assert_eq!(slugify("Background"), "background");
877        assert_eq!(slugify("Hello World"), "hello-world");
878        assert_eq!(slugify("Foo, Bar & Baz!"), "foo-bar-baz");
879        assert_eq!(slugify("   spaced   "), "spaced");
880        assert_eq!(slugify("!!!"), "section");
881    }
882
883    #[test]
884    fn section_path_includes_occurrence_for_collisions() {
885        assert_eq!(section_path(2, "background", 1), "/h2#background");
886        assert_eq!(section_path(2, "background", 2), "/h2#background-2");
887    }
888
889    #[test]
890    fn split_into_sections_groups_by_heading() {
891        let document = doc(vec![
892            p("preamble"),
893            h(2, "Background"),
894            p("paragraph A"),
895            h(2, "Architecture"),
896            p("paragraph B"),
897        ]);
898        let sections = split_into_sections(&document.content, &DiffOptions::default());
899        assert_eq!(sections.len(), 3);
900        assert!(sections[0].path.is_empty());
901        assert_eq!(sections[1].path, "/h2#background");
902        assert_eq!(sections[2].path, "/h2#architecture");
903    }
904
905    #[test]
906    fn duplicate_heading_gets_occurrence_suffix() {
907        let document = doc(vec![h(2, "Notes"), p("a"), h(2, "Notes"), p("b")]);
908        let sections = split_into_sections(&document.content, &DiffOptions::default());
909        assert_eq!(sections.len(), 2);
910        assert_eq!(sections[0].path, "/h2#notes");
911        assert_eq!(sections[1].path, "/h2#notes-2");
912    }
913
914    #[test]
915    fn diff_paragraph_text_change_classifies_section_modified() {
916        let from = doc(vec![h(2, "Background"), p("We use database version 12.")]);
917        let to = doc(vec![h(2, "Background"), p("We use database version 14.")]);
918        let d = diff_documents(&from, &to, &DiffOptions::default());
919        assert_eq!(d.sections.len(), 1);
920        assert_eq!(d.sections[0].change, ChangeKind::Modified);
921        assert_eq!(d.stats.sections_modified, 1);
922        assert_eq!(d.stats.paragraphs_modified, 1);
923        assert!(d.stats.words_added > 0 || d.stats.words_removed > 0);
924        match &d.sections[0].deltas[0] {
925            NodeDelta::Paragraph(p) => {
926                assert!(p.from_text.contains("12"));
927                assert!(p.to_text.contains("14"));
928            }
929            other => panic!("expected paragraph delta, got {other:?}"),
930        }
931    }
932
933    #[test]
934    fn added_section_classified() {
935        let from = doc(vec![h(2, "A"), p("a")]);
936        let to = doc(vec![h(2, "A"), p("a"), h(2, "B"), p("b")]);
937        let d = diff_documents(&from, &to, &DiffOptions::default());
938        assert_eq!(d.stats.sections_added, 1);
939        assert_eq!(d.stats.sections_removed, 0);
940        let added = d
941            .sections
942            .iter()
943            .find(|s| s.path == "/h2#b")
944            .expect("section B should appear");
945        assert_eq!(added.change, ChangeKind::Added);
946    }
947
948    #[test]
949    fn removed_section_classified() {
950        let from = doc(vec![h(2, "A"), p("a"), h(2, "B"), p("b")]);
951        let to = doc(vec![h(2, "A"), p("a")]);
952        let d = diff_documents(&from, &to, &DiffOptions::default());
953        assert_eq!(d.stats.sections_removed, 1);
954        let removed = d
955            .sections
956            .iter()
957            .find(|s| s.path == "/h2#b")
958            .expect("section B should appear");
959        assert_eq!(removed.change, ChangeKind::Removed);
960    }
961
962    #[test]
963    fn moved_section_classified() {
964        let from = doc(vec![h(2, "A"), p("a"), h(2, "B"), p("b")]);
965        let to = doc(vec![h(2, "B"), p("b"), h(2, "A"), p("a")]);
966        let d = diff_documents(&from, &to, &DiffOptions::default());
967        assert_eq!(d.stats.sections_moved, 2);
968        for s in &d.sections {
969            assert_eq!(s.change, ChangeKind::Moved);
970        }
971    }
972
973    #[test]
974    fn unchanged_when_documents_identical() {
975        let from = doc(vec![h(2, "A"), p("a"), h(2, "B"), p("b")]);
976        let to = from.clone();
977        let d = diff_documents(&from, &to, &DiffOptions::default());
978        for s in &d.sections {
979            assert_eq!(s.change, ChangeKind::Unchanged);
980        }
981        assert_eq!(d.stats.sections_modified, 0);
982        assert_eq!(d.stats.sections_added, 0);
983        assert_eq!(d.stats.sections_removed, 0);
984    }
985
986    #[test]
987    fn whitespace_normalization_suppresses_trivial_diff() {
988        let from = doc(vec![h(2, "A"), p("hello world")]);
989        let to = doc(vec![h(2, "A"), p("hello   world")]);
990        let opts = DiffOptions {
991            ignore_whitespace: true,
992        };
993        let d = diff_documents(&from, &to, &opts);
994        assert_eq!(d.sections[0].change, ChangeKind::Unchanged);
995    }
996
997    #[test]
998    fn whitespace_normalization_off_keeps_diff() {
999        let from = doc(vec![h(2, "A"), p("hello world")]);
1000        let to = doc(vec![h(2, "A"), p("hello   world")]);
1001        let d = diff_documents(&from, &to, &DiffOptions::default());
1002        assert_eq!(d.sections[0].change, ChangeKind::Modified);
1003    }
1004
1005    #[test]
1006    fn code_block_diff_emits_delta() {
1007        let from = doc(vec![
1008            h(2, "Code"),
1009            AdfNode::code_block(Some("rust"), "fn a() {}"),
1010        ]);
1011        let to = doc(vec![
1012            h(2, "Code"),
1013            AdfNode::code_block(Some("rust"), "fn a() { 1 }"),
1014        ]);
1015        let d = diff_documents(&from, &to, &DiffOptions::default());
1016        match &d.sections[0].deltas[0] {
1017            NodeDelta::CodeBlock(c) => {
1018                assert_eq!(c.language.as_deref(), Some("rust"));
1019                assert!(c.to_text.contains('1'));
1020            }
1021            other => panic!("expected code block delta, got {other:?}"),
1022        }
1023    }
1024
1025    #[test]
1026    fn table_cell_edit_emits_cell_delta() {
1027        let from_table = AdfNode::table(vec![AdfNode::table_row(vec![
1028            AdfNode::table_cell(vec![p("alpha")]),
1029            AdfNode::table_cell(vec![p("beta")]),
1030        ])]);
1031        let to_table = AdfNode::table(vec![AdfNode::table_row(vec![
1032            AdfNode::table_cell(vec![p("alpha")]),
1033            AdfNode::table_cell(vec![p("BETA")]),
1034        ])]);
1035        let from = doc(vec![h(2, "T"), from_table]);
1036        let to = doc(vec![h(2, "T"), to_table]);
1037        let d = diff_documents(&from, &to, &DiffOptions::default());
1038        assert_eq!(d.stats.tables_modified, 1);
1039        match &d.sections[0].deltas[0] {
1040            NodeDelta::Table(t) => {
1041                assert_eq!(t.cells.len(), 1);
1042                assert_eq!(t.cells[0].row, 0);
1043                assert_eq!(t.cells[0].col, 1);
1044                assert_eq!(t.cells[0].from_text, "beta");
1045                assert_eq!(t.cells[0].to_text, "BETA");
1046            }
1047            other => panic!("expected table delta, got {other:?}"),
1048        }
1049    }
1050
1051    #[test]
1052    fn list_item_add_remove_emits_list_delta() {
1053        let from = doc(vec![
1054            h(2, "L"),
1055            AdfNode::bullet_list(vec![
1056                AdfNode::list_item(vec![p("one")]),
1057                AdfNode::list_item(vec![p("two")]),
1058            ]),
1059        ]);
1060        let to = doc(vec![
1061            h(2, "L"),
1062            AdfNode::bullet_list(vec![
1063                AdfNode::list_item(vec![p("one")]),
1064                AdfNode::list_item(vec![p("three")]),
1065            ]),
1066        ]);
1067        let d = diff_documents(&from, &to, &DiffOptions::default());
1068        match &d.sections[0].deltas[0] {
1069            NodeDelta::List(l) => {
1070                // The matcher pairs the unchanged "one" first, then the
1071                // residual "two"/"three" become a modified pair.
1072                assert_eq!(l.items_modified.len(), 1);
1073                assert_eq!(l.items_modified[0].0, "two");
1074                assert_eq!(l.items_modified[0].1, "three");
1075            }
1076            other => panic!("expected list delta, got {other:?}"),
1077        }
1078    }
1079
1080    #[test]
1081    fn natural_key_localid_pairs_moved_table_row() {
1082        let make_row = |local_id: &str, text: &str| AdfNode {
1083            node_type: "tableRow".to_string(),
1084            attrs: Some(json!({"localId": local_id})),
1085            content: Some(vec![AdfNode::table_cell(vec![p(text)])]),
1086            text: None,
1087            marks: None,
1088            local_id: None,
1089            parameters: None,
1090        };
1091        let from = vec![make_row("r1", "alpha"), make_row("r2", "beta")];
1092        let to = vec![make_row("r2", "beta"), make_row("r1", "ALPHA")];
1093        let pairs = match_nodes(&from, &to);
1094        // Both rows pair via natural-key, even though one was edited.
1095        let both = pairs
1096            .iter()
1097            .filter(|p| matches!(p, MatchPair::Both(_, _)))
1098            .count();
1099        assert_eq!(both, 2);
1100    }
1101
1102    #[test]
1103    fn content_hash_pairs_moved_paragraph_without_localid() {
1104        let from = vec![p("alpha"), p("beta")];
1105        let to = vec![p("beta"), p("alpha")];
1106        let pairs = match_nodes(&from, &to);
1107        // Both paragraphs pair via content hash (tier 2).
1108        let both = pairs
1109            .iter()
1110            .filter(|p| matches!(p, MatchPair::Both(_, _)))
1111            .count();
1112        assert_eq!(both, 2);
1113    }
1114
1115    #[test]
1116    fn position_pairs_residual_when_types_match() {
1117        let from = vec![p("one"), p("two")];
1118        let to = vec![p("uno"), p("dos")];
1119        // Neither hashes match, so both pair positionally.
1120        let pairs = match_nodes(&from, &to);
1121        let both = pairs
1122            .iter()
1123            .filter(|p| matches!(p, MatchPair::Both(_, _)))
1124            .count();
1125        assert_eq!(both, 2);
1126    }
1127
1128    #[test]
1129    fn opaque_delta_fallback() {
1130        let from_panel = AdfNode {
1131            node_type: "panel".to_string(),
1132            attrs: Some(json!({"panelType": "info"})),
1133            content: Some(vec![p("note A")]),
1134            text: None,
1135            marks: None,
1136            local_id: None,
1137            parameters: None,
1138        };
1139        let to_panel = AdfNode {
1140            node_type: "panel".to_string(),
1141            attrs: Some(json!({"panelType": "info"})),
1142            content: Some(vec![p("note B")]),
1143            text: None,
1144            marks: None,
1145            local_id: None,
1146            parameters: None,
1147        };
1148        let from = doc(vec![h(2, "P"), from_panel]);
1149        let to = doc(vec![h(2, "P"), to_panel]);
1150        let d = diff_documents(&from, &to, &DiffOptions::default());
1151        match &d.sections[0].deltas[0] {
1152            NodeDelta::Opaque(o) => assert_eq!(o.node_type, "panel"),
1153            other => panic!("expected opaque delta, got {other:?}"),
1154        }
1155    }
1156
1157    #[test]
1158    fn preamble_diff_works_without_heading() {
1159        let from = doc(vec![p("intro old")]);
1160        let to = doc(vec![p("intro new")]);
1161        let d = diff_documents(&from, &to, &DiffOptions::default());
1162        assert_eq!(d.sections.len(), 1);
1163        assert_eq!(d.sections[0].path, "");
1164        assert_eq!(d.sections[0].change, ChangeKind::Modified);
1165    }
1166
1167    #[test]
1168    fn empty_documents_produce_empty_diff() {
1169        let from = doc(vec![]);
1170        let to = doc(vec![]);
1171        let d = diff_documents(&from, &to, &DiffOptions::default());
1172        assert_eq!(d.sections.len(), 0);
1173        assert_eq!(d.stats, DiffStats::default());
1174    }
1175
1176    #[test]
1177    fn heading_with_no_text_uses_section_slug() {
1178        let from = doc(vec![AdfNode::heading(2, vec![]), p("a")]);
1179        let to = doc(vec![AdfNode::heading(2, vec![]), p("b")]);
1180        let d = diff_documents(&from, &to, &DiffOptions::default());
1181        assert_eq!(d.sections.len(), 1);
1182        assert_eq!(d.sections[0].path, "/h2#section");
1183    }
1184
1185    // ── Three-tier matcher edge cases ─────────────────────────────
1186
1187    #[test]
1188    fn match_nodes_residual_more_from_emits_only_from() {
1189        // Three from-only paragraphs; no to nodes — every block becomes OnlyFrom.
1190        let from = vec![p("a"), p("b"), p("c")];
1191        let to: Vec<AdfNode> = Vec::new();
1192        let pairs = match_nodes(&from, &to);
1193        assert_eq!(pairs.len(), 3);
1194        assert!(pairs.iter().all(|p| matches!(p, MatchPair::OnlyFrom(_))));
1195    }
1196
1197    #[test]
1198    fn match_nodes_residual_more_to_emits_only_to() {
1199        let from: Vec<AdfNode> = Vec::new();
1200        let to = vec![p("a"), p("b"), p("c")];
1201        let pairs = match_nodes(&from, &to);
1202        assert_eq!(pairs.len(), 3);
1203        assert!(pairs.iter().all(|p| matches!(p, MatchPair::OnlyTo(_))));
1204    }
1205
1206    #[test]
1207    fn match_nodes_type_mismatch_in_residual() {
1208        // Different node types at the same position force tier 3 to drop
1209        // one side rather than pair across types. With from=[paragraph,
1210        // codeBlock] and to=[codeBlock, paragraph]:
1211        //   fi=0 (para) vs ti=0 (code) — type mismatch → OnlyFrom(0)
1212        //   fi=1 (code) vs ti=0 (code) — match → Both(1, 0)
1213        //   leftover ti=1 (para) → OnlyTo(1)
1214        let from = vec![p("alpha"), AdfNode::code_block(None, "old code")];
1215        let to = vec![AdfNode::code_block(None, "new code"), p("beta")];
1216        let pairs = match_nodes(&from, &to);
1217        let both = pairs
1218            .iter()
1219            .filter(|p| matches!(p, MatchPair::Both(_, _)))
1220            .count();
1221        let only_from = pairs
1222            .iter()
1223            .filter(|p| matches!(p, MatchPair::OnlyFrom(_)))
1224            .count();
1225        let only_to = pairs
1226            .iter()
1227            .filter(|p| matches!(p, MatchPair::OnlyTo(_)))
1228            .count();
1229        // One typed pair plus one drop on each side — exercises the
1230        // type-mismatch branch of the residual loop.
1231        assert_eq!(both, 1);
1232        assert_eq!(only_from, 1);
1233        assert_eq!(only_to, 1);
1234    }
1235
1236    #[test]
1237    fn match_nodes_type_mismatch_drops_to_when_to_index_smaller() {
1238        // Force the `else` branch of the type-mismatch heuristic where
1239        // `from_residual[fi] > to_residual[ti]` — tier 1 pairs nothing,
1240        // and the type mismatch sits with from-residual indices ahead of
1241        // to-residual indices. Construct: from = [code, code], to = [para, code].
1242        //   tier 2/3: from[0]=code matches from-residual[0]=0; to[1]=code
1243        //   gives the to-residual=[0,1], from-residual=[0,1]
1244        //
1245        // Instead, use natural keys to "anchor" early indices on one side:
1246        // from = [keyed-table, code], to = [para, keyed-table, code]
1247        //   tier 1 pairs from[0]<->to[1] → from_residual=[1], to_residual=[0,2]
1248        //   fi=0 (from[1]=code), ti=0 (to[0]=para): mismatch
1249        //     to_residual[0]=0 < from_residual[0]=1 → OnlyTo branch fires
1250        let make_keyed_table = |key: &str| AdfNode {
1251            node_type: "table".to_string(),
1252            attrs: Some(serde_json::json!({"localId": key})),
1253            content: None,
1254            text: None,
1255            marks: None,
1256            local_id: None,
1257            parameters: None,
1258        };
1259        let from = vec![make_keyed_table("t1"), AdfNode::code_block(None, "code")];
1260        let to = vec![
1261            p("orphan-para"),
1262            make_keyed_table("t1"),
1263            AdfNode::code_block(None, "code"),
1264        ];
1265        let pairs = match_nodes(&from, &to);
1266        let only_to = pairs
1267            .iter()
1268            .filter(|p| matches!(p, MatchPair::OnlyTo(_)))
1269            .count();
1270        assert!(only_to >= 1, "expected at least one OnlyTo, got {pairs:?}");
1271    }
1272
1273    #[test]
1274    fn diff_blocks_emits_only_from_and_only_to_deltas() {
1275        // Force OnlyFrom (a block exists in from but not in to) and OnlyTo
1276        // (a block exists in to but not in from) by giving each side only
1277        // one block of an unmatched type.
1278        let from = vec![p("only-from")];
1279        let to = vec![AdfNode::code_block(None, "only-to")];
1280        let deltas = diff_blocks(&from, &to, &DiffOptions::default());
1281        let has_removed = deltas.iter().any(|d| matches!(d, NodeDelta::Removed(_)));
1282        let has_added = deltas.iter().any(|d| matches!(d, NodeDelta::Added(_)));
1283        assert!(has_removed && has_added, "got {deltas:?}");
1284    }
1285
1286    // ── Per-block diff "no change" returns None ───────────────────
1287
1288    #[test]
1289    fn diff_code_block_returns_none_when_text_matches() {
1290        let from = AdfNode::code_block(Some("rust"), "fn a() {}");
1291        let to = AdfNode::code_block(Some("rust"), "fn a() {}");
1292        assert!(diff_code_block(&from, &to, &DiffOptions::default()).is_none());
1293    }
1294
1295    #[test]
1296    fn diff_table_returns_none_when_no_cells_changed() {
1297        let make_t = || {
1298            AdfNode::table(vec![AdfNode::table_row(vec![
1299                AdfNode::table_cell(vec![p("a")]),
1300                AdfNode::table_cell(vec![p("b")]),
1301            ])])
1302        };
1303        assert!(diff_table(&make_t(), &make_t(), &DiffOptions::default()).is_none());
1304    }
1305
1306    #[test]
1307    fn diff_list_returns_none_when_items_match() {
1308        let make_l = || {
1309            AdfNode::bullet_list(vec![
1310                AdfNode::list_item(vec![p("one")]),
1311                AdfNode::list_item(vec![p("two")]),
1312            ])
1313        };
1314        assert!(diff_list(&make_l(), &make_l(), &DiffOptions::default()).is_none());
1315    }
1316
1317    #[test]
1318    fn diff_opaque_returns_none_when_identical() {
1319        let panel = AdfNode {
1320            node_type: "panel".to_string(),
1321            attrs: Some(serde_json::json!({"panelType": "info"})),
1322            content: Some(vec![p("note")]),
1323            text: None,
1324            marks: None,
1325            local_id: None,
1326            parameters: None,
1327        };
1328        assert!(diff_opaque(&panel, &panel, &DiffOptions::default()).is_none());
1329    }
1330
1331    // ── Table edge cases ──────────────────────────────────────────
1332
1333    #[test]
1334    fn diff_table_with_unequal_row_counts() {
1335        let from = AdfNode::table(vec![AdfNode::table_row(vec![AdfNode::table_cell(vec![
1336            p("a"),
1337        ])])]);
1338        let to = AdfNode::table(vec![
1339            AdfNode::table_row(vec![AdfNode::table_cell(vec![p("a")])]),
1340            AdfNode::table_row(vec![AdfNode::table_cell(vec![p("b")])]),
1341        ]);
1342        let delta = diff_table(&from, &to, &DiffOptions::default()).unwrap();
1343        if let NodeDelta::Table(t) = delta {
1344            assert!(t.cells.iter().any(|c| c.row == 1));
1345        } else {
1346            panic!("expected table delta");
1347        }
1348    }
1349
1350    #[test]
1351    fn diff_table_with_table_header_cells() {
1352        // table_rows accepts both tableCell and tableHeader.
1353        let from = AdfNode::table(vec![AdfNode::table_row(vec![AdfNode::table_header(vec![
1354            p("h1"),
1355        ])])]);
1356        let to = AdfNode::table(vec![AdfNode::table_row(vec![AdfNode::table_header(vec![
1357            p("h2"),
1358        ])])]);
1359        let delta = diff_table(&from, &to, &DiffOptions::default()).unwrap();
1360        if let NodeDelta::Table(t) = delta {
1361            assert_eq!(t.cells.len(), 1);
1362        } else {
1363            panic!("expected table delta");
1364        }
1365    }
1366
1367    // ── snapshot_blocks ───────────────────────────────────────────
1368
1369    #[test]
1370    fn snapshot_blocks_renders_each_block_as_added_delta() {
1371        let blocks = vec![p("alpha"), p("beta")];
1372        let snaps = snapshot_blocks(&blocks);
1373        assert_eq!(snaps.len(), 2);
1374        for s in snaps {
1375            assert!(matches!(s, NodeDelta::Added(_)));
1376        }
1377    }
1378
1379    // ── Word/char counters ────────────────────────────────────────
1380
1381    #[test]
1382    fn word_counts_skips_pure_whitespace_changes() {
1383        let (added, removed) = word_counts("hello world", "hello   world");
1384        // Whitespace changes shouldn't bump word counts (the trim filter).
1385        assert_eq!(added, 0);
1386        assert_eq!(removed, 0);
1387    }
1388
1389    #[test]
1390    fn char_counts_handles_full_replacement() {
1391        let (added, removed) = char_counts("foo", "bar");
1392        assert!(added >= 3 && removed >= 3);
1393    }
1394
    // ── List with only additions or only removals (no modified items) ────
1396
1397    #[test]
1398    fn diff_list_pure_addition() {
1399        let from = AdfNode::bullet_list(vec![]);
1400        let to = AdfNode::bullet_list(vec![
1401            AdfNode::list_item(vec![p("a")]),
1402            AdfNode::list_item(vec![p("b")]),
1403        ]);
1404        let delta = diff_list(&from, &to, &DiffOptions::default()).unwrap();
1405        if let NodeDelta::List(l) = delta {
1406            assert_eq!(l.items_added.len(), 2);
1407            assert!(l.items_removed.is_empty());
1408            assert!(l.items_modified.is_empty());
1409        } else {
1410            panic!("expected list delta");
1411        }
1412    }
1413
1414    #[test]
1415    fn diff_list_pure_removal() {
1416        let from = AdfNode::bullet_list(vec![
1417            AdfNode::list_item(vec![p("a")]),
1418            AdfNode::list_item(vec![p("b")]),
1419        ]);
1420        let to = AdfNode::bullet_list(vec![]);
1421        let delta = diff_list(&from, &to, &DiffOptions::default()).unwrap();
1422        if let NodeDelta::List(l) = delta {
1423            assert_eq!(l.items_removed.len(), 2);
1424            assert!(l.items_added.is_empty());
1425            assert!(l.items_modified.is_empty());
1426        } else {
1427            panic!("expected list delta");
1428        }
1429    }
1430
1431    // ── code_language: missing attrs / missing field ─────────────
1432
1433    #[test]
1434    fn code_language_returns_none_when_attrs_missing() {
1435        let n = AdfNode {
1436            node_type: "codeBlock".to_string(),
1437            attrs: None,
1438            content: None,
1439            text: None,
1440            marks: None,
1441            local_id: None,
1442            parameters: None,
1443        };
1444        assert!(code_language(&n).is_none());
1445    }
1446
1447    #[test]
1448    fn code_language_returns_none_when_attrs_lack_language() {
1449        let n = AdfNode {
1450            node_type: "codeBlock".to_string(),
1451            attrs: Some(serde_json::json!({"other": "x"})),
1452            content: None,
1453            text: None,
1454            marks: None,
1455            local_id: None,
1456            parameters: None,
1457        };
1458        assert!(code_language(&n).is_none());
1459    }
1460
1461    // ── natural_key fallbacks ─────────────────────────────────────
1462
1463    #[test]
1464    fn natural_key_uses_id_attr_for_media_node() {
1465        let n = AdfNode {
1466            node_type: "media".to_string(),
1467            attrs: Some(serde_json::json!({"id": "media-uuid-1"})),
1468            content: None,
1469            text: None,
1470            marks: None,
1471            local_id: None,
1472            parameters: None,
1473        };
1474        assert_eq!(natural_key(&n).as_deref(), Some("media-uuid-1"));
1475    }
1476
1477    #[test]
1478    fn natural_key_uses_url_attr_for_inline_card() {
1479        let n = AdfNode {
1480            node_type: "inlineCard".to_string(),
1481            attrs: Some(serde_json::json!({"url": "https://example.com/x"})),
1482            content: None,
1483            text: None,
1484            marks: None,
1485            local_id: None,
1486            parameters: None,
1487        };
1488        assert_eq!(natural_key(&n).as_deref(), Some("https://example.com/x"));
1489    }
1490
1491    #[test]
1492    fn natural_key_returns_none_for_unknown_node_type() {
1493        let n = AdfNode {
1494            node_type: "unknown".to_string(),
1495            attrs: Some(serde_json::json!({"some": "value"})),
1496            content: None,
1497            text: None,
1498            marks: None,
1499            local_id: None,
1500            parameters: None,
1501        };
1502        assert!(natural_key(&n).is_none());
1503    }
1504
1505    #[test]
1506    fn natural_key_returns_none_when_node_has_no_attrs() {
1507        let n = AdfNode {
1508            node_type: "table".to_string(),
1509            attrs: None,
1510            content: None,
1511            text: None,
1512            marks: None,
1513            local_id: None,
1514            parameters: None,
1515        };
1516        assert!(natural_key(&n).is_none());
1517    }
1518
    // ── Stats accumulation, table_rows and heading_level edge cases ──────
1520
1521    #[test]
1522    fn paragraph_added_within_matched_section_accumulates_into_stats() {
1523        // Same heading on both sides + an extra paragraph in `to`. The
1524        // matcher emits an `Added` block delta inside the modified section,
1525        // which exercises the `NodeDelta::Added` arm of `accumulate_delta`.
1526        let from = doc(vec![h(2, "S"), p("kept")]);
1527        let to = doc(vec![h(2, "S"), p("kept"), p("hello world")]);
1528        let d = diff_documents(&from, &to, &DiffOptions::default());
1529        // `chars_added` and `words_added` should reflect the snapshot of the
1530        // newly-added paragraph.
1531        assert!(d.stats.chars_added >= 11, "got {:?}", d.stats);
1532        assert_eq!(d.stats.words_added, 2);
1533    }
1534
1535    #[test]
1536    fn paragraph_removed_within_matched_section_accumulates_into_stats() {
1537        let from = doc(vec![h(2, "S"), p("kept"), p("removed text")]);
1538        let to = doc(vec![h(2, "S"), p("kept")]);
1539        let d = diff_documents(&from, &to, &DiffOptions::default());
1540        assert!(d.stats.chars_removed >= 12, "got {:?}", d.stats);
1541        assert_eq!(d.stats.words_removed, 2);
1542    }
1543
1544    #[test]
1545    fn table_rows_skips_non_row_children() {
1546        // A table whose direct children include a non-tableRow node
1547        // exercises the false branch of `if row.node_type == "tableRow"`
1548        // inside `table_rows`.
1549        let from = AdfNode::table(vec![
1550            p("not-a-row"), // skipped by the row-type filter
1551            AdfNode::table_row(vec![AdfNode::table_cell(vec![p("alpha")])]),
1552        ]);
1553        let to = AdfNode::table(vec![
1554            p("not-a-row"),
1555            AdfNode::table_row(vec![AdfNode::table_cell(vec![p("beta")])]),
1556        ]);
1557        let delta = diff_table(&from, &to, &DiffOptions::default()).unwrap();
1558        // Inspect via the serialized form to avoid an unreachable
1559        // destructuring branch — `delta` is structurally guaranteed to be
1560        // `Table` by the call above.
1561        let json = serde_json::to_value(&delta).unwrap();
1562        assert_eq!(json["kind"], "table");
1563        assert_eq!(json["cells"].as_array().unwrap().len(), 1);
1564        assert_eq!(json["cells"][0]["from_text"], "alpha");
1565        assert_eq!(json["cells"][0]["to_text"], "beta");
1566    }
1567
1568    #[test]
1569    fn table_rows_handles_table_with_no_content() {
1570        // A `table` node with no children at all exercises the false branch
1571        // of `if let Some(children)` inside `table_rows`.
1572        let empty_table = AdfNode {
1573            node_type: "table".to_string(),
1574            attrs: None,
1575            content: None,
1576            text: None,
1577            marks: None,
1578            local_id: None,
1579            parameters: None,
1580        };
1581        let result = diff_table(&empty_table, &empty_table, &DiffOptions::default());
1582        assert!(result.is_none());
1583    }
1584
1585    #[test]
1586    fn table_rows_skips_non_cell_children() {
1587        // A `tableRow` with a non-cell child (here a paragraph) exercises
1588        // the false branch of the cell-type filter inside `table_rows`.
1589        let from = AdfNode::table(vec![AdfNode::table_row(vec![
1590            AdfNode::table_cell(vec![p("alpha")]),
1591            p("ignored"),
1592        ])]);
1593        let to = AdfNode::table(vec![AdfNode::table_row(vec![
1594            AdfNode::table_cell(vec![p("beta")]),
1595            p("ignored"),
1596        ])]);
1597        let delta = diff_table(&from, &to, &DiffOptions::default()).unwrap();
1598        if let NodeDelta::Table(t) = delta {
1599            // Only the real cell shows up.
1600            assert_eq!(t.cells.len(), 1);
1601        } else {
1602            panic!("expected table delta");
1603        }
1604    }
1605
1606    #[test]
1607    fn table_rows_skips_rows_without_content() {
1608        // A `tableRow` without any children exercises the early-skip branch
1609        // (`if let Some(row_children)` false).
1610        let from = AdfNode::table(vec![AdfNode {
1611            node_type: "tableRow".to_string(),
1612            attrs: None,
1613            content: None,
1614            text: None,
1615            marks: None,
1616            local_id: None,
1617            parameters: None,
1618        }]);
1619        let to = from.clone();
1620        // Identical empty rows produce no cells, hence no delta.
1621        assert!(diff_table(&from, &to, &DiffOptions::default()).is_none());
1622    }
1623
1624    #[test]
1625    fn heading_level_returns_none_when_attrs_missing() {
1626        let n = AdfNode {
1627            node_type: "heading".to_string(),
1628            attrs: None,
1629            content: None,
1630            text: None,
1631            marks: None,
1632            local_id: None,
1633            parameters: None,
1634        };
1635        assert!(heading_level(&n).is_none());
1636    }
1637}