Skip to main content

weave_core/
merge.rs

1use std::collections::{HashMap, HashSet};
2use std::io::Write;
3use std::process::Command;
4use std::sync::{mpsc, LazyLock};
5use std::time::Duration;
6
7use serde::Serialize;
8use sem_core::model::change::ChangeType;
9use sem_core::model::entity::SemanticEntity;
10use sem_core::model::identity::match_entities;
11use sem_core::parser::plugins::create_default_registry;
12use sem_core::parser::registry::ParserRegistry;
13
/// Process-wide parser registry shared by every merge operation.
///
/// Built lazily on first access by `create_default_registry` and reused
/// afterwards, which avoids recreating 11 tree-sitter language parsers per
/// merge call. `entity_merge_fmt` hands this registry to its worker thread.
static PARSER_REGISTRY: LazyLock<ParserRegistry> = LazyLock::new(create_default_registry);
17
18use crate::conflict::{classify_conflict, ConflictKind, EntityConflict, MarkerFormat, MergeStats};
19use crate::region::{extract_regions, EntityRegion, FileRegion};
20use crate::validate::SemanticWarning;
21use crate::reconstruct::reconstruct;
22
/// How an individual entity was resolved during merge.
///
/// Recorded per entity in [`EntityAudit`]. Variant names serialize in
/// `snake_case` (e.g. `ours_only`, `conflict_rename_rename`).
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum ResolutionStrategy {
    /// Entity exists in all three versions and neither branch modified it.
    Unchanged,
    /// Only ours changed the entity (or theirs changed whitespace only); ours is taken.
    OursOnly,
    /// Only theirs changed the entity (or ours changed whitespace only); theirs is taken.
    TheirsOnly,
    /// Both branches changed the entity to the same content hash; ours is taken.
    ContentEqual,
    /// Both branches changed the entity and the diffy 3-way merge succeeded.
    DiffyMerged,
    /// Diffy could not merge, but the decorator/annotation-aware merge succeeded.
    DecoratorMerged,
    // NOTE(review): presumably the inner (per-member) merge of a container
    // entity; its use site is outside the visible part of the file.
    InnerMerged,
    // NOTE(review): the three conflict variants below are not constructed in
    // the visible code; meanings are inferred from their names only.
    ConflictBothModified,
    ConflictModifyDelete,
    ConflictBothAdded,
    /// Both branches renamed the same base entity to different names.
    ConflictRenameRename,
    // NOTE(review): the remaining variants are not constructed in the visible
    // code; confirm their exact semantics against `resolve_entity`.
    AddedOurs,
    AddedTheirs,
    Deleted,
    Renamed { from: String, to: String },
    Fallback,
}
44
/// Audit record for a single entity's merge resolution.
///
/// One record is pushed per entity processed by the entity merge loop.
#[derive(Debug, Clone, Serialize)]
pub struct EntityAudit {
    /// Entity name; taken from ours, then theirs, then base, falling back to
    /// the entity ID when no version of the entity is available.
    pub name: String,
    /// Entity type as reported by the parser. Serialized under the key `type`.
    #[serde(rename = "type")]
    pub entity_type: String,
    /// How the entity was resolved.
    pub resolution: ResolutionStrategy,
}
53
/// Result of a merge operation.
#[derive(Debug)]
pub struct MergeResult {
    /// Merged file content. May contain conflict markers; see `is_clean`.
    pub content: String,
    /// Conflicts found during the merge (entity-level and interstitial).
    pub conflicts: Vec<EntityConflict>,
    /// Warnings raised by post-merge validation.
    pub warnings: Vec<SemanticWarning>,
    /// Counters describing how the merge was resolved.
    pub stats: MergeStats,
    /// Per-entity resolution records. Empty on fast paths that never run the
    /// per-entity resolution loop.
    pub audit: Vec<EntityAudit>,
}
63
64impl MergeResult {
65    pub fn is_clean(&self) -> bool {
66        self.conflicts.is_empty()
67            && !self.content.lines().any(|l| l.starts_with("<<<<<<< ours"))
68    }
69}
70
/// The resolved content for a single entity after merging.
///
/// Resolutions are stored per entity ID (including renamed IDs) and handed to
/// `reconstruct` to build the merged file.
#[derive(Debug, Clone)]
pub enum ResolvedEntity {
    /// Clean resolution — use this content.
    Clean(EntityRegion),
    /// Conflict — render conflict markers.
    Conflict(EntityConflict),
    /// Inner merge with per-member scoped conflicts.
    /// Content already contains per-member conflict markers; emit as-is.
    ScopedConflict {
        /// Merged text with the per-member conflict markers already embedded.
        content: String,
        /// The conflict record reported for this entity.
        conflict: EntityConflict,
    },
    /// Entity was deleted. Also used to suppress the theirs-side ID of a
    /// rename/rename conflict so the conflict is not emitted twice.
    Deleted,
}
87
88/// Perform entity-level 3-way merge.
89///
90/// Falls back to line-level merge (via diffy) when:
91/// - No parser matches the file type
92/// - Parser returns 0 entities for non-empty content
93/// - File exceeds 1MB
94pub fn entity_merge(
95    base: &str,
96    ours: &str,
97    theirs: &str,
98    file_path: &str,
99) -> MergeResult {
100    entity_merge_fmt(base, ours, theirs, file_path, &MarkerFormat::default())
101}
102
103/// Perform entity-level 3-way merge with configurable marker format.
104pub fn entity_merge_fmt(
105    base: &str,
106    ours: &str,
107    theirs: &str,
108    file_path: &str,
109    marker_format: &MarkerFormat,
110) -> MergeResult {
111    let timeout_secs = std::env::var("WEAVE_TIMEOUT")
112        .ok()
113        .and_then(|v| v.parse::<u64>().ok())
114        .unwrap_or(5);
115
116    // Timeout: if entity merge takes too long, diffy is likely hitting
117    // pathological input. Fall back to git merge-file which always terminates.
118    let base_owned = base.to_string();
119    let ours_owned = ours.to_string();
120    let theirs_owned = theirs.to_string();
121    let path_owned = file_path.to_string();
122    let fmt_owned = marker_format.clone();
123
124    let (tx, rx) = mpsc::channel();
125    std::thread::spawn(move || {
126        let result = entity_merge_with_registry(&base_owned, &ours_owned, &theirs_owned, &path_owned, &PARSER_REGISTRY, &fmt_owned);
127        let _ = tx.send(result);
128    });
129
130    match rx.recv_timeout(Duration::from_secs(timeout_secs)) {
131        Ok(result) => result,
132        Err(_) => {
133            eprintln!("weave: merge timed out after {}s for {}, falling back to git merge-file", timeout_secs, file_path);
134            let mut stats = MergeStats::default();
135            stats.used_fallback = true;
136            git_merge_file(base, ours, theirs, &mut stats)
137        }
138    }
139}
140
141pub fn entity_merge_with_registry(
142    base: &str,
143    ours: &str,
144    theirs: &str,
145    file_path: &str,
146    registry: &ParserRegistry,
147    marker_format: &MarkerFormat,
148) -> MergeResult {
149    // Guard: if any input already contains conflict markers (e.g. AU/AA conflicts
150    // where git bakes markers into stage blobs), report as conflict immediately.
151    // We can't do a meaningful 3-way merge on pre-conflicted content.
152    if has_conflict_markers(base) || has_conflict_markers(ours) || has_conflict_markers(theirs) {
153        let mut stats = MergeStats::default();
154        stats.entities_conflicted = 1;
155        stats.used_fallback = true;
156        // Use whichever input has markers as the merged content (preserves
157        // the conflict for the user to resolve manually).
158        let content = if has_conflict_markers(ours) {
159            ours
160        } else if has_conflict_markers(theirs) {
161            theirs
162        } else {
163            base
164        };
165        let complexity = classify_conflict(Some(base), Some(ours), Some(theirs));
166        return MergeResult {
167            content: content.to_string(),
168            conflicts: vec![EntityConflict {
169                entity_name: "(file)".to_string(),
170                entity_type: "file".to_string(),
171                kind: ConflictKind::BothModified,
172                complexity,
173                ours_content: Some(ours.to_string()),
174                theirs_content: Some(theirs.to_string()),
175                base_content: Some(base.to_string()),
176            }],
177            warnings: vec![],
178            stats,
179            audit: vec![],
180        };
181    }
182
183    // Fast path: if ours == theirs, no merge needed
184    if ours == theirs {
185        return MergeResult {
186            content: ours.to_string(),
187            conflicts: vec![],
188            warnings: vec![],
189            stats: MergeStats::default(),
190            audit: vec![],
191        };
192    }
193
194    // Fast path: if base == ours, take theirs entirely
195    if base == ours {
196        return MergeResult {
197            content: theirs.to_string(),
198            conflicts: vec![],
199            warnings: vec![],
200            stats: MergeStats {
201                entities_theirs_only: 1,
202                ..Default::default()
203            },
204            audit: vec![],
205        };
206    }
207
208    // Fast path: if base == theirs, take ours entirely
209    if base == theirs {
210        return MergeResult {
211            content: ours.to_string(),
212            conflicts: vec![],
213            warnings: vec![],
214            stats: MergeStats {
215                entities_ours_only: 1,
216                ..Default::default()
217            },
218            audit: vec![],
219        };
220    }
221
222    // Binary file detection: if any version has null bytes, use git merge-file directly
223    if is_binary(base) || is_binary(ours) || is_binary(theirs) {
224        let mut stats = MergeStats::default();
225        stats.used_fallback = true;
226        return git_merge_file(base, ours, theirs, &mut stats);
227    }
228
229    // Large file fallback
230    if base.len() > 1_000_000 || ours.len() > 1_000_000 || theirs.len() > 1_000_000 {
231        return line_level_fallback(base, ours, theirs, file_path);
232    }
233
234    // If the file type isn't natively supported, the registry returns the fallback
235    // plugin (20-line chunks). Entity merge on arbitrary chunks produces WORSE
236    // results than line-level merge (confirmed on GitButler's .svelte files where
237    // chunk boundaries don't align with structural boundaries). So we skip entity
238    // merge entirely for fallback-plugin files and go straight to line-level merge.
239    let plugin = match registry.get_plugin(file_path) {
240        Some(p) if p.id() != "fallback" => p,
241        _ => return line_level_fallback(base, ours, theirs, file_path),
242    };
243
244    // Extract entities from all three versions. Keep unfiltered lists for inner merge
245    // (child entities provide tree-sitter-based method decomposition for classes).
246    let base_all = plugin.extract_entities(base, file_path);
247    let ours_all = plugin.extract_entities(ours, file_path);
248    let theirs_all = plugin.extract_entities(theirs, file_path);
249
250    // Filter out nested entities for top-level matching and region extraction
251    let base_entities = filter_nested_entities(base_all.clone());
252    let ours_entities = filter_nested_entities(ours_all.clone());
253    let theirs_entities = filter_nested_entities(theirs_all.clone());
254
255    // Fallback if parser returns nothing for non-empty content
256    if base_entities.is_empty() && !base.trim().is_empty() {
257        return line_level_fallback(base, ours, theirs, file_path);
258    }
259    // Allow empty entities if content is actually empty
260    if ours_entities.is_empty() && !ours.trim().is_empty() && theirs_entities.is_empty() && !theirs.trim().is_empty() {
261        return line_level_fallback(base, ours, theirs, file_path);
262    }
263
264    // Fallback if too many duplicate entity names. Entity matching is O(n*m) on
265    // same-named entities which can hang on files with many `var app = ...` etc.
266    if has_excessive_duplicates(&base_entities) || has_excessive_duplicates(&ours_entities) || has_excessive_duplicates(&theirs_entities) {
267        return line_level_fallback(base, ours, theirs, file_path);
268    }
269
270    // Extract regions from all three
271    let base_regions = extract_regions(base, &base_entities);
272    let ours_regions = extract_regions(ours, &ours_entities);
273    let theirs_regions = extract_regions(theirs, &theirs_entities);
274
275    // Build region content maps (entity_id → content from file lines, preserving
276    // surrounding syntax like `export` that sem-core's entity.content may strip)
277    let base_region_content = build_region_content_map(&base_regions);
278    let ours_region_content = build_region_content_map(&ours_regions);
279    let theirs_region_content = build_region_content_map(&theirs_regions);
280
281    // Match entities: base↔ours and base↔theirs
282    let ours_changes = match_entities(&base_entities, &ours_entities, file_path, None, None, None);
283    let theirs_changes = match_entities(&base_entities, &theirs_entities, file_path, None, None, None);
284
285    // Build lookup maps
286    let base_entity_map: HashMap<&str, &SemanticEntity> =
287        base_entities.iter().map(|e| (e.id.as_str(), e)).collect();
288    let ours_entity_map: HashMap<&str, &SemanticEntity> =
289        ours_entities.iter().map(|e| (e.id.as_str(), e)).collect();
290    let theirs_entity_map: HashMap<&str, &SemanticEntity> =
291        theirs_entities.iter().map(|e| (e.id.as_str(), e)).collect();
292
293    // Classify what happened to each entity in each branch
294    let mut ours_change_map: HashMap<String, ChangeType> = HashMap::new();
295    for change in &ours_changes.changes {
296        ours_change_map.insert(change.entity_id.clone(), change.change_type);
297    }
298    let mut theirs_change_map: HashMap<String, ChangeType> = HashMap::new();
299    for change in &theirs_changes.changes {
300        theirs_change_map.insert(change.entity_id.clone(), change.change_type);
301    }
302
303    // Detect renames using structural_hash (RefFilter / IntelliMerge-inspired).
304    // When one branch renames an entity, connect the old and new IDs so the merge
305    // treats it as the same entity rather than a delete+add.
306    let ours_rename_to_base = build_rename_map(&base_entities, &ours_entities);
307    let theirs_rename_to_base = build_rename_map(&base_entities, &theirs_entities);
308    // Reverse maps: base_id → renamed_id in that branch
309    let base_to_ours_rename: HashMap<String, String> = ours_rename_to_base
310        .iter()
311        .map(|(new, old)| (old.clone(), new.clone()))
312        .collect();
313    let base_to_theirs_rename: HashMap<String, String> = theirs_rename_to_base
314        .iter()
315        .map(|(new, old)| (old.clone(), new.clone()))
316        .collect();
317
318    // Collect all entity IDs across all versions
319    let mut all_entity_ids: Vec<String> = Vec::new();
320    let mut seen: HashSet<String> = HashSet::new();
321    // Track renamed IDs so we don't process them twice
322    let mut skip_ids: HashSet<String> = HashSet::new();
323    // The "new" IDs from renames should be skipped — they'll be handled via the base ID
324    for new_id in ours_rename_to_base.keys() {
325        skip_ids.insert(new_id.clone());
326    }
327    for new_id in theirs_rename_to_base.keys() {
328        skip_ids.insert(new_id.clone());
329    }
330
331    // Start with ours ordering (skeleton)
332    for entity in &ours_entities {
333        if skip_ids.contains(&entity.id) {
334            continue;
335        }
336        if seen.insert(entity.id.clone()) {
337            all_entity_ids.push(entity.id.clone());
338        }
339    }
340    // Add theirs-only entities
341    for entity in &theirs_entities {
342        if skip_ids.contains(&entity.id) {
343            continue;
344        }
345        if seen.insert(entity.id.clone()) {
346            all_entity_ids.push(entity.id.clone());
347        }
348    }
349    // Add base-only entities (deleted in both → skip, deleted in one → handled below)
350    for entity in &base_entities {
351        if seen.insert(entity.id.clone()) {
352            all_entity_ids.push(entity.id.clone());
353        }
354    }
355
356    let mut stats = MergeStats::default();
357    let mut conflicts: Vec<EntityConflict> = Vec::new();
358    let mut audit: Vec<EntityAudit> = Vec::new();
359    let mut resolved_entities: HashMap<String, ResolvedEntity> = HashMap::new();
360
361    // Detect rename/rename conflicts: same base entity renamed differently in both branches.
362    // These must be flagged before the entity resolution loop, which would otherwise silently
363    // pick ours and also include theirs as an unmatched entity.
364    let mut rename_conflict_ids: HashSet<String> = HashSet::new();
365    for (base_id, ours_new_id) in &base_to_ours_rename {
366        if let Some(theirs_new_id) = base_to_theirs_rename.get(base_id) {
367            if ours_new_id != theirs_new_id {
368                rename_conflict_ids.insert(base_id.clone());
369            }
370        }
371    }
372
373    for entity_id in &all_entity_ids {
374        // Handle rename/rename conflicts: both branches renamed this base entity differently
375        if rename_conflict_ids.contains(entity_id) {
376            let ours_new_id = &base_to_ours_rename[entity_id];
377            let theirs_new_id = &base_to_theirs_rename[entity_id];
378            let base_entity = base_entity_map.get(entity_id.as_str());
379            let ours_entity = ours_entity_map.get(ours_new_id.as_str());
380            let theirs_entity = theirs_entity_map.get(theirs_new_id.as_str());
381            let base_name = base_entity.map(|e| e.name.as_str()).unwrap_or(entity_id);
382            let ours_name = ours_entity.map(|e| e.name.as_str()).unwrap_or(ours_new_id);
383            let theirs_name = theirs_entity.map(|e| e.name.as_str()).unwrap_or(theirs_new_id);
384
385            let base_rc = base_entity.map(|e| base_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
386            let ours_rc = ours_entity.map(|e| ours_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
387            let theirs_rc = theirs_entity.map(|e| theirs_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
388
389            stats.entities_conflicted += 1;
390            let conflict = EntityConflict {
391                entity_name: base_name.to_string(),
392                entity_type: base_entity.map(|e| e.entity_type.clone()).unwrap_or_default(),
393                kind: ConflictKind::RenameRename {
394                    base_name: base_name.to_string(),
395                    ours_name: ours_name.to_string(),
396                    theirs_name: theirs_name.to_string(),
397                },
398                complexity: crate::conflict::ConflictComplexity::Syntax,
399                ours_content: ours_rc,
400                theirs_content: theirs_rc,
401                base_content: base_rc,
402            };
403            conflicts.push(conflict.clone());
404            audit.push(EntityAudit {
405                name: base_name.to_string(),
406                entity_type: base_entity.map(|e| e.entity_type.clone()).unwrap_or_default(),
407                resolution: ResolutionStrategy::ConflictRenameRename,
408            });
409            let resolution = ResolvedEntity::Conflict(conflict);
410            resolved_entities.insert(entity_id.clone(), resolution.clone());
411            resolved_entities.insert(ours_new_id.clone(), resolution);
412            // Mark theirs renamed ID as Deleted so reconstruct doesn't emit the conflict twice
413            // (once from ours skeleton, once from theirs-only insertion)
414            resolved_entities.insert(theirs_new_id.clone(), ResolvedEntity::Deleted);
415            continue;
416        }
417
418        let in_base = base_entity_map.get(entity_id.as_str());
419        // Follow rename chains: if base entity was renamed in ours/theirs, use renamed version
420        let ours_id = base_to_ours_rename.get(entity_id.as_str()).map(|s| s.as_str()).unwrap_or(entity_id.as_str());
421        let theirs_id = base_to_theirs_rename.get(entity_id.as_str()).map(|s| s.as_str()).unwrap_or(entity_id.as_str());
422        let in_ours = ours_entity_map.get(ours_id).or_else(|| ours_entity_map.get(entity_id.as_str()));
423        let in_theirs = theirs_entity_map.get(theirs_id).or_else(|| theirs_entity_map.get(entity_id.as_str()));
424
425        let ours_change = ours_change_map.get(entity_id);
426        let theirs_change = theirs_change_map.get(entity_id);
427
428        let (resolution, strategy) = resolve_entity(
429            entity_id,
430            in_base,
431            in_ours,
432            in_theirs,
433            ours_change,
434            theirs_change,
435            &base_region_content,
436            &ours_region_content,
437            &theirs_region_content,
438            &base_all,
439            &ours_all,
440            &theirs_all,
441            &mut stats,
442            marker_format,
443        );
444
445        // Build audit entry from entity info
446        let entity_name = in_ours.map(|e| e.name.as_str())
447            .or_else(|| in_theirs.map(|e| e.name.as_str()))
448            .or_else(|| in_base.map(|e| e.name.as_str()))
449            .unwrap_or(entity_id)
450            .to_string();
451        let entity_type = in_ours.map(|e| e.entity_type.as_str())
452            .or_else(|| in_theirs.map(|e| e.entity_type.as_str()))
453            .or_else(|| in_base.map(|e| e.entity_type.as_str()))
454            .unwrap_or("")
455            .to_string();
456        audit.push(EntityAudit {
457            name: entity_name,
458            entity_type,
459            resolution: strategy,
460        });
461
462        match &resolution {
463            ResolvedEntity::Conflict(ref c) => conflicts.push(c.clone()),
464            ResolvedEntity::ScopedConflict { conflict, .. } => conflicts.push(conflict.clone()),
465            _ => {}
466        }
467
468        resolved_entities.insert(entity_id.clone(), resolution.clone());
469        // Also store under renamed IDs so reconstruct can find them
470        if let Some(ours_renamed_id) = base_to_ours_rename.get(entity_id.as_str()) {
471            resolved_entities.insert(ours_renamed_id.clone(), resolution.clone());
472        }
473        if let Some(theirs_renamed_id) = base_to_theirs_rename.get(entity_id.as_str()) {
474            resolved_entities.insert(theirs_renamed_id.clone(), resolution);
475        }
476    }
477
478    // Merge interstitial regions
479    let (merged_interstitials, interstitial_conflicts) =
480        merge_interstitials(&base_regions, &ours_regions, &theirs_regions, marker_format);
481    stats.entities_conflicted += interstitial_conflicts.len();
482    conflicts.extend(interstitial_conflicts);
483
484    // Reconstruct the file
485    let content = reconstruct(
486        &ours_regions,
487        &theirs_regions,
488        &theirs_entities,
489        &ours_entity_map,
490        &resolved_entities,
491        &merged_interstitials,
492        marker_format,
493    );
494
495    // Post-merge cleanup: remove duplicate lines and normalize blank lines
496    let content = post_merge_cleanup(&content);
497
498    // Post-merge validation: verify the merged result is structurally sound.
499    // Catches silent data loss from entity merge / reconstruction bugs.
500    let mut warnings = vec![];
501    if conflicts.is_empty() && stats.entities_both_changed_merged > 0 {
502        let merged_entities = plugin.extract_entities(&content, file_path);
503        if merged_entities.is_empty() && !content.trim().is_empty() {
504            warnings.push(crate::validate::SemanticWarning {
505                entity_name: "(file)".to_string(),
506                entity_type: "file".to_string(),
507                file_path: file_path.to_string(),
508                kind: crate::validate::WarningKind::ParseFailedAfterMerge,
509                related: vec![],
510            });
511        }
512
513        // Entity coverage check: every resolved-clean entity's content should
514        // appear in the merged output. If it doesn't, reconstruct dropped it.
515        if conflicts.is_empty() {
516            for (_, resolved) in &resolved_entities {
517                if let ResolvedEntity::Clean(region) = resolved {
518                    let trimmed = region.content.trim();
519                    if !trimmed.is_empty() && trimmed.len() > 20 && !content.contains(trimmed) {
520                        // Entity resolved cleanly but its content is missing from output.
521                        // Fall back to git merge-file to avoid silent data loss.
522                        return git_merge_file(base, ours, theirs, &mut stats);
523                    }
524                }
525            }
526        }
527
528        // Entity count check: re-parsed merged output should have at least as many
529        // entities as the minimum of ours/theirs (minus deletions). A significant
530        // drop means entities were silently lost.
531        if conflicts.is_empty() && !merged_entities.is_empty() {
532            let merged_top = filter_nested_entities(merged_entities);
533            let deleted_count = resolved_entities.values()
534                .filter(|r| matches!(r, ResolvedEntity::Deleted))
535                .count();
536            let expected_min = ours_entities.len().min(theirs_entities.len()).saturating_sub(deleted_count);
537            if expected_min > 3 && merged_top.len() < expected_min * 80 / 100 {
538                return git_merge_file(base, ours, theirs, &mut stats);
539            }
540        }
541    }
542
543    let entity_result = MergeResult {
544        content,
545        conflicts,
546        warnings,
547        stats: stats.clone(),
548        audit,
549    };
550
551    // Floor: never produce more conflict markers than git merge-file.
552    // Entity merge can split one git conflict into multiple per-entity conflicts,
553    // or interstitial merges can produce conflicts not tracked in the conflicts vec.
554    let entity_markers = entity_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
555    if entity_markers > 0 {
556        let git_result = git_merge_file(base, ours, theirs, &mut stats);
557        let git_markers = git_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
558        if entity_markers > git_markers {
559            return git_result;
560        }
561    }
562
563    // Safety net: detect silent data loss from entity merge.
564    // If the merged result is significantly shorter than expected, fall back to git.
565    if entity_markers == 0 {
566        let merged_len = entity_result.content.len();
567        let max_input_len = ours.len().max(theirs.len());
568        let min_input_len = ours.len().min(theirs.len());
569        // Expected length: at least 90% of the shorter input (both branches
570        // contribute content, so the merge should be at least as long as the
571        // shorter one minus some deletions).
572        if min_input_len > 200 && merged_len < min_input_len * 90 / 100 {
573            return git_merge_file(base, ours, theirs, &mut stats);
574        }
575        // Also check: merged shouldn't be much shorter than max input unless
576        // there were intentional deletions from one branch
577        if max_input_len > 500 && merged_len < max_input_len * 70 / 100 {
578            // Check if the length reduction is explained by one branch deleting content
579            let base_len = base.len();
580            let ours_deleted = base_len > ours.len() && (base_len - ours.len()) > max_input_len * 20 / 100;
581            let theirs_deleted = base_len > theirs.len() && (base_len - theirs.len()) > max_input_len * 20 / 100;
582            if !ours_deleted && !theirs_deleted {
583                return git_merge_file(base, ours, theirs, &mut stats);
584            }
585        }
586    }
587
588    entity_result
589}
590
591fn resolve_entity(
592    _entity_id: &str,
593    in_base: Option<&&SemanticEntity>,
594    in_ours: Option<&&SemanticEntity>,
595    in_theirs: Option<&&SemanticEntity>,
596    _ours_change: Option<&ChangeType>,
597    _theirs_change: Option<&ChangeType>,
598    base_region_content: &HashMap<&str, &str>,
599    ours_region_content: &HashMap<&str, &str>,
600    theirs_region_content: &HashMap<&str, &str>,
601    base_all: &[SemanticEntity],
602    ours_all: &[SemanticEntity],
603    theirs_all: &[SemanticEntity],
604    stats: &mut MergeStats,
605    marker_format: &MarkerFormat,
606) -> (ResolvedEntity, ResolutionStrategy) {
607    // Helper: get region content (from file lines) for an entity, falling back to entity.content
608    let region_content = |entity: &SemanticEntity, map: &HashMap<&str, &str>| -> String {
609        map.get(entity.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| entity.content.clone())
610    };
611
612    match (in_base, in_ours, in_theirs) {
613        // Entity exists in all three versions
614        (Some(base), Some(ours), Some(theirs)) => {
615            // Check modification status via structural hash AND region content.
616            // Region content may differ even when structural hash is the same
617            // (e.g., doc comment added/changed but function body unchanged).
618            let base_rc_lazy = || region_content(base, base_region_content);
619            let ours_rc_lazy = || region_content(ours, ours_region_content);
620            let theirs_rc_lazy = || region_content(theirs, theirs_region_content);
621
622            let ours_modified = ours.content_hash != base.content_hash
623                || ours_rc_lazy() != base_rc_lazy();
624            let theirs_modified = theirs.content_hash != base.content_hash
625                || theirs_rc_lazy() != base_rc_lazy();
626
627            match (ours_modified, theirs_modified) {
628                (false, false) => {
629                    // Neither changed
630                    stats.entities_unchanged += 1;
631                    (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::Unchanged)
632                }
633                (true, false) => {
634                    // Only ours changed
635                    stats.entities_ours_only += 1;
636                    (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::OursOnly)
637                }
638                (false, true) => {
639                    // Only theirs changed
640                    stats.entities_theirs_only += 1;
641                    (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &region_content(theirs, theirs_region_content))), ResolutionStrategy::TheirsOnly)
642                }
643                (true, true) => {
644                    // Both changed — try intra-entity merge
645                    if ours.content_hash == theirs.content_hash {
646                        // Same change in both — take ours
647                        stats.entities_both_changed_merged += 1;
648                        (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::ContentEqual)
649                    } else {
650                        // Try diffy 3-way merge on region content (preserves full syntax)
651                        let base_rc = region_content(base, base_region_content);
652                        let ours_rc = region_content(ours, ours_region_content);
653                        let theirs_rc = region_content(theirs, theirs_region_content);
654
655                        // Whitespace-aware shortcut: if one side only changed
656                        // whitespace/formatting, take the other side's content changes.
657                        // This handles the common case where one agent reformats while
658                        // another makes semantic changes.
659                        if is_whitespace_only_diff(&base_rc, &ours_rc) {
660                            stats.entities_theirs_only += 1;
661                            return (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &theirs_rc)), ResolutionStrategy::TheirsOnly);
662                        }
663                        if is_whitespace_only_diff(&base_rc, &theirs_rc) {
664                            stats.entities_ours_only += 1;
665                            return (ResolvedEntity::Clean(entity_to_region_with_content(ours, &ours_rc)), ResolutionStrategy::OursOnly);
666                        }
667
668                        match diffy_merge(&base_rc, &ours_rc, &theirs_rc) {
669                            Some(merged) => {
670                                stats.entities_both_changed_merged += 1;
671                                stats.resolved_via_diffy += 1;
672                                (ResolvedEntity::Clean(EntityRegion {
673                                    entity_id: ours.id.clone(),
674                                    entity_name: ours.name.clone(),
675                                    entity_type: ours.entity_type.clone(),
676                                    content: merged,
677                                    start_line: ours.start_line,
678                                    end_line: ours.end_line,
679                                }), ResolutionStrategy::DiffyMerged)
680                            }
681                            None => {
682                                // Strategy 1: decorator/annotation-aware merge
683                                // Decorators are unordered annotations — merge them commutatively
684                                if let Some(merged) = try_decorator_aware_merge(&base_rc, &ours_rc, &theirs_rc) {
685                                    stats.entities_both_changed_merged += 1;
686                                    stats.resolved_via_diffy += 1;
687                                    return (ResolvedEntity::Clean(EntityRegion {
688                                        entity_id: ours.id.clone(),
689                                        entity_name: ours.name.clone(),
690                                        entity_type: ours.entity_type.clone(),
691                                        content: merged,
692                                        start_line: ours.start_line,
693                                        end_line: ours.end_line,
694                                    }), ResolutionStrategy::DecoratorMerged);
695                                }
696
697                                // Strategy 2: inner entity merge for container types
698                                // (LastMerge insight: class members are unordered children)
699                                if is_container_entity_type(&ours.entity_type) {
700                                    let base_children = in_base
701                                        .map(|b| get_child_entities(b, base_all))
702                                        .unwrap_or_default();
703                                    let ours_children = get_child_entities(ours, ours_all);
704                                    let theirs_children = in_theirs
705                                        .map(|t| get_child_entities(t, theirs_all))
706                                        .unwrap_or_default();
707                                    let base_start = in_base.map(|b| b.start_line).unwrap_or(1);
708                                    let ours_start = ours.start_line;
709                                    let theirs_start = in_theirs.map(|t| t.start_line).unwrap_or(1);
710                                    if let Some(inner) = try_inner_entity_merge(
711                                        &base_rc, &ours_rc, &theirs_rc,
712                                        &base_children, &ours_children, &theirs_children,
713                                        base_start, ours_start, theirs_start,
714                                        marker_format,
715                                    ) {
716                                        if inner.has_conflicts {
717                                            // Inner merge produced per-member conflicts:
718                                            // content has scoped markers for just the conflicted
719                                            // members; clean members are merged normally.
720                                            stats.entities_conflicted += 1;
721                                            stats.resolved_via_inner_merge += 1;
722                                            let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), Some(&theirs_rc));
723                                            return (ResolvedEntity::ScopedConflict {
724                                                content: inner.content,
725                                                conflict: EntityConflict {
726                                                    entity_name: ours.name.clone(),
727                                                    entity_type: ours.entity_type.clone(),
728                                                    kind: ConflictKind::BothModified,
729                                                    complexity,
730                                                    ours_content: Some(ours_rc),
731                                                    theirs_content: Some(theirs_rc),
732                                                    base_content: Some(base_rc),
733                                                },
734                                            }, ResolutionStrategy::InnerMerged);
735                                        } else {
736                                            stats.entities_both_changed_merged += 1;
737                                            stats.resolved_via_inner_merge += 1;
738                                            return (ResolvedEntity::Clean(EntityRegion {
739                                                entity_id: ours.id.clone(),
740                                                entity_name: ours.name.clone(),
741                                                entity_type: ours.entity_type.clone(),
742                                                content: inner.content,
743                                                start_line: ours.start_line,
744                                                end_line: ours.end_line,
745                                            }), ResolutionStrategy::InnerMerged);
746                                        }
747                                    }
748                                }
749                                stats.entities_conflicted += 1;
750                                let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), Some(&theirs_rc));
751                                (ResolvedEntity::Conflict(EntityConflict {
752                                    entity_name: ours.name.clone(),
753                                    entity_type: ours.entity_type.clone(),
754                                    kind: ConflictKind::BothModified,
755                                    complexity,
756                                    ours_content: Some(ours_rc),
757                                    theirs_content: Some(theirs_rc),
758                                    base_content: Some(base_rc),
759                                }), ResolutionStrategy::ConflictBothModified)
760                            }
761                        }
762                    }
763                }
764            }
765        }
766
767        // Entity in base and ours, but not theirs → theirs deleted it
768        (Some(_base), Some(ours), None) => {
769            let ours_modified = ours.content_hash != _base.content_hash;
770            if ours_modified {
771                // Modify/delete conflict
772                stats.entities_conflicted += 1;
773                let ours_rc = region_content(ours, ours_region_content);
774                let base_rc = region_content(_base, base_region_content);
775                let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), None);
776                (ResolvedEntity::Conflict(EntityConflict {
777                    entity_name: ours.name.clone(),
778                    entity_type: ours.entity_type.clone(),
779                    kind: ConflictKind::ModifyDelete {
780                        modified_in_ours: true,
781                    },
782                    complexity,
783                    ours_content: Some(ours_rc),
784                    theirs_content: None,
785                    base_content: Some(base_rc),
786                }), ResolutionStrategy::ConflictModifyDelete)
787            } else {
788                // Theirs deleted, ours unchanged → accept deletion
789                stats.entities_deleted += 1;
790                (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
791            }
792        }
793
794        // Entity in base and theirs, but not ours → ours deleted it
795        (Some(_base), None, Some(theirs)) => {
796            let theirs_modified = theirs.content_hash != _base.content_hash;
797            if theirs_modified {
798                // Modify/delete conflict
799                stats.entities_conflicted += 1;
800                let theirs_rc = region_content(theirs, theirs_region_content);
801                let base_rc = region_content(_base, base_region_content);
802                let complexity = classify_conflict(Some(&base_rc), None, Some(&theirs_rc));
803                (ResolvedEntity::Conflict(EntityConflict {
804                    entity_name: theirs.name.clone(),
805                    entity_type: theirs.entity_type.clone(),
806                    kind: ConflictKind::ModifyDelete {
807                        modified_in_ours: false,
808                    },
809                    complexity,
810                    ours_content: None,
811                    theirs_content: Some(theirs_rc),
812                    base_content: Some(base_rc),
813                }), ResolutionStrategy::ConflictModifyDelete)
814            } else {
815                // Ours deleted, theirs unchanged → accept deletion
816                stats.entities_deleted += 1;
817                (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
818            }
819        }
820
821        // Entity only in ours (added by ours)
822        (None, Some(ours), None) => {
823            stats.entities_added_ours += 1;
824            (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::AddedOurs)
825        }
826
827        // Entity only in theirs (added by theirs)
828        (None, None, Some(theirs)) => {
829            stats.entities_added_theirs += 1;
830            (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &region_content(theirs, theirs_region_content))), ResolutionStrategy::AddedTheirs)
831        }
832
833        // Entity in both ours and theirs but not base (both added)
834        (None, Some(ours), Some(theirs)) => {
835            if ours.content_hash == theirs.content_hash {
836                // Same content added by both → take ours
837                stats.entities_added_ours += 1;
838                (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::ContentEqual)
839            } else {
840                // Different content → conflict
841                stats.entities_conflicted += 1;
842                let ours_rc = region_content(ours, ours_region_content);
843                let theirs_rc = region_content(theirs, theirs_region_content);
844                let complexity = classify_conflict(None, Some(&ours_rc), Some(&theirs_rc));
845                (ResolvedEntity::Conflict(EntityConflict {
846                    entity_name: ours.name.clone(),
847                    entity_type: ours.entity_type.clone(),
848                    kind: ConflictKind::BothAdded,
849                    complexity,
850                    ours_content: Some(ours_rc),
851                    theirs_content: Some(theirs_rc),
852                    base_content: None,
853                }), ResolutionStrategy::ConflictBothAdded)
854            }
855        }
856
857        // Entity only in base (deleted by both)
858        (Some(_), None, None) => {
859            stats.entities_deleted += 1;
860            (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
861        }
862
863        // Should not happen
864        (None, None, None) => (ResolvedEntity::Deleted, ResolutionStrategy::Deleted),
865    }
866}
867
868fn entity_to_region_with_content(entity: &SemanticEntity, content: &str) -> EntityRegion {
869    EntityRegion {
870        entity_id: entity.id.clone(),
871        entity_name: entity.name.clone(),
872        entity_type: entity.entity_type.clone(),
873        content: content.to_string(),
874        start_line: entity.start_line,
875        end_line: entity.end_line,
876    }
877}
878
879/// Build a map from entity_id to region content (from file lines).
880/// This preserves surrounding syntax (like `export`) that sem-core's entity.content may strip.
881/// Returns borrowed references since regions live for the merge duration.
882fn build_region_content_map(regions: &[FileRegion]) -> HashMap<&str, &str> {
883    regions
884        .iter()
885        .filter_map(|r| match r {
886            FileRegion::Entity(e) => Some((e.entity_id.as_str(), e.content.as_str())),
887            _ => None,
888        })
889        .collect()
890}
891
/// Report whether `a` and `b` are equal once whitespace noise is ignored.
///
/// Every line is stripped of leading and trailing whitespace and lines that
/// end up empty are discarded; the two texts match when the remaining line
/// sequences are identical. Indentation changes, trailing whitespace and
/// added or removed blank lines are therefore all ignored. Whitespace
/// *inside* a line and the way text is split across lines still count as
/// differences.
///
/// Identical inputs also return `true`.
///
/// Note that indentation is ignored unconditionally, so a pure re-indent is
/// reported as whitespace-only regardless of the source language.
fn is_whitespace_only_diff(a: &str, b: &str) -> bool {
    /// Iterate over the trimmed, non-blank lines of `text`.
    fn significant_lines(text: &str) -> impl Iterator<Item = &str> {
        text.lines().map(str::trim).filter(|line| !line.is_empty())
    }

    // Fast path: byte-identical inputs need no normalisation.
    a == b || significant_lines(a).eq(significant_lines(b))
}
902
/// Check if a line is a decorator or annotation.
///
/// A line counts as a decorator when, after trimming surrounding whitespace,
/// it starts with `@` (Python `@decorator`, Java/TS `@Annotation`) and the
/// identifier directly following the `@` is not a documentation tag
/// (`@param`, `@return`, `@returns`, `@type`, `@typedef`, `@see`).
///
/// The tag is compared as a whole identifier rather than as a prefix, so
/// real decorators that merely *begin* with a doc-tag name, such as
/// `@parameterized.expand(...)` or `@typechecked`, are recognised as
/// decorators.
fn is_decorator_line(line: &str) -> bool {
    // Documentation tags that look like decorators but must not be merged as such.
    const DOC_TAGS: [&str; 6] = ["param", "return", "returns", "type", "typedef", "see"];

    let Some(after_at) = line.trim().strip_prefix('@') else {
        return false;
    };

    // The tag is the leading identifier: letters, digits and underscores.
    let tag_len = after_at
        .find(|c: char| !(c.is_alphanumeric() || c == '_'))
        .unwrap_or(after_at.len());
    let tag = &after_at[..tag_len];

    !DOC_TAGS.iter().any(|&doc_tag| doc_tag == tag)
}
913
914/// Split content into (decorators, body) where decorators are leading @-prefixed lines.
915fn split_decorators(content: &str) -> (Vec<&str>, &str) {
916    let mut decorator_end = 0;
917    let mut byte_offset = 0;
918    for line in content.lines() {
919        if is_decorator_line(line) || line.trim().is_empty() {
920            decorator_end += 1;
921            byte_offset += line.len() + 1; // +1 for newline
922        } else {
923            break;
924        }
925    }
926    // Trim trailing empty lines from decorator section
927    let lines: Vec<&str> = content.lines().collect();
928    while decorator_end > 0 && lines.get(decorator_end - 1).map_or(false, |l| l.trim().is_empty()) {
929        byte_offset -= lines[decorator_end - 1].len() + 1;
930        decorator_end -= 1;
931    }
932    let decorators: Vec<&str> = lines[..decorator_end]
933        .iter()
934        .filter(|l| is_decorator_line(l))
935        .copied()
936        .collect();
937    let body = &content[byte_offset.min(content.len())..];
938    (decorators, body)
939}
940
941/// Try decorator-aware merge: when both sides add different decorators/annotations,
942/// merge them commutatively (like imports). Also try merging the bodies separately.
943///
944/// This handles the common pattern where one agent adds @cache and another adds @deprecated
945/// to the same function — they should both be preserved.
946fn try_decorator_aware_merge(base: &str, ours: &str, theirs: &str) -> Option<String> {
947    let (base_decorators, base_body) = split_decorators(base);
948    let (ours_decorators, ours_body) = split_decorators(ours);
949    let (theirs_decorators, theirs_body) = split_decorators(theirs);
950
951    // Only useful if at least one side has decorators
952    if ours_decorators.is_empty() && theirs_decorators.is_empty() {
953        return None;
954    }
955
956    // Merge bodies using diffy (or take unchanged side)
957    let merged_body = if base_body == ours_body && base_body == theirs_body {
958        base_body.to_string()
959    } else if base_body == ours_body {
960        theirs_body.to_string()
961    } else if base_body == theirs_body {
962        ours_body.to_string()
963    } else {
964        // Both changed body — try diffy on just the body
965        diffy_merge(base_body, ours_body, theirs_body)?
966    };
967
968    // Merge decorators commutatively (set union)
969    let base_set: HashSet<&str> = base_decorators.iter().copied().collect();
970    let ours_set: HashSet<&str> = ours_decorators.iter().copied().collect();
971    let theirs_set: HashSet<&str> = theirs_decorators.iter().copied().collect();
972
973    // Deletions
974    let ours_deleted: HashSet<&str> = base_set.difference(&ours_set).copied().collect();
975    let theirs_deleted: HashSet<&str> = base_set.difference(&theirs_set).copied().collect();
976
977    // Start with base decorators, remove deletions
978    let mut merged_decorators: Vec<&str> = base_decorators
979        .iter()
980        .filter(|d| !ours_deleted.contains(**d) && !theirs_deleted.contains(**d))
981        .copied()
982        .collect();
983
984    // Add new decorators from ours (not in base)
985    for d in &ours_decorators {
986        if !base_set.contains(d) && !merged_decorators.contains(d) {
987            merged_decorators.push(d);
988        }
989    }
990    // Add new decorators from theirs (not in base, not already added)
991    for d in &theirs_decorators {
992        if !base_set.contains(d) && !merged_decorators.contains(d) {
993            merged_decorators.push(d);
994        }
995    }
996
997    // Reconstruct
998    let mut result = String::new();
999    for d in &merged_decorators {
1000        result.push_str(d);
1001        result.push('\n');
1002    }
1003    result.push_str(&merged_body);
1004
1005    Some(result)
1006}
1007
1008/// Try 3-way merge on text using diffy. Returns None if there are conflicts.
1009fn diffy_merge(base: &str, ours: &str, theirs: &str) -> Option<String> {
1010    let result = diffy::merge(base, ours, theirs);
1011    match result {
1012        Ok(merged) => Some(merged),
1013        Err(_conflicted) => None,
1014    }
1015}
1016
1017/// Try 3-way merge using git merge-file. Returns None on conflict or error.
1018/// This uses a different diff algorithm than diffy and can sometimes merge
1019/// cases that diffy cannot (and vice versa).
1020fn git_merge_string(base: &str, ours: &str, theirs: &str) -> Option<String> {
1021    let dir = tempfile::tempdir().ok()?;
1022    let base_path = dir.path().join("base");
1023    let ours_path = dir.path().join("ours");
1024    let theirs_path = dir.path().join("theirs");
1025
1026    std::fs::write(&base_path, base).ok()?;
1027    std::fs::write(&ours_path, ours).ok()?;
1028    std::fs::write(&theirs_path, theirs).ok()?;
1029
1030    let output = Command::new("git")
1031        .arg("merge-file")
1032        .arg("-p")
1033        .arg(&ours_path)
1034        .arg(&base_path)
1035        .arg(&theirs_path)
1036        .output()
1037        .ok()?;
1038
1039    if output.status.success() {
1040        String::from_utf8(output.stdout).ok()
1041    } else {
1042        None
1043    }
1044}
1045
1046/// Merge interstitial regions from all three versions.
1047/// Uses commutative (set-based) merge for import blocks — inspired by
1048/// LastMerge/Mergiraf's "unordered children" concept.
1049/// Falls back to line-level 3-way merge for non-import content.
1050fn merge_interstitials(
1051    base_regions: &[FileRegion],
1052    ours_regions: &[FileRegion],
1053    theirs_regions: &[FileRegion],
1054    marker_format: &MarkerFormat,
1055) -> (HashMap<String, String>, Vec<EntityConflict>) {
1056    let base_map: HashMap<&str, &str> = base_regions
1057        .iter()
1058        .filter_map(|r| match r {
1059            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1060            _ => None,
1061        })
1062        .collect();
1063
1064    let ours_map: HashMap<&str, &str> = ours_regions
1065        .iter()
1066        .filter_map(|r| match r {
1067            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1068            _ => None,
1069        })
1070        .collect();
1071
1072    let theirs_map: HashMap<&str, &str> = theirs_regions
1073        .iter()
1074        .filter_map(|r| match r {
1075            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1076            _ => None,
1077        })
1078        .collect();
1079
1080    let mut all_keys: HashSet<&str> = HashSet::new();
1081    all_keys.extend(base_map.keys());
1082    all_keys.extend(ours_map.keys());
1083    all_keys.extend(theirs_map.keys());
1084
1085    let mut merged: HashMap<String, String> = HashMap::new();
1086    let mut interstitial_conflicts: Vec<EntityConflict> = Vec::new();
1087
1088    for key in all_keys {
1089        let base_content = base_map.get(key).copied().unwrap_or("");
1090        let ours_content = ours_map.get(key).copied().unwrap_or("");
1091        let theirs_content = theirs_map.get(key).copied().unwrap_or("");
1092
1093        // If all same, no merge needed
1094        if ours_content == theirs_content {
1095            merged.insert(key.to_string(), ours_content.to_string());
1096        } else if base_content == ours_content {
1097            merged.insert(key.to_string(), theirs_content.to_string());
1098        } else if base_content == theirs_content {
1099            merged.insert(key.to_string(), ours_content.to_string());
1100        } else {
1101            // Both changed — check if this is an import-heavy region
1102            if is_import_region(base_content)
1103                || is_import_region(ours_content)
1104                || is_import_region(theirs_content)
1105            {
1106                // Commutative merge: treat import lines as a set
1107                let result = merge_imports_commutatively(base_content, ours_content, theirs_content);
1108                merged.insert(key.to_string(), result);
1109            } else {
1110                // Regular line-level merge
1111                match diffy::merge(base_content, ours_content, theirs_content) {
1112                    Ok(m) => {
1113                        merged.insert(key.to_string(), m);
1114                    }
1115                    Err(_conflicted) => {
1116                        // Create a proper conflict instead of silently embedding
1117                        // raw conflict markers into the output.
1118                        let complexity = classify_conflict(
1119                            Some(base_content),
1120                            Some(ours_content),
1121                            Some(theirs_content),
1122                        );
1123                        let conflict = EntityConflict {
1124                            entity_name: key.to_string(),
1125                            entity_type: "interstitial".to_string(),
1126                            kind: ConflictKind::BothModified,
1127                            complexity,
1128                            ours_content: Some(ours_content.to_string()),
1129                            theirs_content: Some(theirs_content.to_string()),
1130                            base_content: Some(base_content.to_string()),
1131                        };
1132                        merged.insert(key.to_string(), conflict.to_conflict_markers(marker_format));
1133                        interstitial_conflicts.push(conflict);
1134                    }
1135                }
1136            }
1137        }
1138    }
1139
1140    (merged, interstitial_conflicts)
1141}
1142
1143/// Check if a region is predominantly import/use statements.
1144/// Handles both single-line imports and multi-line import blocks
1145/// (e.g. `import { type a, type b } from "..."` spread across lines).
1146fn is_import_region(content: &str) -> bool {
1147    let lines: Vec<&str> = content
1148        .lines()
1149        .filter(|l| !l.trim().is_empty())
1150        .collect();
1151    if lines.is_empty() {
1152        return false;
1153    }
1154    let mut import_count = 0;
1155    let mut in_multiline_import = false;
1156    for line in &lines {
1157        if in_multiline_import {
1158            import_count += 1;
1159            let trimmed = line.trim();
1160            if trimmed.starts_with('}') || trimmed.ends_with(')') {
1161                in_multiline_import = false;
1162            }
1163        } else if is_import_line(line) {
1164            import_count += 1;
1165            let trimmed = line.trim();
1166            // Detect start of multi-line import: `import {` or `import (` without closing on same line
1167            if (trimmed.contains('{') && !trimmed.contains('}'))
1168                || (trimmed.starts_with("import (") && !trimmed.contains(')'))
1169            {
1170                in_multiline_import = true;
1171            }
1172        }
1173    }
1174    // If >50% of non-empty lines are imports, treat as import region
1175    import_count * 2 > lines.len()
1176}
1177
1178/// Post-merge cleanup: remove consecutive duplicate lines and normalize blank lines.
1179///
1180/// Fixes two classes of merge artifacts:
1181/// 1. Duplicate lines/blocks that appear when both sides add the same content
1182///    (e.g. duplicate typedefs, forward declarations)
1183/// 2. Missing blank lines between entities or declarations, and excessive
1184///    blank lines (3+ consecutive) collapsed to 2
1185fn post_merge_cleanup(content: &str) -> String {
1186    let lines: Vec<&str> = content.lines().collect();
1187    let mut result: Vec<&str> = Vec::with_capacity(lines.len());
1188
1189    // Pass 1: Remove consecutive duplicate lines that look like declarations or imports.
1190    // Only dedup lines that are plausibly merge artifacts (imports, exports, forward decls).
1191    // Preserve intentional duplicates like repeated assertions, assignments, or data lines.
1192    for line in &lines {
1193        if line.trim().is_empty() {
1194            result.push(line);
1195            continue;
1196        }
1197        if let Some(prev) = result.last() {
1198            if !prev.trim().is_empty() && *prev == *line && looks_like_declaration(line) {
1199                continue; // skip consecutive exact duplicate of declaration-like line
1200            }
1201        }
1202        result.push(line);
1203    }
1204
1205    // Pass 2: Collapse 3+ consecutive blank lines to 2 (one separator blank line).
1206    let mut final_lines: Vec<&str> = Vec::with_capacity(result.len());
1207    let mut consecutive_blanks = 0;
1208    for line in &result {
1209        if line.trim().is_empty() {
1210            consecutive_blanks += 1;
1211            if consecutive_blanks <= 2 {
1212                final_lines.push(line);
1213            }
1214        } else {
1215            consecutive_blanks = 0;
1216            final_lines.push(line);
1217        }
1218    }
1219
1220    let mut out = final_lines.join("\n");
1221    if content.ends_with('\n') && !out.ends_with('\n') {
1222        out.push('\n');
1223    }
1224    out
1225}
1226
/// Tell whether a line looks like a declaration or import that a merge may
/// have duplicated.
///
/// After trimming, the line qualifies when it starts with one of the known
/// declaration prefixes (`import `, `from `, `use `, `export `, `require(`,
/// `#include`, `typedef `, `using `), or when it starts with `pub ` and
/// contains `mod `. Anything else (assertions, assignments, data
/// initializers, struct fields, ...) yields `false`, since such lines may be
/// repeated on purpose.
fn looks_like_declaration(line: &str) -> bool {
    const DECLARATION_PREFIXES: [&str; 8] = [
        "import ",
        "from ",
        "use ",
        "export ",
        "require(",
        "#include",
        "typedef ",
        "using ",
    ];

    let text = line.trim();
    if DECLARATION_PREFIXES.iter().any(|&prefix| text.starts_with(prefix)) {
        return true;
    }

    // Rust module declarations such as `pub mod foo;`.
    text.starts_with("pub ") && text.contains("mod ")
}
1242
/// Tell whether a line is a top-level import/use/require statement.
///
/// Lines that begin with a space or a tab are rejected outright: indented
/// imports sit inside conditional blocks (`if TYPE_CHECKING:` and similar)
/// and must not be treated as top-level imports.
///
/// An unindented line matches when it starts with `import `, `from `, `use `,
/// `require(`, `package `, `#include ` or `using `, or when it starts with
/// `const ` and contains `require(`.
fn is_import_line(line: &str) -> bool {
    const IMPORT_PREFIXES: [&str; 7] = [
        "import ",
        "from ",
        "use ",
        "require(",
        "package ",
        "#include ",
        "using ",
    ];

    if matches!(line.chars().next(), Some(' ' | '\t')) {
        return false;
    }

    let text = line.trim();
    // CommonJS style: `const fs = require('fs');`
    let const_require = text.starts_with("const ") && text.contains("require(");

    const_require || IMPORT_PREFIXES.iter().any(|&prefix| text.starts_with(prefix))
}
1262
/// One import statement treated as an indivisible unit, whether it fits on a
/// single line or spans several.
#[derive(Debug, Clone)]
struct ImportStatement {
    /// Every source line that makes up the statement, in order
    /// (exactly one entry for a single-line import).
    lines: Vec<String>,
    /// The module the statement imports from (e.g. "./foo", "react", "std::io").
    source: String,
    /// Individual specifiers of a multi-line import (e.g. ["type a", "type b"]);
    /// empty for single-line imports.
    specifiers: Vec<String>,
    /// `true` when the statement was parsed as a multi-line import block.
    is_multiline: bool,
}
1275
1276/// Parse content into import statements, handling multi-line imports as single units.
1277fn parse_import_statements(content: &str) -> (Vec<ImportStatement>, Vec<String>) {
1278    let mut imports: Vec<ImportStatement> = Vec::new();
1279    let mut non_import_lines: Vec<String> = Vec::new();
1280    let lines: Vec<&str> = content.lines().collect();
1281    let mut i = 0;
1282
1283    while i < lines.len() {
1284        let line = lines[i];
1285
1286        if line.trim().is_empty() {
1287            non_import_lines.push(line.to_string());
1288            i += 1;
1289            continue;
1290        }
1291
1292        if is_import_line(line) {
1293            let trimmed = line.trim();
1294            // Check for multi-line import: `import {` without `}` on same line
1295            let starts_multiline = (trimmed.contains('{') && !trimmed.contains('}'))
1296                || (trimmed.starts_with("import (") && !trimmed.contains(')'));
1297
1298            if starts_multiline {
1299                let mut block_lines = vec![line.to_string()];
1300                let mut specifiers = Vec::new();
1301                let close_char = if trimmed.contains('{') { '}' } else { ')' };
1302                i += 1;
1303
1304                // Collect lines until closing brace/paren
1305                while i < lines.len() {
1306                    let inner = lines[i];
1307                    block_lines.push(inner.to_string());
1308                    let inner_trimmed = inner.trim();
1309
1310                    if inner_trimmed.starts_with(close_char) {
1311                        // This is the closing line (e.g. `} from "./foo"`)
1312                        break;
1313                    } else if !inner_trimmed.is_empty() {
1314                        // This is a specifier line — strip trailing comma
1315                        let spec = inner_trimmed.trim_end_matches(',').trim().to_string();
1316                        if !spec.is_empty() {
1317                            specifiers.push(spec);
1318                        }
1319                    }
1320                    i += 1;
1321                }
1322
1323                let full_text = block_lines.join("\n");
1324                let source = import_source_prefix(&full_text).to_string();
1325                imports.push(ImportStatement {
1326                    lines: block_lines,
1327                    source,
1328                    specifiers,
1329                    is_multiline: true,
1330                });
1331            } else {
1332                // Single-line import
1333                let source = import_source_prefix(line).to_string();
1334                imports.push(ImportStatement {
1335                    lines: vec![line.to_string()],
1336                    source,
1337                    specifiers: Vec::new(),
1338                    is_multiline: false,
1339                });
1340            }
1341        } else {
1342            non_import_lines.push(line.to_string());
1343        }
1344        i += 1;
1345    }
1346
1347    (imports, non_import_lines)
1348}
1349
1350/// Merge import blocks commutatively (as unordered sets), preserving grouping.
1351///
1352/// Handles both single-line imports and multi-line import blocks.
1353/// For multi-line imports from the same source, merges specifiers as a set.
1354/// Single-line imports are merged as before: set union with deletions.
1355fn merge_imports_commutatively(base: &str, ours: &str, theirs: &str) -> String {
1356    let (base_imports, _) = parse_import_statements(base);
1357    let (ours_imports, _) = parse_import_statements(ours);
1358    let (theirs_imports, _) = parse_import_statements(theirs);
1359
1360    let has_multiline = base_imports.iter().any(|i| i.is_multiline)
1361        || ours_imports.iter().any(|i| i.is_multiline)
1362        || theirs_imports.iter().any(|i| i.is_multiline);
1363
1364    if has_multiline {
1365        return merge_imports_with_multiline(base, ours, theirs,
1366            &base_imports, &ours_imports, &theirs_imports);
1367    }
1368
1369    // Original single-line-only logic
1370    let base_lines: HashSet<&str> = base.lines().filter(|l| is_import_line(l)).collect();
1371    let ours_lines: HashSet<&str> = ours.lines().filter(|l| is_import_line(l)).collect();
1372
1373    let theirs_deleted: HashSet<&str> = base_lines.difference(
1374        &theirs.lines().filter(|l| is_import_line(l)).collect::<HashSet<&str>>()
1375    ).copied().collect();
1376
1377    let theirs_added: Vec<&str> = theirs
1378        .lines()
1379        .filter(|l| is_import_line(l) && !base_lines.contains(l) && !ours_lines.contains(l))
1380        .collect();
1381
1382    let mut groups: Vec<Vec<&str>> = Vec::new();
1383    let mut current_group: Vec<&str> = Vec::new();
1384
1385    for line in ours.lines() {
1386        if line.trim().is_empty() {
1387            if !current_group.is_empty() {
1388                groups.push(current_group);
1389                current_group = Vec::new();
1390            }
1391        } else if is_import_line(line) {
1392            if theirs_deleted.contains(line) {
1393                continue;
1394            }
1395            current_group.push(line);
1396        } else {
1397            current_group.push(line);
1398        }
1399    }
1400    if !current_group.is_empty() {
1401        groups.push(current_group);
1402    }
1403
1404    for add in &theirs_added {
1405        let prefix = import_source_prefix(add);
1406        let mut best_group = if groups.is_empty() { 0 } else { groups.len() - 1 };
1407        for (i, group) in groups.iter().enumerate() {
1408            if group.iter().any(|l| {
1409                is_import_line(l) && import_source_prefix(l) == prefix
1410            }) {
1411                best_group = i;
1412                break;
1413            }
1414        }
1415        if best_group < groups.len() {
1416            groups[best_group].push(add);
1417        } else {
1418            groups.push(vec![add]);
1419        }
1420    }
1421
1422    // Sort import lines within each group alphabetically so new imports
1423    // land in the conventional position rather than appended at the end.
1424    for group in &mut groups {
1425        // Only sort lines that are imports; keep non-import lines (comments) in place.
1426        let import_indices: Vec<usize> = group.iter().enumerate()
1427            .filter(|(_, l)| is_import_line(l))
1428            .map(|(i, _)| i)
1429            .collect();
1430        let mut import_lines: Vec<&str> = import_indices.iter().map(|&i| group[i]).collect();
1431        import_lines.sort_unstable();
1432        for (j, &idx) in import_indices.iter().enumerate() {
1433            group[idx] = import_lines[j];
1434        }
1435    }
1436
1437    let mut result_lines: Vec<&str> = Vec::new();
1438    for (i, group) in groups.iter().enumerate() {
1439        if i > 0 {
1440            result_lines.push("");
1441        }
1442        result_lines.extend(group);
1443    }
1444
1445    let mut result = result_lines.join("\n");
1446    let ours_trailing = ours.len() - ours.trim_end_matches('\n').len();
1447    let result_trailing = result.len() - result.trim_end_matches('\n').len();
1448    for _ in result_trailing..ours_trailing {
1449        result.push('\n');
1450    }
1451    result
1452}
1453
1454/// Merge imports when multi-line import blocks are involved.
1455/// Matches imports by source module, merges specifiers as a set.
1456fn merge_imports_with_multiline(
1457    _base_raw: &str,
1458    ours_raw: &str,
1459    _theirs_raw: &str,
1460    base_imports: &[ImportStatement],
1461    ours_imports: &[ImportStatement],
1462    theirs_imports: &[ImportStatement],
1463) -> String {
1464    // Build source → specifier sets for base and theirs
1465    let base_specs: HashMap<&str, HashSet<&str>> = base_imports.iter().map(|imp| {
1466        let specs: HashSet<&str> = imp.specifiers.iter().map(|s| s.as_str()).collect();
1467        (imp.source.as_str(), specs)
1468    }).collect();
1469
1470    let theirs_specs: HashMap<&str, HashSet<&str>> = theirs_imports.iter().map(|imp| {
1471        let specs: HashSet<&str> = imp.specifiers.iter().map(|s| s.as_str()).collect();
1472        (imp.source.as_str(), specs)
1473    }).collect();
1474
1475    // Single-line import tracking: base lines and theirs-deleted
1476    let base_single: HashSet<String> = base_imports.iter()
1477        .filter(|i| !i.is_multiline)
1478        .map(|i| i.lines[0].clone())
1479        .collect();
1480    let theirs_single: HashSet<String> = theirs_imports.iter()
1481        .filter(|i| !i.is_multiline)
1482        .map(|i| i.lines[0].clone())
1483        .collect();
1484    let theirs_deleted_single: HashSet<&str> = base_single.iter()
1485        .filter(|l| !theirs_single.contains(l.as_str()))
1486        .map(|l| l.as_str())
1487        .collect();
1488
1489    // Process ours imports, merging in theirs specifiers
1490    let mut result_parts: Vec<String> = Vec::new();
1491    let mut handled_theirs_sources: HashSet<&str> = HashSet::new();
1492
1493    // Walk through ours_raw to preserve formatting (blank lines, comments)
1494    let lines: Vec<&str> = ours_raw.lines().collect();
1495    let mut i = 0;
1496    let mut ours_imp_idx = 0;
1497
1498    while i < lines.len() {
1499        let line = lines[i];
1500
1501        if line.trim().is_empty() {
1502            result_parts.push(line.to_string());
1503            i += 1;
1504            continue;
1505        }
1506
1507        if is_import_line(line) {
1508            let trimmed = line.trim();
1509            let starts_multiline = (trimmed.contains('{') && !trimmed.contains('}'))
1510                || (trimmed.starts_with("import (") && !trimmed.contains(')'));
1511
1512            if starts_multiline && ours_imp_idx < ours_imports.len() {
1513                let imp = &ours_imports[ours_imp_idx];
1514                // Find the matching import by source
1515                let source = imp.source.as_str();
1516                handled_theirs_sources.insert(source);
1517
1518                // Merge specifiers: ours + theirs additions - theirs deletions
1519                let base_spec_set = base_specs.get(source).cloned().unwrap_or_default();
1520                let theirs_spec_set = theirs_specs.get(source).cloned().unwrap_or_default();
1521                // Added by theirs: in theirs but not in base
1522                let theirs_added: HashSet<&str> = theirs_spec_set.difference(&base_spec_set).copied().collect();
1523                // Deleted by theirs: in base but not in theirs
1524                let theirs_removed: HashSet<&str> = base_spec_set.difference(&theirs_spec_set).copied().collect();
1525
1526                // Final set: ours (in original order) + theirs_added - theirs_removed
1527                let mut final_specs: Vec<&str> = imp.specifiers.iter()
1528                    .map(|s| s.as_str())
1529                    .filter(|s| !theirs_removed.contains(s))
1530                    .collect();
1531                for added in &theirs_added {
1532                    if !final_specs.contains(added) {
1533                        final_specs.push(added);
1534                    }
1535                }
1536
1537                // Detect indentation from the original block
1538                let indent = if imp.lines.len() > 1 {
1539                    let second = &imp.lines[1];
1540                    &second[..second.len() - second.trim_start().len()]
1541                } else {
1542                    "     "
1543                };
1544
1545                // Reconstruct multi-line import
1546                result_parts.push(imp.lines[0].clone()); // `import {`
1547                for spec in &final_specs {
1548                    result_parts.push(format!("{}{},", indent, spec));
1549                }
1550                // Closing line from ours
1551                if let Some(last) = imp.lines.last() {
1552                    result_parts.push(last.clone());
1553                }
1554
1555                // Skip past the original multi-line block in ours_raw
1556                let close_char = if trimmed.contains('{') { '}' } else { ')' };
1557                i += 1;
1558                while i < lines.len() {
1559                    if lines[i].trim().starts_with(close_char) {
1560                        i += 1;
1561                        break;
1562                    }
1563                    i += 1;
1564                }
1565                ours_imp_idx += 1;
1566                continue;
1567            } else {
1568                // Single-line import
1569                if ours_imp_idx < ours_imports.len() {
1570                    let imp = &ours_imports[ours_imp_idx];
1571                    handled_theirs_sources.insert(imp.source.as_str());
1572                    ours_imp_idx += 1;
1573                }
1574                // Check if theirs deleted this single-line import
1575                if !theirs_deleted_single.contains(line) {
1576                    result_parts.push(line.to_string());
1577                }
1578            }
1579        } else {
1580            result_parts.push(line.to_string());
1581        }
1582        i += 1;
1583    }
1584
1585    // Add any new imports from theirs that have new sources
1586    for imp in theirs_imports {
1587        if handled_theirs_sources.contains(imp.source.as_str()) {
1588            continue;
1589        }
1590        // Check if this source exists in base (if so, it was handled above)
1591        if base_specs.contains_key(imp.source.as_str()) {
1592            continue;
1593        }
1594        // Truly new import from theirs
1595        for line in &imp.lines {
1596            result_parts.push(line.clone());
1597        }
1598    }
1599
1600    let mut result = result_parts.join("\n");
1601    let ours_trailing = ours_raw.len() - ours_raw.trim_end_matches('\n').len();
1602    let result_trailing = result.len() - result.trim_end_matches('\n').len();
1603    for _ in result_trailing..ours_trailing {
1604        result.push('\n');
1605    }
1606    result
1607}
1608
/// Extract the source/module prefix from an import statement for group matching.
///
/// e.g. "from collections import OrderedDict" -> "collections"
///      "import React from 'react'" -> "react"
///      "use std::collections::HashMap;" -> "std"
///
/// `line` may hold a whole multi-line import; every line is inspected in
/// order so that a closing line such as `} from "./foo"` is found. If no rule
/// matches any line, the trimmed input is returned unchanged.
fn import_source_prefix(line: &str) -> &str {
    /// Text between the first quote character in `text` and the next
    /// occurrence of that *same* quote character, so an apostrophe inside a
    /// double-quoted path does not end the match early.
    fn first_quoted(text: &str) -> Option<&str> {
        let open = text.find(|c: char| c == '\'' || c == '"')?;
        let mut after_open = text[open..].chars();
        let quote = after_open.next()?;
        let rest = after_open.as_str();
        let close = rest.find(quote)?;
        Some(&rest[..close])
    }

    for l in line.lines() {
        let trimmed = l.trim();
        // Python: "from X import Y" -> X
        if let Some(rest) = trimmed.strip_prefix("from ") {
            return rest.split_whitespace().next().unwrap_or("");
        }
        // JS/TS: `import X from 'Y'`, or the closing line `} from "Y"` of a
        // multi-line import -> Y (between quotes)
        let js_closing_line = trimmed.starts_with('}') && trimmed.contains("from ");
        if js_closing_line || trimmed.starts_with("import ") {
            if let Some(source) = first_quoted(trimmed) {
                return source;
            }
        }
        // Rust: "use X::Y;" -> X (first path segment only)
        if let Some(rest) = trimmed.strip_prefix("use ") {
            return rest.split("::").next().unwrap_or("").trim_end_matches(';');
        }
    }
    line.trim()
}
1647
1648/// Fallback to line-level 3-way merge when entity extraction isn't possible.
1649///
1650/// Uses Sesame-inspired separator preprocessing (arXiv:2407.18888) to get
1651/// finer-grained alignment before line-level merge. Inserts newlines around
1652/// syntactic separators ({, }, ;) so that changes in different code blocks
1653/// align independently, reducing spurious conflicts.
1654///
1655/// Sesame expansion is skipped for data formats (JSON, YAML, TOML, lock files)
1656/// where `{`, `}`, `;` are structural content rather than code separators.
1657/// Expanding them destroys alignment and produces far more conflicts (confirmed
1658/// on GitButler: YAML went from 68 git markers to 192 weave markers with Sesame).
1659fn line_level_fallback(base: &str, ours: &str, theirs: &str, file_path: &str) -> MergeResult {
1660    let mut stats = MergeStats::default();
1661    stats.used_fallback = true;
1662
1663    // Skip Sesame preprocessing for data formats where {/}/; are content, not separators
1664    let skip = skip_sesame(file_path);
1665
1666    if skip {
1667        // Use git merge-file for data formats so we match git's output exactly.
1668        // diffy::merge uses a different diff algorithm that can produce more
1669        // conflict markers on structured data like lock files.
1670        return git_merge_file(base, ours, theirs, &mut stats);
1671    }
1672
1673    // Try Sesame expansion + diffy first, then compare against git merge-file.
1674    // Use whichever produces fewer conflict markers so we're never worse than git.
1675    let base_expanded = expand_separators(base);
1676    let ours_expanded = expand_separators(ours);
1677    let theirs_expanded = expand_separators(theirs);
1678
1679    let sesame_result = match diffy::merge(&base_expanded, &ours_expanded, &theirs_expanded) {
1680        Ok(merged) => {
1681            let content = collapse_separators(&merged, base);
1682            Some(MergeResult {
1683                content: post_merge_cleanup(&content),
1684                conflicts: vec![],
1685                warnings: vec![],
1686                stats: stats.clone(),
1687                audit: vec![],
1688            })
1689        }
1690        Err(_) => {
1691            // Sesame expansion conflicted, try plain diffy
1692            match diffy::merge(base, ours, theirs) {
1693                Ok(merged) => Some(MergeResult {
1694                    content: merged,
1695                    conflicts: vec![],
1696                    warnings: vec![],
1697                    stats: stats.clone(),
1698                    audit: vec![],
1699                }),
1700                Err(conflicted) => {
1701                    let _markers = conflicted.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1702                    let mut s = stats.clone();
1703                    s.entities_conflicted = 1;
1704                    Some(MergeResult {
1705                        content: conflicted,
1706                        conflicts: vec![EntityConflict {
1707                            entity_name: "(file)".to_string(),
1708                            entity_type: "file".to_string(),
1709                            kind: ConflictKind::BothModified,
1710                            complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1711                            ours_content: Some(ours.to_string()),
1712                            theirs_content: Some(theirs.to_string()),
1713                            base_content: Some(base.to_string()),
1714                        }],
1715                        warnings: vec![],
1716                        stats: s,
1717                        audit: vec![],
1718                    })
1719                }
1720            }
1721        }
1722    };
1723
1724    // Get git merge-file result as our floor
1725    let git_result = git_merge_file(base, ours, theirs, &mut stats);
1726
1727    // Compare: use sesame result only if it has fewer or equal markers
1728    match sesame_result {
1729        Some(sesame) if sesame.conflicts.is_empty() && !git_result.conflicts.is_empty() => {
1730            // Sesame resolved cleanly, git didn't: use sesame
1731            sesame
1732        }
1733        Some(sesame) if !sesame.conflicts.is_empty() && !git_result.conflicts.is_empty() => {
1734            // Both conflicted: use whichever has fewer markers
1735            let sesame_markers = sesame.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1736            let git_markers = git_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1737            if sesame_markers <= git_markers { sesame } else { git_result }
1738        }
1739        _ => git_result,
1740    }
1741}
1742
1743/// Shell out to `git merge-file` for an exact match with git's line-level merge.
1744///
1745/// We use this instead of `diffy::merge` for data formats (lock files, JSON, YAML, TOML)
1746/// where weave can't improve on git. `diffy` uses a different diff algorithm that can
1747/// produce more conflict markers on structured data (e.g. 22 markers vs git's 19 on uv.lock).
1748fn git_merge_file(base: &str, ours: &str, theirs: &str, stats: &mut MergeStats) -> MergeResult {
1749    let dir = match tempfile::tempdir() {
1750        Ok(d) => d,
1751        Err(_) => return diffy_fallback(base, ours, theirs, stats),
1752    };
1753
1754    let base_path = dir.path().join("base");
1755    let ours_path = dir.path().join("ours");
1756    let theirs_path = dir.path().join("theirs");
1757
1758    let write_ok = (|| -> std::io::Result<()> {
1759        std::fs::File::create(&base_path)?.write_all(base.as_bytes())?;
1760        std::fs::File::create(&ours_path)?.write_all(ours.as_bytes())?;
1761        std::fs::File::create(&theirs_path)?.write_all(theirs.as_bytes())?;
1762        Ok(())
1763    })();
1764
1765    if write_ok.is_err() {
1766        return diffy_fallback(base, ours, theirs, stats);
1767    }
1768
1769    // git merge-file writes result to the first file (ours) in place
1770    let output = Command::new("git")
1771        .arg("merge-file")
1772        .arg("-p") // print to stdout instead of modifying ours in place
1773        .arg("--diff3") // include ||||||| base section for jj compatibility
1774        .arg("-L").arg("ours")
1775        .arg("-L").arg("base")
1776        .arg("-L").arg("theirs")
1777        .arg(&ours_path)
1778        .arg(&base_path)
1779        .arg(&theirs_path)
1780        .output();
1781
1782    match output {
1783        Ok(result) => {
1784            let content = String::from_utf8_lossy(&result.stdout).into_owned();
1785            if result.status.success() {
1786                // Exit 0 = clean merge
1787                MergeResult {
1788                    content: post_merge_cleanup(&content),
1789                    conflicts: vec![],
1790                    warnings: vec![],
1791                    stats: stats.clone(),
1792                    audit: vec![],
1793                }
1794            } else {
1795                // Exit >0 = conflicts (exit code = number of conflicts)
1796                stats.entities_conflicted = 1;
1797                MergeResult {
1798                    content,
1799                    conflicts: vec![EntityConflict {
1800                        entity_name: "(file)".to_string(),
1801                        entity_type: "file".to_string(),
1802                        kind: ConflictKind::BothModified,
1803                        complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1804                        ours_content: Some(ours.to_string()),
1805                        theirs_content: Some(theirs.to_string()),
1806                        base_content: Some(base.to_string()),
1807                    }],
1808                    warnings: vec![],
1809                    stats: stats.clone(),
1810                    audit: vec![],
1811                }
1812            }
1813        }
1814        // git not available, fall back to diffy
1815        Err(_) => diffy_fallback(base, ours, theirs, stats),
1816    }
1817}
1818
1819/// Fallback to diffy::merge when git merge-file is unavailable.
1820fn diffy_fallback(base: &str, ours: &str, theirs: &str, stats: &mut MergeStats) -> MergeResult {
1821    match diffy::merge(base, ours, theirs) {
1822        Ok(merged) => {
1823            let content = post_merge_cleanup(&merged);
1824            MergeResult {
1825                content,
1826                conflicts: vec![],
1827                warnings: vec![],
1828                stats: stats.clone(),
1829                audit: vec![],
1830            }
1831        }
1832        Err(conflicted) => {
1833            stats.entities_conflicted = 1;
1834            MergeResult {
1835                content: conflicted,
1836                conflicts: vec![EntityConflict {
1837                    entity_name: "(file)".to_string(),
1838                    entity_type: "file".to_string(),
1839                    kind: ConflictKind::BothModified,
1840                    complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1841                    ours_content: Some(ours.to_string()),
1842                    theirs_content: Some(theirs.to_string()),
1843                    base_content: Some(base.to_string()),
1844                }],
1845                warnings: vec![],
1846                stats: stats.clone(),
1847                audit: vec![],
1848            }
1849        }
1850    }
1851}
1852
1853/// Filter out entities that are nested inside other entities.
1854///
1855/// When a class contains methods which contain local variables, sem-core may extract
1856/// all of them as entities. But for merge purposes, nested entities are part of their
1857/// parent — we handle them via inner entity merge. Keeping them causes false conflicts
1858/// (e.g. two methods both declaring `const user` would appear as BothAdded).
1859/// Check if entity list has too many duplicate names, which causes matching to hang.
1860fn has_excessive_duplicates(entities: &[SemanticEntity]) -> bool {
1861    let threshold = std::env::var("WEAVE_MAX_DUPLICATES")
1862        .ok()
1863        .and_then(|v| v.parse::<usize>().ok())
1864        .unwrap_or(10);
1865    let mut counts: HashMap<&str, usize> = HashMap::new();
1866    for e in entities {
1867        *counts.entry(&e.name).or_default() += 1;
1868    }
1869    counts.values().any(|&c| c >= threshold)
1870}
1871
1872/// Filter out entities that are nested inside other entities.
1873/// O(n log n) via sort + stack, replacing the previous O(n^2) approach.
1874fn filter_nested_entities(mut entities: Vec<SemanticEntity>) -> Vec<SemanticEntity> {
1875    if entities.len() <= 1 {
1876        return entities;
1877    }
1878
1879    // Sort by start_line ASC, then by end_line DESC (widest span first).
1880    // A parent entity always appears before its children in this order.
1881    entities.sort_by(|a, b| {
1882        a.start_line.cmp(&b.start_line).then(b.end_line.cmp(&a.end_line))
1883    });
1884
1885    // Stack-based filter: track the end_line of the current outermost entity.
1886    let mut result: Vec<SemanticEntity> = Vec::with_capacity(entities.len());
1887    let mut max_end: usize = 0;
1888
1889    for entity in entities {
1890        if entity.start_line > max_end || max_end == 0 {
1891            // Not nested: new top-level entity
1892            max_end = entity.end_line;
1893            result.push(entity);
1894        } else if entity.start_line == result.last().map_or(0, |e| e.start_line)
1895            && entity.end_line == result.last().map_or(0, |e| e.end_line)
1896        {
1897            // Exact same span (e.g. decorated_definition wrapping function_definition)
1898            result.push(entity);
1899        }
1900        // else: strictly nested, skip
1901    }
1902
1903    result
1904}
1905
1906/// Get child entities of a parent, sorted by start line.
1907fn get_child_entities<'a>(
1908    parent: &SemanticEntity,
1909    all_entities: &'a [SemanticEntity],
1910) -> Vec<&'a SemanticEntity> {
1911    let mut children: Vec<&SemanticEntity> = all_entities
1912        .iter()
1913        .filter(|e| e.parent_id.as_deref() == Some(&parent.id))
1914        .collect();
1915    children.sort_by_key(|e| e.start_line);
1916    children
1917}
1918
1919/// Compute a body hash for rename detection: the entity content with the entity
1920/// name replaced at word boundaries by a placeholder, so entities with identical
1921/// bodies but different names produce the same hash.
1922///
1923/// Uses word-boundary matching to avoid partial replacements (e.g. replacing
1924/// "get" inside "getAll"). Works across all languages since it operates on
1925/// the content string, not language-specific AST features.
1926fn body_hash(entity: &SemanticEntity) -> u64 {
1927    use std::collections::hash_map::DefaultHasher;
1928    use std::hash::{Hash, Hasher};
1929    let normalized = replace_at_word_boundaries(&entity.content, &entity.name, "__ENTITY__");
1930    let mut hasher = DefaultHasher::new();
1931    normalized.hash(&mut hasher);
1932    hasher.finish()
1933}
1934
/// Replace `needle` with `replacement` only at word boundaries.
///
/// A match counts only when the character before it and the character after
/// it (if any) are not identifier characters. An identifier character is `_`
/// or any alphanumeric character. Non-ASCII letters and digits count as well,
/// so `get` is not replaced inside `geté`.
///
/// Boundaries are judged against the original `content`, never against text
/// already written to the output. An empty `needle` returns `content`
/// unchanged.
fn replace_at_word_boundaries(content: &str, needle: &str, replacement: &str) -> String {
    /// Identifier test on a full character: ASCII goes through
    /// `is_ident_char`, everything else through Unicode `is_alphanumeric`.
    fn is_word_char(c: char) -> bool {
        if c.is_ascii() {
            is_ident_char(c as u8)
        } else {
            c.is_alphanumeric()
        }
    }

    if needle.is_empty() {
        return content.to_string();
    }

    let mut result = String::with_capacity(content.len());
    // Unprocessed tail of `content`, and the content character just before it.
    let mut rest = content;
    let mut prev: Option<char> = None;

    while let Some(ch) = rest.chars().next() {
        if let Some(tail) = rest.strip_prefix(needle) {
            let before_ok = !prev.is_some_and(is_word_char);
            let after_ok = !tail.chars().next().is_some_and(is_word_char);
            if before_ok && after_ok {
                result.push_str(replacement);
                // The next boundary check must see the needle's last character.
                prev = needle.chars().next_back();
                rest = tail;
                continue;
            }
        }
        result.push(ch);
        prev = Some(ch);
        rest = &rest[ch.len_utf8()..];
    }
    result
}

/// Whether the ASCII byte `b` can be part of an identifier: an ASCII letter,
/// an ASCII digit, or `_`.
fn is_ident_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}
1979
1980/// Build a rename map from new_id → base_id using confidence-scored matching.
1981///
1982/// Detects when an entity in the branch has the same body as an entity
1983/// in base but a different name/ID, indicating it was renamed.
1984/// Uses body_hash (name-stripped content hash) and structural_hash with
1985/// confidence scoring to resolve ambiguous matches correctly.
1986fn build_rename_map(
1987    base_entities: &[SemanticEntity],
1988    branch_entities: &[SemanticEntity],
1989) -> HashMap<String, String> {
1990    let mut rename_map: HashMap<String, String> = HashMap::new();
1991
1992    let base_ids: HashSet<&str> = base_entities.iter().map(|e| e.id.as_str()).collect();
1993
1994    // Build body_hash → base entities (multiple can have same hash)
1995    let mut base_by_body: HashMap<u64, Vec<&SemanticEntity>> = HashMap::new();
1996    for entity in base_entities {
1997        base_by_body.entry(body_hash(entity)).or_default().push(entity);
1998    }
1999
2000    // Also keep structural_hash index as fallback
2001    let mut base_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
2002    for entity in base_entities {
2003        if let Some(ref sh) = entity.structural_hash {
2004            base_by_structural.entry(sh.as_str()).or_default().push(entity);
2005        }
2006    }
2007
2008    // Collect all candidate (branch_entity, base_entity, confidence) triples
2009    struct RenameCandidate<'a> {
2010        branch: &'a SemanticEntity,
2011        base: &'a SemanticEntity,
2012        confidence: f64,
2013    }
2014    let mut candidates: Vec<RenameCandidate> = Vec::new();
2015
2016    for branch_entity in branch_entities {
2017        if base_ids.contains(branch_entity.id.as_str()) {
2018            continue;
2019        }
2020
2021        let bh = body_hash(branch_entity);
2022
2023        // Body hash matches
2024        if let Some(base_entities_for_hash) = base_by_body.get(&bh) {
2025            for &base_entity in base_entities_for_hash {
2026                let same_type = base_entity.entity_type == branch_entity.entity_type;
2027                let same_parent = base_entity.parent_id == branch_entity.parent_id;
2028                let confidence = match (same_type, same_parent) {
2029                    (true, true) => 0.95,
2030                    (true, false) => 0.8,
2031                    (false, _) => 0.6,
2032                };
2033                candidates.push(RenameCandidate { branch: branch_entity, base: base_entity, confidence });
2034            }
2035        }
2036
2037        // Structural hash fallback (lower confidence)
2038        if let Some(ref sh) = branch_entity.structural_hash {
2039            if let Some(base_entities_for_sh) = base_by_structural.get(sh.as_str()) {
2040                for &base_entity in base_entities_for_sh {
2041                    // Skip if already covered by body hash match
2042                    if candidates.iter().any(|c| c.branch.id == branch_entity.id && c.base.id == base_entity.id) {
2043                        continue;
2044                    }
2045                    candidates.push(RenameCandidate { branch: branch_entity, base: base_entity, confidence: 0.6 });
2046                }
2047            }
2048        }
2049    }
2050
2051    // Sort by confidence descending, assign greedily
2052    candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
2053
2054    let mut used_base_ids: HashSet<String> = HashSet::new();
2055    let mut used_branch_ids: HashSet<String> = HashSet::new();
2056
2057    for candidate in &candidates {
2058        if candidate.confidence < 0.6 {
2059            break;
2060        }
2061        if used_base_ids.contains(&candidate.base.id) || used_branch_ids.contains(&candidate.branch.id) {
2062            continue;
2063        }
2064        // Don't rename if the base entity's ID still exists in branch (it wasn't actually renamed)
2065        let base_id_in_branch = branch_entities.iter().any(|e| e.id == candidate.base.id);
2066        if base_id_in_branch {
2067            continue;
2068        }
2069        rename_map.insert(candidate.branch.id.clone(), candidate.base.id.clone());
2070        used_base_ids.insert(candidate.base.id.clone());
2071        used_branch_ids.insert(candidate.branch.id.clone());
2072    }
2073
2074    rename_map
2075}
2076
/// Tell whether `entity_type` names a container kind (class, impl, struct, ...)
/// for which a member-by-member inner merge may be attempted.
///
/// The comparison is exact and case-sensitive.
fn is_container_entity_type(entity_type: &str) -> bool {
    // Entity kinds treated as containers.
    const CONTAINER_KINDS: &[&str] = &[
        "class",
        "interface",
        "enum",
        "impl",
        "trait",
        "module",
        "impl_item",
        "trait_item",
        "struct",
        "union",
        "namespace",
        "struct_item",
        "struct_specifier",
        "variable",
        "export",
    ];

    CONTAINER_KINDS.contains(&entity_type)
}
2086
/// One named member (method, field, variant, ...) cut out of a container body.
#[derive(Clone, Debug)]
struct MemberChunk {
    /// Member name; inner merge uses it as the key to pair up the same member
    /// across base / ours / theirs.
    name: String,
    /// Complete source text of the member, body included.
    content: String,
}
2095
/// Outcome of attempting a member-by-member merge of one container.
struct InnerMergeResult {
    /// The reassembled container text. When `has_conflicts` is set, it embeds
    /// conflict markers scoped to the individual members that clashed.
    content: String,
    /// `true` if at least one member could not be merged cleanly.
    has_conflicts: bool,
}
2103
2104/// Convert sem-core child entities to MemberChunks for inner merge.
2105///
2106/// Uses child entity line positions to extract content from the container text,
2107/// including any leading decorators/annotations that tree-sitter attaches as
2108/// sibling nodes rather than part of the method node.
2109fn children_to_chunks(
2110    children: &[&SemanticEntity],
2111    container_content: &str,
2112    container_start_line: usize,
2113) -> Vec<MemberChunk> {
2114    if children.is_empty() {
2115        return Vec::new();
2116    }
2117
2118    let lines: Vec<&str> = container_content.lines().collect();
2119    let mut chunks = Vec::new();
2120
2121    for (i, child) in children.iter().enumerate() {
2122        let child_start_idx = child.start_line.saturating_sub(container_start_line);
2123        // +1 because end_line is inclusive but we need an exclusive upper bound for slicing
2124        let child_end_idx = child.end_line.saturating_sub(container_start_line) + 1;
2125
2126        if child_end_idx > lines.len() + 1 || child_start_idx >= lines.len() {
2127            // Position out of range, fall back to entity content
2128            chunks.push(MemberChunk {
2129                name: child.name.clone(),
2130                content: child.content.clone(),
2131            });
2132            continue;
2133        }
2134        let child_end_idx = child_end_idx.min(lines.len());
2135
2136        // Determine the earliest line we can claim (after previous child's end, or body start)
2137        let floor = if i > 0 {
2138            children[i - 1].end_line.saturating_sub(container_start_line) + 1
2139        } else {
2140            // First child: start after the container header line (the `{` or `:` line)
2141            // Find the line containing `{` or ending with `:`
2142            let header_end = lines
2143                .iter()
2144                .position(|l| l.contains('{') || l.trim().ends_with(':'))
2145                .map(|p| p + 1)
2146                .unwrap_or(0);
2147            header_end
2148        };
2149
2150        // Scan backwards from child_start_idx to include decorators/annotations/comments
2151        let mut content_start = child_start_idx;
2152        while content_start > floor {
2153            let prev = content_start - 1;
2154            let trimmed = lines[prev].trim();
2155            if trimmed.starts_with('@')
2156                || trimmed.starts_with("#[")
2157                || trimmed.starts_with("//")
2158                || trimmed.starts_with("///")
2159                || trimmed.starts_with("/**")
2160                || trimmed.starts_with("* ")
2161                || trimmed == "*/"
2162            {
2163                content_start = prev;
2164            } else if trimmed.is_empty() && content_start > floor + 1 {
2165                // Allow one blank line between decorator and method
2166                content_start = prev;
2167            } else {
2168                break;
2169            }
2170        }
2171
2172        // Skip leading blank lines
2173        while content_start < child_start_idx && lines[content_start].trim().is_empty() {
2174            content_start += 1;
2175        }
2176
2177        let chunk_content: String = lines[content_start..child_end_idx].join("\n");
2178        chunks.push(MemberChunk {
2179            name: child.name.clone(),
2180            content: chunk_content,
2181        });
2182    }
2183
2184    chunks
2185}
2186
2187/// Generate a scoped conflict marker for a single member within a container merge.
2188fn scoped_conflict_marker(
2189    name: &str,
2190    base: Option<&str>,
2191    ours: Option<&str>,
2192    theirs: Option<&str>,
2193    ours_deleted: bool,
2194    theirs_deleted: bool,
2195    fmt: &MarkerFormat,
2196) -> String {
2197    let open = "<".repeat(fmt.marker_length);
2198    let sep = "=".repeat(fmt.marker_length);
2199    let close = ">".repeat(fmt.marker_length);
2200
2201    let o = ours.unwrap_or("");
2202    let t = theirs.unwrap_or("");
2203
2204    // Narrow conflict markers to just the differing lines
2205    let ours_lines: Vec<&str> = o.lines().collect();
2206    let theirs_lines: Vec<&str> = t.lines().collect();
2207    let (prefix_len, suffix_len) = if ours.is_some() && theirs.is_some() {
2208        crate::conflict::narrow_conflict_lines(&ours_lines, &theirs_lines)
2209    } else {
2210        (0, 0)
2211    };
2212    let has_narrowing = prefix_len > 0 || suffix_len > 0;
2213    let ours_mid = &ours_lines[prefix_len..ours_lines.len() - suffix_len];
2214    let theirs_mid = &theirs_lines[prefix_len..theirs_lines.len() - suffix_len];
2215
2216    let mut out = String::new();
2217
2218    // Emit common prefix as clean text
2219    if has_narrowing {
2220        for line in &ours_lines[..prefix_len] {
2221            out.push_str(line);
2222            out.push('\n');
2223        }
2224    }
2225
2226    // Opening marker
2227    if fmt.enhanced {
2228        if ours_deleted {
2229            out.push_str(&format!("{} ours ({} deleted)\n", open, name));
2230        } else {
2231            out.push_str(&format!("{} ours ({})\n", open, name));
2232        }
2233    } else {
2234        out.push_str(&format!("{} ours\n", open));
2235    }
2236
2237    // Ours content (narrowed or full)
2238    if ours.is_some() {
2239        if has_narrowing {
2240            for line in ours_mid {
2241                out.push_str(line);
2242                out.push('\n');
2243            }
2244        } else {
2245            out.push_str(o);
2246            if !o.ends_with('\n') {
2247                out.push('\n');
2248            }
2249        }
2250    }
2251
2252    // Base section for diff3 format (standard mode only)
2253    if !fmt.enhanced {
2254        let base_marker = "|".repeat(fmt.marker_length);
2255        out.push_str(&format!("{} base\n", base_marker));
2256        let b = base.unwrap_or("");
2257        if has_narrowing {
2258            let base_lines: Vec<&str> = b.lines().collect();
2259            let base_prefix = prefix_len.min(base_lines.len());
2260            let base_suffix = suffix_len.min(base_lines.len().saturating_sub(base_prefix));
2261            for line in &base_lines[base_prefix..base_lines.len() - base_suffix] {
2262                out.push_str(line);
2263                out.push('\n');
2264            }
2265        } else {
2266            out.push_str(b);
2267            if !b.is_empty() && !b.ends_with('\n') {
2268                out.push('\n');
2269            }
2270        }
2271    }
2272
2273    // Separator
2274    out.push_str(&format!("{}\n", sep));
2275
2276    // Theirs content (narrowed or full)
2277    if theirs.is_some() {
2278        if has_narrowing {
2279            for line in theirs_mid {
2280                out.push_str(line);
2281                out.push('\n');
2282            }
2283        } else {
2284            out.push_str(t);
2285            if !t.ends_with('\n') {
2286                out.push('\n');
2287            }
2288        }
2289    }
2290
2291    // Closing marker
2292    if fmt.enhanced {
2293        if theirs_deleted {
2294            out.push_str(&format!("{} theirs ({} deleted)\n", close, name));
2295        } else {
2296            out.push_str(&format!("{} theirs ({})\n", close, name));
2297        }
2298    } else {
2299        out.push_str(&format!("{} theirs\n", close));
2300    }
2301
2302    // Emit common suffix as clean text
2303    if has_narrowing {
2304        for line in &ours_lines[ours_lines.len() - suffix_len..] {
2305            out.push_str(line);
2306            out.push('\n');
2307        }
2308    }
2309
2310    out
2311}
2312
2313/// Try recursive inner entity merge for container types (classes, impls, etc.).
2314///
2315/// Inspired by LastMerge (arXiv:2507.19687): class members are "unordered children" —
2316/// reordering them is not a conflict. We chunk the class body into members, match by
2317/// name, and merge each member independently.
2318///
2319/// Returns Some(result) if chunking succeeded, None if we can't parse the container.
2320/// The result may contain per-member conflict markers (scoped conflicts).
2321fn try_inner_entity_merge(
2322    base: &str,
2323    ours: &str,
2324    theirs: &str,
2325    base_children: &[&SemanticEntity],
2326    ours_children: &[&SemanticEntity],
2327    theirs_children: &[&SemanticEntity],
2328    base_start_line: usize,
2329    ours_start_line: usize,
2330    theirs_start_line: usize,
2331    marker_format: &MarkerFormat,
2332) -> Option<InnerMergeResult> {
2333    // Try sem-core child entities first (tree-sitter-accurate boundaries),
2334    // fall back to indentation heuristic if children aren't available.
2335    // When children_to_chunks produces chunks, try indentation as a fallback
2336    // if the tree-sitter chunks lead to conflicts (the indentation heuristic
2337    // can include trailing context that helps diffy merge adjacent changes).
2338    let use_children = !ours_children.is_empty() || !theirs_children.is_empty();
2339    let (base_chunks, ours_chunks, theirs_chunks) = if use_children {
2340        (
2341            children_to_chunks(base_children, base, base_start_line),
2342            children_to_chunks(ours_children, ours, ours_start_line),
2343            children_to_chunks(theirs_children, theirs, theirs_start_line),
2344        )
2345    } else {
2346        (
2347            extract_member_chunks(base)?,
2348            extract_member_chunks(ours)?,
2349            extract_member_chunks(theirs)?,
2350        )
2351    };
2352
2353    // Need at least 1 member to attempt inner merge
2354    // (Even single-member containers benefit from decorator-aware merge)
2355    if base_chunks.is_empty() && ours_chunks.is_empty() && theirs_chunks.is_empty() {
2356        return None;
2357    }
2358
2359    // Build name → content maps
2360    let base_map: HashMap<&str, &str> = base_chunks
2361        .iter()
2362        .map(|c| (c.name.as_str(), c.content.as_str()))
2363        .collect();
2364    let ours_map: HashMap<&str, &str> = ours_chunks
2365        .iter()
2366        .map(|c| (c.name.as_str(), c.content.as_str()))
2367        .collect();
2368    let theirs_map: HashMap<&str, &str> = theirs_chunks
2369        .iter()
2370        .map(|c| (c.name.as_str(), c.content.as_str()))
2371        .collect();
2372
2373    // Collect all member names
2374    let mut all_names: Vec<String> = Vec::new();
2375    let mut seen: HashSet<String> = HashSet::new();
2376    // Use ours ordering as skeleton
2377    for chunk in &ours_chunks {
2378        if seen.insert(chunk.name.clone()) {
2379            all_names.push(chunk.name.clone());
2380        }
2381    }
2382    // Add theirs-only members
2383    for chunk in &theirs_chunks {
2384        if seen.insert(chunk.name.clone()) {
2385            all_names.push(chunk.name.clone());
2386        }
2387    }
2388
2389    // Extract header/footer (class declaration line and closing brace)
2390    let (ours_header, ours_footer) = extract_container_wrapper(ours)?;
2391
2392    let mut merged_members: Vec<String> = Vec::new();
2393    let mut has_conflict = false;
2394
2395    for name in &all_names {
2396        let in_base = base_map.get(name.as_str());
2397        let in_ours = ours_map.get(name.as_str());
2398        let in_theirs = theirs_map.get(name.as_str());
2399
2400        match (in_base, in_ours, in_theirs) {
2401            // In all three
2402            (Some(b), Some(o), Some(t)) => {
2403                if o == t {
2404                    merged_members.push(o.to_string());
2405                } else if b == o {
2406                    merged_members.push(t.to_string());
2407                } else if b == t {
2408                    merged_members.push(o.to_string());
2409                } else {
2410                    // Both changed differently: try diffy, then git merge-file, then decorator merge
2411                    if let Some(merged) = diffy_merge(b, o, t) {
2412                        merged_members.push(merged);
2413                    } else if let Some(merged) = git_merge_string(b, o, t) {
2414                        merged_members.push(merged);
2415                    } else if let Some(merged) = try_decorator_aware_merge(b, o, t) {
2416                        merged_members.push(merged);
2417                    } else {
2418                        // Emit per-member conflict markers
2419                        has_conflict = true;
2420                        merged_members.push(scoped_conflict_marker(name, Some(b), Some(o), Some(t), false, false, marker_format));
2421                    }
2422                }
2423            }
2424            // Deleted by theirs, ours unchanged or not in base
2425            (Some(b), Some(o), None) => {
2426                if *b == *o {
2427                    // Ours unchanged, theirs deleted → accept deletion
2428                } else {
2429                    // Ours modified, theirs deleted → per-member conflict
2430                    has_conflict = true;
2431                    merged_members.push(scoped_conflict_marker(name, Some(b), Some(o), None, false, true, marker_format));
2432                }
2433            }
2434            // Deleted by ours, theirs unchanged or not in base
2435            (Some(b), None, Some(t)) => {
2436                if *b == *t {
2437                    // Theirs unchanged, ours deleted → accept deletion
2438                } else {
2439                    // Theirs modified, ours deleted → per-member conflict
2440                    has_conflict = true;
2441                    merged_members.push(scoped_conflict_marker(name, Some(b), None, Some(t), true, false, marker_format));
2442                }
2443            }
2444            // Added by ours only
2445            (None, Some(o), None) => {
2446                merged_members.push(o.to_string());
2447            }
2448            // Added by theirs only
2449            (None, None, Some(t)) => {
2450                merged_members.push(t.to_string());
2451            }
2452            // Added by both with different content
2453            (None, Some(o), Some(t)) => {
2454                if o == t {
2455                    merged_members.push(o.to_string());
2456                } else {
2457                    has_conflict = true;
2458                    merged_members.push(scoped_conflict_marker(name, None, Some(o), Some(t), false, false, marker_format));
2459                }
2460            }
2461            // Deleted by both
2462            (Some(_), None, None) => {}
2463            (None, None, None) => {}
2464        }
2465    }
2466
2467    // Reconstruct: header + merged members + footer
2468    let mut result = String::new();
2469    result.push_str(ours_header);
2470    if !ours_header.ends_with('\n') {
2471        result.push('\n');
2472    }
2473
2474    // Detect if members are single-line (fields, variants) vs multi-line (methods)
2475    let has_multiline_members = merged_members.iter().any(|m| m.contains('\n'));
2476    // Check if the original content had blank lines between members
2477    let original_has_blank_separators = {
2478        let body = ours_header.len()..ours.rfind(ours_footer).unwrap_or(ours.len());
2479        let body_content = &ours[body];
2480        body_content.contains("\n\n")
2481    };
2482
2483    for (i, member) in merged_members.iter().enumerate() {
2484        result.push_str(member);
2485        if !member.ends_with('\n') {
2486            result.push('\n');
2487        }
2488        // Add blank line between multi-line members only if the original had them
2489        if i < merged_members.len() - 1 && has_multiline_members && original_has_blank_separators && !member.ends_with("\n\n") {
2490            result.push('\n');
2491        }
2492    }
2493
2494    result.push_str(ours_footer);
2495    if !ours_footer.ends_with('\n') && ours.ends_with('\n') {
2496        result.push('\n');
2497    }
2498
2499    // If children_to_chunks led to conflicts, retry with indentation heuristic.
2500    // The indentation approach includes trailing blank lines in chunks, giving
2501    // diffy more context to merge adjacent changes from different branches.
2502    if has_conflict && use_children {
2503        if let (Some(bc), Some(oc), Some(tc)) = (
2504            extract_member_chunks(base),
2505            extract_member_chunks(ours),
2506            extract_member_chunks(theirs),
2507        ) {
2508            if !bc.is_empty() || !oc.is_empty() || !tc.is_empty() {
2509                let fallback = try_inner_merge_with_chunks(
2510                    &bc, &oc, &tc, ours, ours_header, ours_footer,
2511                    has_multiline_members, marker_format,
2512                );
2513                if let Some(fb) = fallback {
2514                    if !fb.has_conflicts {
2515                        return Some(fb);
2516                    }
2517                }
2518            }
2519        }
2520    }
2521
2522    Some(InnerMergeResult {
2523        content: result,
2524        has_conflicts: has_conflict,
2525    })
2526}
2527
2528/// Inner merge helper using pre-extracted chunks. Used for indentation-heuristic fallback.
2529fn try_inner_merge_with_chunks(
2530    base_chunks: &[MemberChunk],
2531    ours_chunks: &[MemberChunk],
2532    theirs_chunks: &[MemberChunk],
2533    ours: &str,
2534    ours_header: &str,
2535    ours_footer: &str,
2536    has_multiline_hint: bool,
2537    marker_format: &MarkerFormat,
2538) -> Option<InnerMergeResult> {
2539    let base_map: HashMap<&str, &str> = base_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2540    let ours_map: HashMap<&str, &str> = ours_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2541    let theirs_map: HashMap<&str, &str> = theirs_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2542
2543    let mut all_names: Vec<String> = Vec::new();
2544    let mut seen: HashSet<String> = HashSet::new();
2545    for chunk in ours_chunks {
2546        if seen.insert(chunk.name.clone()) {
2547            all_names.push(chunk.name.clone());
2548        }
2549    }
2550    for chunk in theirs_chunks {
2551        if seen.insert(chunk.name.clone()) {
2552            all_names.push(chunk.name.clone());
2553        }
2554    }
2555
2556    let mut merged_members: Vec<String> = Vec::new();
2557    let mut has_conflict = false;
2558
2559    for name in &all_names {
2560        let in_base = base_map.get(name.as_str());
2561        let in_ours = ours_map.get(name.as_str());
2562        let in_theirs = theirs_map.get(name.as_str());
2563
2564        match (in_base, in_ours, in_theirs) {
2565            (Some(b), Some(o), Some(t)) => {
2566                if o == t {
2567                    merged_members.push(o.to_string());
2568                } else if b == o {
2569                    merged_members.push(t.to_string());
2570                } else if b == t {
2571                    merged_members.push(o.to_string());
2572                } else if let Some(merged) = diffy_merge(b, o, t) {
2573                    merged_members.push(merged);
2574                } else if let Some(merged) = git_merge_string(b, o, t) {
2575                    merged_members.push(merged);
2576                } else {
2577                    has_conflict = true;
2578                    merged_members.push(scoped_conflict_marker(name, Some(b), Some(o), Some(t), false, false, marker_format));
2579                }
2580            }
2581            (Some(b), Some(o), None) => {
2582                if *b != *o { merged_members.push(o.to_string()); }
2583            }
2584            (Some(b), None, Some(t)) => {
2585                if *b != *t { merged_members.push(t.to_string()); }
2586            }
2587            (None, Some(o), None) => merged_members.push(o.to_string()),
2588            (None, None, Some(t)) => merged_members.push(t.to_string()),
2589            (None, Some(o), Some(t)) => {
2590                if o == t {
2591                    merged_members.push(o.to_string());
2592                } else {
2593                    has_conflict = true;
2594                    merged_members.push(scoped_conflict_marker(name, None, Some(o), Some(t), false, false, marker_format));
2595                }
2596            }
2597            (Some(_), None, None) | (None, None, None) => {}
2598        }
2599    }
2600
2601    let has_multiline_members = has_multiline_hint || merged_members.iter().any(|m| m.contains('\n'));
2602    let mut result = String::new();
2603    result.push_str(ours_header);
2604    if !ours_header.ends_with('\n') { result.push('\n'); }
2605    for (i, member) in merged_members.iter().enumerate() {
2606        result.push_str(member);
2607        if !member.ends_with('\n') { result.push('\n'); }
2608        if i < merged_members.len() - 1 && has_multiline_members && !member.ends_with("\n\n") {
2609            result.push('\n');
2610        }
2611    }
2612    result.push_str(ours_footer);
2613    if !ours_footer.ends_with('\n') && ours.ends_with('\n') { result.push('\n'); }
2614
2615    Some(InnerMergeResult {
2616        content: result,
2617        has_conflicts: has_conflict,
2618    })
2619}
2620
/// Extract the header (class declaration) and footer (closing brace) from a container.
///
/// Supports both brace-delimited (JS/TS/Java/Rust/C) and indentation-based
/// (Python) containers. Both returned slices borrow from `content`:
///
/// * brace style - the header runs from the start of `content` through the end
///   of the first line containing `{`; the footer runs from the start of the
///   last line that is exactly `}` or `};` (ignoring surrounding whitespace) to
///   the end of `content`.
/// * Python style (some line is a `class ...:` / `def ...:` line and no line
///   contains `{`) - the header runs through the end of the first line ending
///   with `:`; the footer is empty.
///
/// Line terminators are kept in the slices, and byte offsets are taken from the
/// real text, so `\r\n` input is handled the same way as `\n` input.
///
/// Returns `None` when `content` has fewer than two lines, when no header or
/// footer line is found, or when the footer line does not come after the
/// header line.
fn extract_container_wrapper(content: &str) -> Option<(&str, &str)> {
    // (byte offset of the line start, line text including its terminator).
    // Using real offsets instead of `len + 1` keeps CRLF input correct.
    let mut spans: Vec<(usize, &str)> = Vec::new();
    let mut offset = 0;
    for raw in content.split_inclusive('\n') {
        spans.push((offset, raw));
        offset += raw.len();
    }
    if spans.len() < 2 {
        return None;
    }

    // Check if this is a Python-style container (ends with `:` instead of `{`)
    let has_brace = spans.iter().any(|(_, l)| l.contains('{'));
    let is_python_style = !has_brace
        && spans.iter().any(|(_, l)| {
            let trimmed = l.trim();
            (trimmed.starts_with("class ") || trimmed.starts_with("def "))
                && trimmed.ends_with(':')
        });

    if is_python_style {
        // Python: header is the `class Foo:` line, no footer
        let (start, line) = *spans.iter().find(|(_, l)| l.trim().ends_with(':'))?;
        let header = &content[..start + line.len()];
        // No closing brace in Python — footer is empty
        let footer = &content[content.len()..];
        return Some((header, footer));
    }

    // Brace-delimited: header up to `{`, footer from last `}`
    let header_idx = spans.iter().position(|(_, l)| l.contains('{'))?;
    let footer_idx = spans.iter().rposition(|(_, l)| {
        let trimmed = l.trim();
        trimmed == "}" || trimmed == "};"
    })?;

    // A closing line at or before the opening line would make header and
    // footer overlap; callers slice the body between them.
    if footer_idx <= header_idx {
        return None;
    }

    let (header_start, header_line) = spans[header_idx];
    let header = &content[..header_start + header_line.len()];
    let footer = &content[spans[footer_idx].0..];

    Some((header, footer))
}
2670
2671/// Extract named member chunks from a container body.
2672///
2673/// Identifies member boundaries by indentation: members start at the first
2674/// indentation level inside the container. Each member extends until the next
2675/// member starts or the container closes.
2676fn extract_member_chunks(content: &str) -> Option<Vec<MemberChunk>> {
2677    let lines: Vec<&str> = content.lines().collect();
2678    if lines.len() < 2 {
2679        return None;
2680    }
2681
2682    // Check if Python-style (indentation-based)
2683    let is_python_style = lines.iter().any(|l| {
2684        let trimmed = l.trim();
2685        (trimmed.starts_with("class ") || trimmed.starts_with("def "))
2686            && trimmed.ends_with(':')
2687    }) && !lines.iter().any(|l| l.contains('{'));
2688
2689    // Find the body range
2690    let body_start = if is_python_style {
2691        lines.iter().position(|l| l.trim().ends_with(':'))? + 1
2692    } else {
2693        lines.iter().position(|l| l.contains('{'))? + 1
2694    };
2695    let body_end = if is_python_style {
2696        // Python: body extends to end of content
2697        lines.len()
2698    } else {
2699        lines.iter().rposition(|l| {
2700            let trimmed = l.trim();
2701            trimmed == "}" || trimmed == "};"
2702        })?
2703    };
2704
2705    if body_start >= body_end {
2706        return None;
2707    }
2708
2709    // Determine member indentation level by looking at first non-empty body line
2710    let member_indent = lines[body_start..body_end]
2711        .iter()
2712        .find(|l| !l.trim().is_empty())
2713        .map(|l| l.len() - l.trim_start().len())?;
2714
2715    let mut chunks: Vec<MemberChunk> = Vec::new();
2716    let mut current_chunk_lines: Vec<&str> = Vec::new();
2717    let mut current_name: Option<String> = None;
2718
2719    for line in &lines[body_start..body_end] {
2720        let trimmed = line.trim();
2721        if trimmed.is_empty() {
2722            // Blank lines: if we have a current chunk, include them
2723            if current_name.is_some() {
2724                // Only include if not trailing blanks
2725                current_chunk_lines.push(line);
2726            }
2727            continue;
2728        }
2729
2730        let indent = line.len() - line.trim_start().len();
2731
2732        // Is this a new member declaration at the member indent level?
2733        // Exclude closing braces, comments, and decorators/annotations
2734        if indent == member_indent
2735            && !trimmed.starts_with("//")
2736            && !trimmed.starts_with("/*")
2737            && !trimmed.starts_with("*")
2738            && !trimmed.starts_with("#")
2739            && !trimmed.starts_with("@")
2740            && !trimmed.starts_with("}")
2741            && trimmed != ","
2742        {
2743            // Save previous chunk
2744            if let Some(name) = current_name.take() {
2745                // Trim trailing blank lines
2746                while current_chunk_lines.last().map_or(false, |l| l.trim().is_empty()) {
2747                    current_chunk_lines.pop();
2748                }
2749                if !current_chunk_lines.is_empty() {
2750                    chunks.push(MemberChunk {
2751                        name,
2752                        content: current_chunk_lines.join("\n"),
2753                    });
2754                }
2755                current_chunk_lines.clear();
2756            }
2757
2758            // Start new chunk — extract member name
2759            let name = extract_member_name(trimmed);
2760            current_name = Some(name);
2761            current_chunk_lines.push(line);
2762        } else if current_name.is_some() {
2763            // Continuation of current member (body lines, nested blocks)
2764            current_chunk_lines.push(line);
2765        } else {
2766            // Content before first member (decorators, comments for first member)
2767            // Attach to next member
2768            current_chunk_lines.push(line);
2769        }
2770    }
2771
2772    // Save last chunk
2773    if let Some(name) = current_name {
2774        while current_chunk_lines.last().map_or(false, |l| l.trim().is_empty()) {
2775            current_chunk_lines.pop();
2776        }
2777        if !current_chunk_lines.is_empty() {
2778            chunks.push(MemberChunk {
2779                name,
2780                content: current_chunk_lines.join("\n"),
2781            });
2782        }
2783    }
2784
2785    // Post-process: if any chunk has a brace-only name (anonymous struct literal
2786    // entries like Go's `{ Name: "x", ... }`), derive a name from the first
2787    // key-value field inside the chunk to avoid HashMap collisions.
2788    for chunk in &mut chunks {
2789        if chunk.name == "{" || chunk.name == "{}" {
2790            if let Some(better) = derive_name_from_struct_literal(&chunk.content) {
2791                chunk.name = better;
2792            }
2793        }
2794    }
2795
2796    if chunks.is_empty() {
2797        None
2798    } else {
2799        Some(chunks)
2800    }
2801}
2802
/// Extract a member name from a declaration line.
///
/// Heuristics, applied in order to the trimmed line:
/// 1. A leading Rust restricted-visibility qualifier (`pub(crate)`,
///    `pub(super)`, `pub(self)`, `pub(in path)`) is skipped, so its parenthesis
///    is not mistaken for a parameter list.
/// 2. Go method with receiver (`func (c *Calc) Add(`): the identifier before
///    the parenthesis that follows the receiver.
/// 3. Any line containing `(`: the identifier immediately before the first
///    `(` (Java `public int add(`, Rust `pub fn add(`, Python `def add(`, ...).
/// 4. Otherwise: strip leading modifier keywords and take the first identifier
///    (fields, properties, variants).
///
/// If none of these yields an identifier, the first 20 characters of the
/// trimmed line are returned so the result is never derived from nothing.
fn extract_member_name(line: &str) -> String {
    /// Longest run of identifier characters at the end of `s` (may be empty).
    fn trailing_identifier(s: &str) -> &str {
        let start = s
            .char_indices()
            .rev()
            .take_while(|(_, c)| c.is_alphanumeric() || *c == '_')
            .last()
            .map_or(s.len(), |(i, _)| i);
        &s[start..]
    }

    /// Drop a leading `pub(crate)` / `pub(super)` / `pub(self)` / `pub(in ...)`.
    /// Anything else starting with `pub(` (e.g. a method called `pub`) is
    /// returned unchanged.
    fn strip_restricted_visibility(s: &str) -> &str {
        let Some(rest) = s.strip_prefix("pub(") else {
            return s;
        };
        let Some(close) = rest.find(')') else {
            return s;
        };
        let scope = rest[..close].trim();
        if matches!(scope, "crate" | "super" | "self") || scope.starts_with("in ") {
            rest[close + 1..].trim_start()
        } else {
            s
        }
    }

    let trimmed = line.trim();
    let decl = strip_restricted_visibility(trimmed);

    // Go method receiver: `func (c *Calculator) Add(` -> skip receiver, find name before second `(`
    if decl.starts_with("func ") && decl.get(5..6) == Some("(") {
        // Skip past the receiver: find closing `)`, then extract name before next `(`
        if let Some(recv_close) = decl.find(')') {
            let after_recv = &decl[recv_close + 1..];
            if let Some(paren_pos) = after_recv.find('(') {
                let name = trailing_identifier(after_recv[..paren_pos].trim());
                if !name.is_empty() {
                    return name.to_string();
                }
            }
        }
    }

    // Strategy 1: For method/function declarations with parentheses,
    // the name is the identifier immediately before `(`.
    if let Some(paren_pos) = decl.find('(') {
        let name = trailing_identifier(decl[..paren_pos].trim_end());
        if !name.is_empty() {
            return name.to_string();
        }
    }

    // Strategy 2: For fields/properties/variants without parens,
    // strip keywords and take the first identifier. The list is walked once,
    // in this order, so modifiers are only removed in their usual sequence.
    let mut s = decl;
    for keyword in &[
        "export ", "public ", "private ", "protected ", "static ",
        "abstract ", "async ", "override ", "readonly ",
        "pub ", "pub(crate) ", "fn ", "def ", "get ", "set ",
    ] {
        if let Some(rest) = s.strip_prefix(keyword) {
            s = rest;
        }
    }
    if let Some(rest) = s.strip_prefix("fn ") {
        s = rest;
    }

    let name: String = s
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();

    if name.is_empty() {
        // Nothing identifier-like: fall back to a prefix of the raw line.
        trimmed.chars().take(20).collect()
    } else {
        name
    }
}
2875
/// Derives a name for an anonymous struct-literal entry (e.g., a Go slice
/// element that opens with `{`) from the first field that carries a value.
///
/// The opening line is skipped. Each later line is trimmed, stripped of
/// trailing commas and split at its first `:`; the text after the colon, with
/// surrounding double and single quotes removed, is returned as soon as it is
/// non-empty. For a multi-line chunk whose second line is
/// `Name: "panelTitleSearch",` the result is `panelTitleSearch`.
///
/// Returns `None` when no line after the first yields a value.
fn derive_name_from_struct_literal(content: &str) -> Option<String> {
    content.lines().skip(1).find_map(|raw_line| {
        let entry = raw_line.trim().trim_end_matches(',');
        // Only `key: value` shaped lines are candidates.
        let (_key, rest) = entry.split_once(':')?;
        // Drop the quotes around string values.
        let unquoted = rest.trim().trim_matches('"').trim_matches('\'');
        if unquoted.is_empty() {
            None
        } else {
            Some(unquoted.to_string())
        }
    })
}
2894
/// Check if content looks binary: true when a NUL byte occurs within the
/// first 8192 bytes. Bytes past that window are not inspected.
fn is_binary(content: &str) -> bool {
    content.as_bytes().iter().take(8192).any(|&b| b == 0)
}

/// Check if content already contains git conflict markers.
/// This happens with AU/AA conflicts where git stores markers in stage blobs.
///
/// Both a `<<<<<<<` run and a `>>>>>>>` run must be present somewhere in the
/// content; their position within a line and their relative order are not
/// checked.
fn has_conflict_markers(content: &str) -> bool {
    content.contains("<<<<<<<") && content.contains(">>>>>>>")
}

/// Returns true for data/config file formats where Sesame separator expansion
/// (`{`, `}`, `;`) is counterproductive because those chars are structural
/// content rather than code block separators.
///
/// The decision is a case-insensitive suffix match of `file_path` against a
/// fixed list of extensions.
///
/// Note: template files like .svelte/.vue are NOT included here because their
/// embedded `<script>` sections contain real code where Sesame helps.
fn skip_sesame(file_path: &str) -> bool {
    let path_lower = file_path.to_lowercase();
    let extensions = [
        // Data/config formats
        ".json", ".yaml", ".yml", ".toml", ".lock", ".xml", ".csv", ".tsv",
        ".ini", ".cfg", ".conf", ".properties", ".env",
        // Markup/document formats
        ".md", ".markdown", ".txt", ".rst", ".svg", ".html", ".htm",
    ];
    extensions.iter().any(|ext| path_lower.ends_with(ext))
}
2923
/// Expand syntactic separators into separate lines for finer merge alignment.
/// Inspired by Sesame (arXiv:2407.18888): isolating separators lets line-based
/// merge tools see block boundaries as independent change units.
///
/// Every `{`, `}` or `;` found outside a quoted span is placed on a line of
/// its own: a `\n` is inserted before it (unless the output is still empty or
/// already ends in `\n`) and another one right after it.
///
/// A quoted span starts at `"`, `'` or `` ` `` and ends at the next unescaped
/// occurrence of the same character; inside a span a backslash escapes the
/// character that follows. The scan is purely lexical: it has no notion of
/// comments or language-specific syntax, so a lone apostrophe opens a span
/// that lasts until the next `'`.
///
/// The output is built as a `String` directly from `char`s, so no `unsafe`
/// UTF-8 reconstruction is needed.
fn expand_separators(content: &str) -> String {
    let mut expanded = String::with_capacity(content.len() * 2);
    // `Some(q)` while inside a span opened by quote character `q`.
    let mut open_quote: Option<char> = None;
    let mut escape_next = false;

    for ch in content.chars() {
        if escape_next {
            // Character following a backslash inside a quoted span: copy verbatim.
            expanded.push(ch);
            escape_next = false;
            continue;
        }

        match open_quote {
            Some(quote) => {
                if ch == '\\' {
                    escape_next = true;
                } else if ch == quote {
                    open_quote = None;
                }
                expanded.push(ch);
            }
            None => match ch {
                '"' | '\'' | '`' => {
                    open_quote = Some(ch);
                    expanded.push(ch);
                }
                '{' | '}' | ';' => {
                    // Avoid a leading blank line and doubled newlines.
                    if !expanded.is_empty() && !expanded.ends_with('\n') {
                        expanded.push('\n');
                    }
                    expanded.push(ch);
                    expanded.push('\n');
                }
                _ => expanded.push(ch),
            },
        }
    }

    expanded
}
2973
/// Post-process a merge result that was produced from separator-expanded
/// inputs.
///
/// What this actually does: every line of `merged` (as split by
/// [`str::lines`], so a trailing `\r` is dropped) is re-emitted followed by a
/// single `\n`, and then trailing blank lines are trimmed so that non-empty
/// output never ends in `\n\n`. An empty input yields an empty string.
///
/// Separator-only lines (`{`, `}`, `;`) are NOT rejoined with their
/// neighbours, and `_base` is not consulted.
///
/// NOTE(review): the previous body contained a branch meant to glue a
/// separator line onto the preceding line, guarded by "output is non-empty and
/// does not end with `\n`". Every emitted line is newline-terminated, so that
/// guard could never hold and the branch was dead. It has been removed without
/// changing the output. If real collapsing is wanted, it needs to be designed
/// and validated against the callers of this function.
fn collapse_separators(merged: &str, _base: &str) -> String {
    let mut result = String::with_capacity(merged.len() + 1);

    for line in merged.lines() {
        result.push_str(line);
        result.push('\n');
    }

    // Trim any trailing extra newlines to match original style
    while result.ends_with("\n\n") {
        result.pop();
    }

    result
}
3019
3020#[cfg(test)]
3021mod tests {
3022    use super::*;
3023
3024    #[test]
3025    fn test_replace_at_word_boundaries() {
3026        // Should replace standalone occurrences
3027        assert_eq!(replace_at_word_boundaries("fn get() {}", "get", "__E__"), "fn __E__() {}");
3028        // Should NOT replace inside longer identifiers
3029        assert_eq!(replace_at_word_boundaries("fn getAll() {}", "get", "__E__"), "fn getAll() {}");
3030        assert_eq!(replace_at_word_boundaries("fn _get() {}", "get", "__E__"), "fn _get() {}");
3031        // Should replace multiple standalone occurrences
3032        assert_eq!(
3033            replace_at_word_boundaries("pub enum Source { Source }", "Source", "__E__"),
3034            "pub enum __E__ { __E__ }"
3035        );
3036        // Should not replace substring at start/end of identifiers
3037        assert_eq!(
3038            replace_at_word_boundaries("SourceManager isSource", "Source", "__E__"),
3039            "SourceManager isSource"
3040        );
3041        // Should handle multi-byte UTF-8 characters (emojis) without panicking
3042        assert_eq!(
3043            replace_at_word_boundaries("❌ get ✅", "get", "__E__"),
3044            "❌ __E__ ✅"
3045        );
3046        assert_eq!(
3047            replace_at_word_boundaries("fn 名前() { get }", "get", "__E__"),
3048            "fn 名前() { __E__ }"
3049        );
3050        // Emoji-only content with no needle match should pass through unchanged
3051        assert_eq!(
3052            replace_at_word_boundaries("🎉🚀✨", "get", "__E__"),
3053            "🎉🚀✨"
3054        );
3055    }
3056
3057    #[test]
3058    fn test_fast_path_identical() {
3059        let content = "hello world";
3060        let result = entity_merge(content, content, content, "test.ts");
3061        assert!(result.is_clean());
3062        assert_eq!(result.content, content);
3063    }
3064
3065    #[test]
3066    fn test_fast_path_only_ours_changed() {
3067        let base = "hello";
3068        let ours = "hello world";
3069        let result = entity_merge(base, ours, base, "test.ts");
3070        assert!(result.is_clean());
3071        assert_eq!(result.content, ours);
3072    }
3073
3074    #[test]
3075    fn test_fast_path_only_theirs_changed() {
3076        let base = "hello";
3077        let theirs = "hello world";
3078        let result = entity_merge(base, base, theirs, "test.ts");
3079        assert!(result.is_clean());
3080        assert_eq!(result.content, theirs);
3081    }
3082
    /// Both sides append a different new top-level function after an
    /// untouched one; the merge must be clean and keep both additions.
    #[test]
    fn test_different_functions_no_conflict() {
        // Core value prop: two agents add different functions to the same file
        let base = r#"export function existing() {
    return 1;
}
"#;
        let ours = r#"export function existing() {
    return 1;
}

export function agentA() {
    return "added by agent A";
}
"#;
        let theirs = r#"export function existing() {
    return 1;
}

export function agentB() {
    return "added by agent B";
}
"#;
        let result = entity_merge(base, ours, theirs, "test.ts");
        assert!(
            result.is_clean(),
            "Should auto-resolve: different functions added. Conflicts: {:?}",
            result.conflicts
        );
        assert!(
            result.content.contains("agentA"),
            "Should contain agentA function"
        );
        assert!(
            result.content.contains("agentB"),
            "Should contain agentB function"
        );
    }
3121
    /// Both sides change the return value of the same function to different
    /// strings; exactly one conflict, named after that function, is expected.
    #[test]
    fn test_same_function_modified_by_both_conflict() {
        let base = r#"export function shared() {
    return "original";
}
"#;
        let ours = r#"export function shared() {
    return "modified by ours";
}
"#;
        let theirs = r#"export function shared() {
    return "modified by theirs";
}
"#;
        let result = entity_merge(base, ours, theirs, "test.ts");
        // This should be a conflict since both modified the same function incompatibly
        assert!(
            !result.is_clean(),
            "Should conflict when both modify same function differently"
        );
        // The single conflict must be attributed to the `shared` entity.
        assert_eq!(result.conflicts.len(), 1);
        assert_eq!(result.conflicts[0].entity_name, "shared");
    }
3145
3146    #[test]
3147    fn test_fallback_for_unknown_filetype() {
3148        // Non-adjacent changes should merge cleanly with line-level merge
3149        let base = "line 1\nline 2\nline 3\nline 4\nline 5\n";
3150        let ours = "line 1 modified\nline 2\nline 3\nline 4\nline 5\n";
3151        let theirs = "line 1\nline 2\nline 3\nline 4\nline 5 modified\n";
3152        let result = entity_merge(base, ours, theirs, "test.xyz");
3153        assert!(
3154            result.is_clean(),
3155            "Non-adjacent changes should merge cleanly. Conflicts: {:?}",
3156            result.conflicts,
3157        );
3158    }
3159
3160    #[test]
3161    fn test_line_level_fallback() {
3162        // Non-adjacent changes merge cleanly in 3-way merge
3163        let base = "a\nb\nc\nd\ne\n";
3164        let ours = "A\nb\nc\nd\ne\n";
3165        let theirs = "a\nb\nc\nd\nE\n";
3166        let result = line_level_fallback(base, ours, theirs, "test.rs");
3167        assert!(result.is_clean());
3168        assert!(result.stats.used_fallback);
3169        assert_eq!(result.content, "A\nb\nc\nd\nE\n");
3170    }
3171
3172    #[test]
3173    fn test_line_level_fallback_conflict() {
3174        // Same line changed differently → conflict
3175        let base = "a\nb\nc\n";
3176        let ours = "X\nb\nc\n";
3177        let theirs = "Y\nb\nc\n";
3178        let result = line_level_fallback(base, ours, theirs, "test.rs");
3179        assert!(!result.is_clean());
3180        assert!(result.stats.used_fallback);
3181    }
3182
3183    #[test]
3184    fn test_expand_separators() {
3185        let code = "function foo() { return 1; }";
3186        let expanded = expand_separators(code);
3187        // Separators should be on their own lines
3188        assert!(expanded.contains("{\n"), "Opening brace should have newline after");
3189        assert!(expanded.contains(";\n"), "Semicolons should have newline after");
3190        assert!(expanded.contains("\n}"), "Closing brace should have newline before");
3191    }
3192
3193    #[test]
3194    fn test_expand_separators_preserves_strings() {
3195        let code = r#"let x = "hello { world };";"#;
3196        let expanded = expand_separators(code);
3197        // Separators inside strings should NOT be expanded
3198        assert!(
3199            expanded.contains("\"hello { world };\""),
3200            "Separators in strings should be preserved: {}",
3201            expanded
3202        );
3203    }
3204
3205    #[test]
3206    fn test_is_import_region() {
3207        assert!(is_import_region("import foo from 'foo';\nimport bar from 'bar';\n"));
3208        assert!(is_import_region("use std::io;\nuse std::fs;\n"));
3209        assert!(!is_import_region("let x = 1;\nlet y = 2;\n"));
3210        // Mixed: 1 import + 2 non-imports → not import region
3211        assert!(!is_import_region("import foo from 'foo';\nlet x = 1;\nlet y = 2;\n"));
3212        // Empty → not import region
3213        assert!(!is_import_region(""));
3214    }
3215
3216    #[test]
3217    fn test_is_import_line() {
3218        // JS/TS
3219        assert!(is_import_line("import foo from 'foo';"));
3220        assert!(is_import_line("import { bar } from 'bar';"));
3221        assert!(is_import_line("from typing import List"));
3222        // Rust
3223        assert!(is_import_line("use std::io::Read;"));
3224        // C/C++
3225        assert!(is_import_line("#include <stdio.h>"));
3226        // Node require
3227        assert!(is_import_line("const fs = require('fs');"));
3228        // Not imports
3229        assert!(!is_import_line("let x = 1;"));
3230        assert!(!is_import_line("function foo() {}"));
3231    }
3232
3233    #[test]
3234    fn test_commutative_import_merge_both_add_different() {
3235        // The key scenario: both branches add different imports
3236        let base = "import a from 'a';\nimport b from 'b';\n";
3237        let ours = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
3238        let theirs = "import a from 'a';\nimport b from 'b';\nimport d from 'd';\n";
3239        let result = merge_imports_commutatively(base, ours, theirs);
3240        assert!(result.contains("import a from 'a';"));
3241        assert!(result.contains("import b from 'b';"));
3242        assert!(result.contains("import c from 'c';"));
3243        assert!(result.contains("import d from 'd';"));
3244    }
3245
3246    #[test]
3247    fn test_commutative_import_merge_one_removes() {
3248        // Ours removes an import, theirs keeps it → removed
3249        let base = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
3250        let ours = "import a from 'a';\nimport c from 'c';\n";
3251        let theirs = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
3252        let result = merge_imports_commutatively(base, ours, theirs);
3253        assert!(result.contains("import a from 'a';"));
3254        assert!(!result.contains("import b from 'b';"), "Removed import should stay removed");
3255        assert!(result.contains("import c from 'c';"));
3256    }
3257
3258    #[test]
3259    fn test_commutative_import_merge_both_add_same() {
3260        // Both add the same import → should appear only once
3261        let base = "import a from 'a';\n";
3262        let ours = "import a from 'a';\nimport b from 'b';\n";
3263        let theirs = "import a from 'a';\nimport b from 'b';\n";
3264        let result = merge_imports_commutatively(base, ours, theirs);
3265        let count = result.matches("import b from 'b';").count();
3266        assert_eq!(count, 1, "Duplicate import should be deduplicated");
3267    }
3268
    /// Ours adds logging to `add`, theirs adds validation to `subtract` in the
    /// same class; the merge must be clean and contain both edits.
    #[test]
    fn test_inner_entity_merge_different_methods() {
        // Two agents modify different methods in the same class
        // This would normally conflict with diffy because the changes are adjacent
        let base = r#"export class Calculator {
    add(a: number, b: number): number {
        return a + b;
    }

    subtract(a: number, b: number): number {
        return a - b;
    }
}
"#;
        let ours = r#"export class Calculator {
    add(a: number, b: number): number {
        // Added logging
        console.log("adding", a, b);
        return a + b;
    }

    subtract(a: number, b: number): number {
        return a - b;
    }
}
"#;
        let theirs = r#"export class Calculator {
    add(a: number, b: number): number {
        return a + b;
    }

    subtract(a: number, b: number): number {
        // Added validation
        if (b > a) throw new Error("negative");
        return a - b;
    }
}
"#;
        let result = entity_merge(base, ours, theirs, "test.ts");
        assert!(
            result.is_clean(),
            "Different methods modified should auto-merge via inner entity merge. Conflicts: {:?}",
            result.conflicts,
        );
        assert!(result.content.contains("console.log"), "Should contain ours changes");
        assert!(result.content.contains("negative"), "Should contain theirs changes");
    }
3316
    /// Ours adds `multiply`, theirs adds `divide` to the same class; the merge
    /// must be clean and contain both new methods.
    #[test]
    fn test_inner_entity_merge_both_add_different_methods() {
        // Both branches add different methods to the same class
        let base = r#"export class Calculator {
    add(a: number, b: number): number {
        return a + b;
    }
}
"#;
        let ours = r#"export class Calculator {
    add(a: number, b: number): number {
        return a + b;
    }

    multiply(a: number, b: number): number {
        return a * b;
    }
}
"#;
        let theirs = r#"export class Calculator {
    add(a: number, b: number): number {
        return a + b;
    }

    divide(a: number, b: number): number {
        return a / b;
    }
}
"#;
        let result = entity_merge(base, ours, theirs, "test.ts");
        assert!(
            result.is_clean(),
            "Both adding different methods should auto-merge. Conflicts: {:?}",
            result.conflicts,
        );
        assert!(result.content.contains("multiply"), "Should contain ours's new method");
        assert!(result.content.contains("divide"), "Should contain theirs's new method");
    }
3355
    /// Both sides change the body of `add` to different expressions while
    /// `subtract` stays untouched; the merge must not be clean.
    #[test]
    fn test_inner_entity_merge_same_method_modified_still_conflicts() {
        // Both modify the same method differently → should still conflict
        let base = r#"export class Calculator {
    add(a: number, b: number): number {
        return a + b;
    }

    subtract(a: number, b: number): number {
        return a - b;
    }
}
"#;
        let ours = r#"export class Calculator {
    add(a: number, b: number): number {
        return a + b + 1;
    }

    subtract(a: number, b: number): number {
        return a - b;
    }
}
"#;
        let theirs = r#"export class Calculator {
    add(a: number, b: number): number {
        return a + b + 2;
    }

    subtract(a: number, b: number): number {
        return a - b;
    }
}
"#;
        let result = entity_merge(base, ours, theirs, "test.ts");
        assert!(
            !result.is_clean(),
            "Both modifying same method differently should still conflict"
        );
    }
3395
    /// `extract_member_chunks` on a class with two methods must yield two
    /// chunks named `bar` and `baz`, in source order.
    #[test]
    fn test_extract_member_chunks() {
        let class_body = r#"export class Foo {
    bar() {
        return 1;
    }

    baz() {
        return 2;
    }
}
"#;
        // `unwrap` turns a failed extraction into a test failure.
        let chunks = extract_member_chunks(class_body).unwrap();
        assert_eq!(chunks.len(), 2, "Should find 2 members, found {:?}", chunks.iter().map(|c| &c.name).collect::<Vec<_>>());
        assert_eq!(chunks[0].name, "bar");
        assert_eq!(chunks[1].name, "baz");
    }
3413
3414    #[test]
3415    fn test_extract_member_name() {
3416        assert_eq!(extract_member_name("add(a, b) {"), "add");
3417        assert_eq!(extract_member_name("fn add(&self, a: i32) -> i32 {"), "add");
3418        assert_eq!(extract_member_name("def add(self, a, b):"), "add");
3419        assert_eq!(extract_member_name("public static getValue(): number {"), "getValue");
3420        assert_eq!(extract_member_name("async fetchData() {"), "fetchData");
3421    }
3422
3423    #[test]
3424    fn test_commutative_import_merge_rust_use() {
3425        let base = "use std::io;\nuse std::fs;\n";
3426        let ours = "use std::io;\nuse std::fs;\nuse std::path::Path;\n";
3427        let theirs = "use std::io;\nuse std::fs;\nuse std::collections::HashMap;\n";
3428        let result = merge_imports_commutatively(base, ours, theirs);
3429        assert!(result.contains("use std::path::Path;"));
3430        assert!(result.contains("use std::collections::HashMap;"));
3431        assert!(result.contains("use std::io;"));
3432        assert!(result.contains("use std::fs;"));
3433    }
3434
3435    #[test]
3436    fn test_is_whitespace_only_diff_true() {
3437        // Same content, different indentation
3438        assert!(is_whitespace_only_diff(
3439            "    return 1;\n    return 2;\n",
3440            "      return 1;\n      return 2;\n"
3441        ));
3442        // Same content, extra blank lines
3443        assert!(is_whitespace_only_diff(
3444            "return 1;\nreturn 2;\n",
3445            "return 1;\n\nreturn 2;\n"
3446        ));
3447    }
3448
3449    #[test]
3450    fn test_is_whitespace_only_diff_false() {
3451        // Different content
3452        assert!(!is_whitespace_only_diff(
3453            "    return 1;\n",
3454            "    return 2;\n"
3455        ));
3456        // Added code
3457        assert!(!is_whitespace_only_diff(
3458            "return 1;\n",
3459            "return 1;\nconsole.log('x');\n"
3460        ));
3461    }
3462
3463    #[test]
3464    fn test_ts_interface_both_add_different_fields() {
3465        let base = "interface Config {\n    name: string;\n}\n";
3466        let ours = "interface Config {\n    name: string;\n    age: number;\n}\n";
3467        let theirs = "interface Config {\n    name: string;\n    email: string;\n}\n";
3468        let result = entity_merge(base, ours, theirs, "test.ts");
3469        eprintln!("TS interface: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3470        eprintln!("Content: {:?}", result.content);
3471        assert!(
3472            result.is_clean(),
3473            "Both adding different fields to TS interface should merge. Conflicts: {:?}",
3474            result.conflicts,
3475        );
3476        assert!(result.content.contains("age"));
3477        assert!(result.content.contains("email"));
3478    }
3479
3480    #[test]
3481    fn test_rust_enum_both_add_different_variants() {
3482        let base = "enum Color {\n    Red,\n    Blue,\n}\n";
3483        let ours = "enum Color {\n    Red,\n    Blue,\n    Green,\n}\n";
3484        let theirs = "enum Color {\n    Red,\n    Blue,\n    Yellow,\n}\n";
3485        let result = entity_merge(base, ours, theirs, "test.rs");
3486        eprintln!("Rust enum: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3487        eprintln!("Content: {:?}", result.content);
3488        assert!(
3489            result.is_clean(),
3490            "Both adding different enum variants should merge. Conflicts: {:?}",
3491            result.conflicts,
3492        );
3493        assert!(result.content.contains("Green"));
3494        assert!(result.content.contains("Yellow"));
3495    }
3496
3497    #[test]
3498    fn test_python_both_add_different_decorators() {
3499        // Both add different decorators to the same function
3500        let base = "def foo():\n    return 1\n\ndef bar():\n    return 2\n";
3501        let ours = "@cache\ndef foo():\n    return 1\n\ndef bar():\n    return 2\n";
3502        let theirs = "@deprecated\ndef foo():\n    return 1\n\ndef bar():\n    return 2\n";
3503        let result = entity_merge(base, ours, theirs, "test.py");
3504        assert!(
3505            result.is_clean(),
3506            "Both adding different decorators should merge. Conflicts: {:?}",
3507            result.conflicts,
3508        );
3509        assert!(result.content.contains("@cache"));
3510        assert!(result.content.contains("@deprecated"));
3511        assert!(result.content.contains("def foo()"));
3512    }
3513
3514    #[test]
3515    fn test_decorator_plus_body_change() {
3516        // One adds decorator, other modifies body — should merge both
3517        let base = "def foo():\n    return 1\n";
3518        let ours = "@cache\ndef foo():\n    return 1\n";
3519        let theirs = "def foo():\n    return 42\n";
3520        let result = entity_merge(base, ours, theirs, "test.py");
3521        assert!(
3522            result.is_clean(),
3523            "Decorator + body change should merge. Conflicts: {:?}",
3524            result.conflicts,
3525        );
3526        assert!(result.content.contains("@cache"));
3527        assert!(result.content.contains("return 42"));
3528    }
3529
3530    #[test]
3531    fn test_ts_class_decorator_merge() {
3532        // TypeScript decorators on class methods — both add different decorators
3533        let base = "class Foo {\n    bar() {\n        return 1;\n    }\n}\n";
3534        let ours = "class Foo {\n    @Injectable()\n    bar() {\n        return 1;\n    }\n}\n";
3535        let theirs = "class Foo {\n    @Deprecated()\n    bar() {\n        return 1;\n    }\n}\n";
3536        let result = entity_merge(base, ours, theirs, "test.ts");
3537        assert!(
3538            result.is_clean(),
3539            "Both adding different decorators to same method should merge. Conflicts: {:?}",
3540            result.conflicts,
3541        );
3542        assert!(result.content.contains("@Injectable()"));
3543        assert!(result.content.contains("@Deprecated()"));
3544        assert!(result.content.contains("bar()"));
3545    }
3546
    /// Ours inserts a log line near the end of a function, theirs inserts one
    /// at its start; the merge must be clean and contain both log calls.
    #[test]
    fn test_non_adjacent_intra_function_changes() {
        let base = r#"export function process(data: any) {
    const validated = validate(data);
    const transformed = transform(validated);
    const saved = save(transformed);
    return saved;
}
"#;
        let ours = r#"export function process(data: any) {
    const validated = validate(data);
    const transformed = transform(validated);
    const saved = save(transformed);
    console.log("saved", saved);
    return saved;
}
"#;
        let theirs = r#"export function process(data: any) {
    console.log("input", data);
    const validated = validate(data);
    const transformed = transform(validated);
    const saved = save(transformed);
    return saved;
}
"#;
        let result = entity_merge(base, ours, theirs, "test.ts");
        assert!(
            result.is_clean(),
            "Non-adjacent changes within same function should merge via diffy. Conflicts: {:?}",
            result.conflicts,
        );
        assert!(result.content.contains("console.log(\"saved\""));
        assert!(result.content.contains("console.log(\"input\""));
    }
3581
    /// Ours swaps the order of two methods, theirs adds a log line to a third
    /// method; the merge must be clean, keep the modification, and still
    /// contain both reordered methods.
    #[test]
    fn test_method_reordering_with_modification() {
        // Agent A reorders methods in class, Agent B modifies one method
        // Inner entity merge matches by name, so reordering should be transparent
        let base = r#"class Service {
    getUser(id: string) {
        return db.find(id);
    }

    createUser(data: any) {
        return db.create(data);
    }

    deleteUser(id: string) {
        return db.delete(id);
    }
}
"#;
        // Ours: reorder methods (move deleteUser before createUser)
        let ours = r#"class Service {
    getUser(id: string) {
        return db.find(id);
    }

    deleteUser(id: string) {
        return db.delete(id);
    }

    createUser(data: any) {
        return db.create(data);
    }
}
"#;
        // Theirs: modify getUser
        let theirs = r#"class Service {
    getUser(id: string) {
        console.log("fetching", id);
        return db.find(id);
    }

    createUser(data: any) {
        return db.create(data);
    }

    deleteUser(id: string) {
        return db.delete(id);
    }
}
"#;
        let result = entity_merge(base, ours, theirs, "test.ts");
        // Diagnostic output, visible when the test fails or runs with --nocapture.
        eprintln!("Method reorder: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
        eprintln!("Content:\n{}", result.content);
        assert!(
            result.is_clean(),
            "Method reordering + modification should merge. Conflicts: {:?}",
            result.conflicts,
        );
        // Only presence is checked; the resulting method order is not asserted.
        assert!(result.content.contains("console.log(\"fetching\""), "Should contain theirs modification");
        assert!(result.content.contains("deleteUser"), "Should have deleteUser");
        assert!(result.content.contains("createUser"), "Should have createUser");
    }
3643
    /// Exploratory scenario: ours adds a JSDoc block above a function while
    /// theirs rewrites its body.
    ///
    /// NOTE(review): this test contains no assertions. It only prints the
    /// merge outcome to stderr, so it can fail only if `entity_merge` panics.
    #[test]
    fn test_doc_comment_plus_body_change() {
        // One side adds JSDoc comment, other modifies function body
        // Doc comments are part of the entity region — they should merge with body changes
        let base = r#"export function calculate(a: number, b: number): number {
    return a + b;
}
"#;
        let ours = r#"/**
 * Calculate the sum of two numbers.
 * @param a - First number
 * @param b - Second number
 */
export function calculate(a: number, b: number): number {
    return a + b;
}
"#;
        let theirs = r#"export function calculate(a: number, b: number): number {
    const result = a + b;
    console.log("result:", result);
    return result;
}
"#;
        let result = entity_merge(base, ours, theirs, "test.ts");
        eprintln!("Doc comment + body: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
        eprintln!("Content:\n{}", result.content);
        // This tests whether weave can merge doc comment additions with body changes
    }
3672
    /// Exploratory scenario: both sides insert a different guard clause as the
    /// first statement of the same function.
    ///
    /// NOTE(review): this test contains no assertions. It only prints the
    /// merge outcome to stderr, so it can fail only if `entity_merge` panics.
    #[test]
    fn test_both_add_different_guard_clauses() {
        // Both add different guard clauses at the start of a function
        let base = r#"export function processOrder(order: Order): Result {
    const total = calculateTotal(order);
    return { success: true, total };
}
"#;
        let ours = r#"export function processOrder(order: Order): Result {
    if (!order) throw new Error("Order required");
    const total = calculateTotal(order);
    return { success: true, total };
}
"#;
        let theirs = r#"export function processOrder(order: Order): Result {
    if (order.items.length === 0) throw new Error("Empty order");
    const total = calculateTotal(order);
    return { success: true, total };
}
"#;
        let result = entity_merge(base, ours, theirs, "test.ts");
        eprintln!("Guard clauses: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
        eprintln!("Content:\n{}", result.content);
        // Both add at same position — diffy may struggle since they're at the same insertion point
    }
3698
3699    #[test]
3700    fn test_both_modify_different_enum_variants() {
3701        // One modifies a variant's value, other adds new variants
3702        let base = r#"enum Status {
3703    Active = "active",
3704    Inactive = "inactive",
3705    Pending = "pending",
3706}
3707"#;
3708        let ours = r#"enum Status {
3709    Active = "active",
3710    Inactive = "disabled",
3711    Pending = "pending",
3712}
3713"#;
3714        let theirs = r#"enum Status {
3715    Active = "active",
3716    Inactive = "inactive",
3717    Pending = "pending",
3718    Deleted = "deleted",
3719}
3720"#;
3721        let result = entity_merge(base, ours, theirs, "test.ts");
3722        eprintln!("Enum modify+add: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3723        eprintln!("Content:\n{}", result.content);
3724        assert!(
3725            result.is_clean(),
3726            "Modify variant + add new variant should merge. Conflicts: {:?}",
3727            result.conflicts,
3728        );
3729        assert!(result.content.contains("\"disabled\""), "Should have modified Inactive");
3730        assert!(result.content.contains("Deleted"), "Should have new Deleted variant");
3731    }
3732
3733    #[test]
3734    fn test_config_object_field_additions() {
3735        // Both add different fields to a config object (exported const)
3736        let base = r#"export const config = {
3737    timeout: 5000,
3738    retries: 3,
3739};
3740"#;
3741        let ours = r#"export const config = {
3742    timeout: 5000,
3743    retries: 3,
3744    maxConnections: 10,
3745};
3746"#;
3747        let theirs = r#"export const config = {
3748    timeout: 5000,
3749    retries: 3,
3750    logLevel: "info",
3751};
3752"#;
3753        let result = entity_merge(base, ours, theirs, "test.ts");
3754        eprintln!("Config fields: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3755        eprintln!("Content:\n{}", result.content);
3756        // This tests whether inner entity merge handles object literals
3757        // (it probably won't since object fields aren't extracted as members the same way)
3758    }
3759
3760    #[test]
3761    fn test_rust_impl_block_both_add_methods() {
3762        // Both add different methods to a Rust impl block
3763        let base = r#"impl Calculator {
3764    fn add(&self, a: i32, b: i32) -> i32 {
3765        a + b
3766    }
3767}
3768"#;
3769        let ours = r#"impl Calculator {
3770    fn add(&self, a: i32, b: i32) -> i32 {
3771        a + b
3772    }
3773
3774    fn multiply(&self, a: i32, b: i32) -> i32 {
3775        a * b
3776    }
3777}
3778"#;
3779        let theirs = r#"impl Calculator {
3780    fn add(&self, a: i32, b: i32) -> i32 {
3781        a + b
3782    }
3783
3784    fn divide(&self, a: i32, b: i32) -> i32 {
3785        a / b
3786    }
3787}
3788"#;
3789        let result = entity_merge(base, ours, theirs, "test.rs");
3790        eprintln!("Rust impl: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3791        eprintln!("Content:\n{}", result.content);
3792        assert!(
3793            result.is_clean(),
3794            "Both adding methods to Rust impl should merge. Conflicts: {:?}",
3795            result.conflicts,
3796        );
3797        assert!(result.content.contains("multiply"), "Should have multiply");
3798        assert!(result.content.contains("divide"), "Should have divide");
3799    }
3800
3801    #[test]
3802    fn test_rust_impl_same_trait_different_types() {
3803        // Two impl blocks for the same trait but different types.
3804        // Each branch modifies a different impl. Both should be preserved.
3805        // Regression: sem-core <0.3.10 named both "Stream", causing collision.
3806        let base = r#"struct Foo;
3807struct Bar;
3808
3809impl Stream for Foo {
3810    type Item = i32;
3811    fn poll_next(&self) -> Option<i32> {
3812        Some(1)
3813    }
3814}
3815
3816impl Stream for Bar {
3817    type Item = String;
3818    fn poll_next(&self) -> Option<String> {
3819        Some("hello".into())
3820    }
3821}
3822
3823fn other() {}
3824"#;
3825        let ours = r#"struct Foo;
3826struct Bar;
3827
3828impl Stream for Foo {
3829    type Item = i32;
3830    fn poll_next(&self) -> Option<i32> {
3831        let x = compute();
3832        Some(x + 1)
3833    }
3834}
3835
3836impl Stream for Bar {
3837    type Item = String;
3838    fn poll_next(&self) -> Option<String> {
3839        Some("hello".into())
3840    }
3841}
3842
3843fn other() {}
3844"#;
3845        let theirs = r#"struct Foo;
3846struct Bar;
3847
3848impl Stream for Foo {
3849    type Item = i32;
3850    fn poll_next(&self) -> Option<i32> {
3851        Some(1)
3852    }
3853}
3854
3855impl Stream for Bar {
3856    type Item = String;
3857    fn poll_next(&self) -> Option<String> {
3858        let s = format!("hello {}", name);
3859        Some(s)
3860    }
3861}
3862
3863fn other() {}
3864"#;
3865        let result = entity_merge(base, ours, theirs, "test.rs");
3866        assert!(
3867            result.is_clean(),
3868            "Same trait, different types should not conflict. Conflicts: {:?}",
3869            result.conflicts,
3870        );
3871        assert!(result.content.contains("impl Stream for Foo"), "Should have Foo impl");
3872        assert!(result.content.contains("impl Stream for Bar"), "Should have Bar impl");
3873        assert!(result.content.contains("compute()"), "Should have ours' Foo change");
3874        assert!(result.content.contains("format!"), "Should have theirs' Bar change");
3875    }
3876
3877    #[test]
3878    fn test_rust_doc_comment_plus_body_change() {
3879        // One side adds Rust doc comment, other modifies body
3880        // Comment bundling ensures the doc comment is part of the entity
3881        let base = r#"fn add(a: i32, b: i32) -> i32 {
3882    a + b
3883}
3884
3885fn subtract(a: i32, b: i32) -> i32 {
3886    a - b
3887}
3888"#;
3889        let ours = r#"/// Adds two numbers together.
3890fn add(a: i32, b: i32) -> i32 {
3891    a + b
3892}
3893
3894fn subtract(a: i32, b: i32) -> i32 {
3895    a - b
3896}
3897"#;
3898        let theirs = r#"fn add(a: i32, b: i32) -> i32 {
3899    a + b
3900}
3901
3902fn subtract(a: i32, b: i32) -> i32 {
3903    a - b - 1
3904}
3905"#;
3906        let result = entity_merge(base, ours, theirs, "test.rs");
3907        assert!(
3908            result.is_clean(),
3909            "Rust doc comment + body change should merge. Conflicts: {:?}",
3910            result.conflicts,
3911        );
3912        assert!(result.content.contains("/// Adds two numbers"), "Should have ours doc comment");
3913        assert!(result.content.contains("a - b - 1"), "Should have theirs body change");
3914    }
3915
3916    #[test]
3917    fn test_both_add_different_doc_comments() {
3918        // Both add doc comments to different functions — should merge cleanly
3919        let base = r#"fn add(a: i32, b: i32) -> i32 {
3920    a + b
3921}
3922
3923fn subtract(a: i32, b: i32) -> i32 {
3924    a - b
3925}
3926"#;
3927        let ours = r#"/// Adds two numbers.
3928fn add(a: i32, b: i32) -> i32 {
3929    a + b
3930}
3931
3932fn subtract(a: i32, b: i32) -> i32 {
3933    a - b
3934}
3935"#;
3936        let theirs = r#"fn add(a: i32, b: i32) -> i32 {
3937    a + b
3938}
3939
3940/// Subtracts b from a.
3941fn subtract(a: i32, b: i32) -> i32 {
3942    a - b
3943}
3944"#;
3945        let result = entity_merge(base, ours, theirs, "test.rs");
3946        assert!(
3947            result.is_clean(),
3948            "Both adding doc comments to different functions should merge. Conflicts: {:?}",
3949            result.conflicts,
3950        );
3951        assert!(result.content.contains("/// Adds two numbers"), "Should have add's doc comment");
3952        assert!(result.content.contains("/// Subtracts b from a"), "Should have subtract's doc comment");
3953    }
3954
3955    #[test]
3956    fn test_go_import_block_both_add_different() {
3957        // Go uses import (...) blocks — both add different imports
3958        let base = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3959        let ours = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"strings\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3960        let theirs = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"io\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3961        let result = entity_merge(base, ours, theirs, "main.go");
3962        eprintln!("Go import block: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3963        eprintln!("Content:\n{}", result.content);
3964        // This tests whether Go import blocks (a single entity) get inner-merged
3965    }
3966
3967    #[test]
3968    fn test_python_class_both_add_methods() {
3969        // Python class — both add different methods
3970        let base = "class Calculator:\n    def add(self, a, b):\n        return a + b\n";
3971        let ours = "class Calculator:\n    def add(self, a, b):\n        return a + b\n\n    def multiply(self, a, b):\n        return a * b\n";
3972        let theirs = "class Calculator:\n    def add(self, a, b):\n        return a + b\n\n    def divide(self, a, b):\n        return a / b\n";
3973        let result = entity_merge(base, ours, theirs, "test.py");
3974        eprintln!("Python class: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3975        eprintln!("Content:\n{}", result.content);
3976        assert!(
3977            result.is_clean(),
3978            "Both adding methods to Python class should merge. Conflicts: {:?}",
3979            result.conflicts,
3980        );
3981        assert!(result.content.contains("multiply"), "Should have multiply");
3982        assert!(result.content.contains("divide"), "Should have divide");
3983    }
3984
3985    #[test]
3986    fn test_interstitial_conflict_not_silently_embedded() {
3987        // Regression test: when interstitial content between entities has a
3988        // both-modified conflict, merge_interstitials must report it as a real
3989        // conflict instead of silently embedding raw diffy markers and claiming
3990        // is_clean=true.
3991        //
3992        // Scenario: a barrel export file (index.ts) with comments between
3993        // export statements. Both sides modify the SAME interstitial comment
3994        // block differently. The exports are the entities; the comment between
3995        // them is interstitial content that goes through merge_interstitials
3996        // → diffy, which cannot auto-merge conflicting edits.
3997        let base = r#"export { alpha } from "./alpha";
3998
3999// Section: data utilities
4000// TODO: add more exports here
4001
4002export { beta } from "./beta";
4003"#;
4004        let ours = r#"export { alpha } from "./alpha";
4005
4006// Section: data utilities (sorting)
4007// Sorting helpers for list views
4008
4009export { beta } from "./beta";
4010"#;
4011        let theirs = r#"export { alpha } from "./alpha";
4012
4013// Section: data utilities (filtering)
4014// Filtering helpers for search views
4015
4016export { beta } from "./beta";
4017"#;
4018        let result = entity_merge(base, ours, theirs, "index.ts");
4019
4020        // The key assertions:
4021        // 1. If the content has conflict markers, is_clean() MUST be false
4022        let has_markers = result.content.contains("<<<<<<<") || result.content.contains(">>>>>>>");
4023        if has_markers {
4024            assert!(
4025                !result.is_clean(),
4026                "BUG: is_clean()=true but merged content has conflict markers!\n\
4027                 stats: {}\nconflicts: {:?}\ncontent:\n{}",
4028                result.stats, result.conflicts, result.content
4029            );
4030            assert!(
4031                result.stats.entities_conflicted > 0,
4032                "entities_conflicted should be > 0 when markers are present"
4033            );
4034        }
4035
4036        // 2. If it was resolved cleanly, no markers should exist
4037        if result.is_clean() {
4038            assert!(
4039                !has_markers,
4040                "Clean merge should not contain conflict markers!\ncontent:\n{}",
4041                result.content
4042            );
4043        }
4044    }
4045
4046    #[test]
4047    fn test_pre_conflicted_input_not_treated_as_clean() {
4048        // Regression test for AU/AA conflicts: git can store conflict markers
4049        // directly into stage blobs. Weave must not return is_clean=true.
4050        let base = "";
4051        let theirs = "";
4052        let ours = r#"/**
4053 * MIT License
4054 */
4055
4056<<<<<<<< HEAD:src/lib/exports/index.ts
4057export { renderDocToBuffer } from "./doc-exporter";
4058export type { ExportOptions, ExportMetadata, RenderContext } from "./types";
4059========
4060export * from "./editor";
4061export * from "./types";
4062>>>>>>>> feature:packages/core/src/editor/index.ts
4063"#;
4064        let result = entity_merge(base, ours, theirs, "index.ts");
4065
4066        assert!(
4067            !result.is_clean(),
4068            "Pre-conflicted input must not be reported as clean!\n\
4069             stats: {}\nconflicts: {:?}",
4070            result.stats, result.conflicts,
4071        );
4072        assert!(result.stats.entities_conflicted > 0);
4073        assert!(!result.conflicts.is_empty());
4074    }
4075
4076    #[test]
4077    fn test_multi_line_signature_classified_as_syntax() {
4078        // Multi-line parameter list: changing a param should be Syntax, not Functional
4079        let base = "function process(\n    a: number,\n    b: string\n) {\n    return a;\n}\n";
4080        let ours = "function process(\n    a: number,\n    b: string,\n    c: boolean\n) {\n    return a;\n}\n";
4081        let theirs = "function process(\n    a: number,\n    b: number\n) {\n    return a;\n}\n";
4082        let complexity = crate::conflict::classify_conflict(Some(base), Some(ours), Some(theirs));
4083        assert_eq!(
4084            complexity,
4085            crate::conflict::ConflictComplexity::Syntax,
4086            "Multi-line signature change should be classified as Syntax, got {:?}",
4087            complexity
4088        );
4089    }
4090
4091    #[test]
4092    fn test_grouped_import_merge_preserves_groups() {
4093        let base = "import os\nimport sys\n\nfrom collections import OrderedDict\nfrom typing import List\n";
4094        let ours = "import os\nimport sys\nimport json\n\nfrom collections import OrderedDict\nfrom typing import List\n";
4095        let theirs = "import os\nimport sys\n\nfrom collections import OrderedDict\nfrom collections import defaultdict\nfrom typing import List\n";
4096        let result = merge_imports_commutatively(base, ours, theirs);
4097        // json should be in the first group (stdlib), defaultdict in the second (collections)
4098        let lines: Vec<&str> = result.lines().collect();
4099        let json_idx = lines.iter().position(|l| l.contains("json"));
4100        let blank_idx = lines.iter().position(|l| l.trim().is_empty());
4101        let defaultdict_idx = lines.iter().position(|l| l.contains("defaultdict"));
4102        assert!(json_idx.is_some(), "json import should be present");
4103        assert!(blank_idx.is_some(), "blank line separator should be present");
4104        assert!(defaultdict_idx.is_some(), "defaultdict import should be present");
4105        // json should come before the blank line, defaultdict after
4106        assert!(json_idx.unwrap() < blank_idx.unwrap(), "json should be in first group");
4107        assert!(defaultdict_idx.unwrap() > blank_idx.unwrap(), "defaultdict should be in second group");
4108    }
4109
4110    #[test]
4111    fn test_configurable_duplicate_threshold() {
4112        // Create entities with 15 same-name entities
4113        let entities: Vec<SemanticEntity> = (0..15).map(|i| SemanticEntity {
4114            id: format!("test::function::test_{}", i),
4115            file_path: "test.ts".to_string(),
4116            entity_type: "function".to_string(),
4117            name: "test".to_string(),
4118            parent_id: None,
4119            content: format!("function test() {{ return {}; }}", i),
4120            content_hash: format!("hash_{}", i),
4121            structural_hash: None,
4122            start_line: i * 3 + 1,
4123            end_line: i * 3 + 3,
4124            metadata: None,
4125        }).collect();
4126        // Default threshold (10): should trigger
4127        assert!(has_excessive_duplicates(&entities));
4128        // Set threshold to 20: should not trigger
4129        std::env::set_var("WEAVE_MAX_DUPLICATES", "20");
4130        assert!(!has_excessive_duplicates(&entities));
4131        std::env::remove_var("WEAVE_MAX_DUPLICATES");
4132    }
4133
4134    #[test]
4135    fn test_ts_multiline_import_consolidation() {
4136        // Issue #24: when incoming consolidates two imports into one multi-line import,
4137        // the `import {` opening line can get dropped.
4138        let base = "\
4139import type { Foo } from \"./foo\"
4140import {
4141     type a,
4142     type b,
4143     type c,
4144} from \"./foo\"
4145
4146export function bar() {
4147    return 1;
4148}
4149";
4150        let ours = base;
4151        let theirs = "\
4152import {
4153     type Foo,
4154     type a,
4155     type b,
4156     type c,
4157} from \"./foo\"
4158
4159export function bar() {
4160    return 1;
4161}
4162";
4163        let result = entity_merge(base, ours, theirs, "test.ts");
4164        eprintln!("TS import consolidation: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4165        eprintln!("Content:\n{}", result.content);
4166        // Theirs is the only change, result should match theirs exactly
4167        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4168        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4169        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4170        assert!(!result.content.contains("import type { Foo }"), "old separate import should be removed");
4171    }
4172
4173    #[test]
4174    fn test_ts_multiline_import_both_modify() {
4175        // Issue #24 variant: both sides modify the import block
4176        let base = "\
4177import type { Foo } from \"./foo\"
4178import {
4179     type a,
4180     type b,
4181     type c,
4182} from \"./foo\"
4183
4184export function bar() {
4185    return 1;
4186}
4187";
4188        // Ours: consolidates imports + adds type d
4189        let ours = "\
4190import {
4191     type Foo,
4192     type a,
4193     type b,
4194     type c,
4195     type d,
4196} from \"./foo\"
4197
4198export function bar() {
4199    return 1;
4200}
4201";
4202        // Theirs: consolidates imports + adds type e
4203        let theirs = "\
4204import {
4205     type Foo,
4206     type a,
4207     type b,
4208     type c,
4209     type e,
4210} from \"./foo\"
4211
4212export function bar() {
4213    return 1;
4214}
4215";
4216        let result = entity_merge(base, ours, theirs, "test.ts");
4217        eprintln!("TS import both modify: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4218        eprintln!("Content:\n{}", result.content);
4219        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4220        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4221        assert!(result.content.contains("type d,"), "ours addition must be present");
4222        assert!(result.content.contains("type e,"), "theirs addition must be present");
4223        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4224    }
4225
4226    #[test]
4227    fn test_ts_multiline_import_no_entities() {
4228        // Issue #24: file with only imports, no other entities
4229        let base = "\
4230import type { Foo } from \"./foo\"
4231import {
4232     type a,
4233     type b,
4234     type c,
4235} from \"./foo\"
4236";
4237        let ours = base;
4238        let theirs = "\
4239import {
4240     type Foo,
4241     type a,
4242     type b,
4243     type c,
4244} from \"./foo\"
4245";
4246        let result = entity_merge(base, ours, theirs, "test.ts");
4247        eprintln!("TS import no entities: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4248        eprintln!("Content:\n{}", result.content);
4249        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4250        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4251    }
4252
4253    #[test]
4254    fn test_ts_multiline_import_export_variable() {
4255        // Issue #24: import block near an export variable entity
4256        let base = "\
4257import type { Foo } from \"./foo\"
4258import {
4259     type a,
4260     type b,
4261     type c,
4262} from \"./foo\"
4263
4264export const X = 1;
4265
4266export function bar() {
4267    return 1;
4268}
4269";
4270        let ours = "\
4271import type { Foo } from \"./foo\"
4272import {
4273     type a,
4274     type b,
4275     type c,
4276     type d,
4277} from \"./foo\"
4278
4279export const X = 1;
4280
4281export function bar() {
4282    return 1;
4283}
4284";
4285        let theirs = "\
4286import {
4287     type Foo,
4288     type a,
4289     type b,
4290     type c,
4291} from \"./foo\"
4292
4293export const X = 2;
4294
4295export function bar() {
4296    return 1;
4297}
4298";
4299        let result = entity_merge(base, ours, theirs, "test.ts");
4300        eprintln!("TS import + export var: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4301        eprintln!("Content:\n{}", result.content);
4302        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4303    }
4304
4305    #[test]
4306    fn test_ts_multiline_import_adjacent_to_entity() {
4307        // Issue #24: import block directly adjacent to entity (no blank line)
4308        let base = "\
4309import type { Foo } from \"./foo\"
4310import {
4311     type a,
4312     type b,
4313     type c,
4314} from \"./foo\"
4315export function bar() {
4316    return 1;
4317}
4318";
4319        let ours = base;
4320        let theirs = "\
4321import {
4322     type Foo,
4323     type a,
4324     type b,
4325     type c,
4326} from \"./foo\"
4327export function bar() {
4328    return 1;
4329}
4330";
4331        let result = entity_merge(base, ours, theirs, "test.ts");
4332        eprintln!("TS import adjacent: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4333        eprintln!("Content:\n{}", result.content);
4334        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4335        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4336    }
4337
4338    #[test]
4339    fn test_ts_multiline_import_both_consolidate_differently() {
4340        // Issue #24: both sides consolidate imports but add different specifiers
4341        let base = "\
4342import type { Foo } from \"./foo\"
4343import {
4344     type a,
4345     type b,
4346} from \"./foo\"
4347
4348export function bar() {
4349    return 1;
4350}
4351";
4352        let ours = "\
4353import {
4354     type Foo,
4355     type a,
4356     type b,
4357     type c,
4358} from \"./foo\"
4359
4360export function bar() {
4361    return 1;
4362}
4363";
4364        let theirs = "\
4365import {
4366     type Foo,
4367     type a,
4368     type b,
4369     type d,
4370} from \"./foo\"
4371
4372export function bar() {
4373    return 1;
4374}
4375";
4376        let result = entity_merge(base, ours, theirs, "test.ts");
4377        eprintln!("TS both consolidate: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4378        eprintln!("Content:\n{}", result.content);
4379        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4380        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4381        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4382    }
4383
4384    #[test]
4385    fn test_ts_multiline_import_ours_adds_theirs_consolidates() {
4386        // Issue #24 variant: ours adds new import, theirs consolidates
4387        let base = "\
4388import type { Foo } from \"./foo\"
4389import {
4390     type a,
4391     type b,
4392     type c,
4393} from \"./foo\"
4394
4395export function bar() {
4396    return 1;
4397}
4398";
4399        // Ours: adds a new specifier to the multiline import
4400        let ours = "\
4401import type { Foo } from \"./foo\"
4402import {
4403     type a,
4404     type b,
4405     type c,
4406     type d,
4407} from \"./foo\"
4408
4409export function bar() {
4410    return 1;
4411}
4412";
4413        // Theirs: consolidates into one import
4414        let theirs = "\
4415import {
4416     type Foo,
4417     type a,
4418     type b,
4419     type c,
4420} from \"./foo\"
4421
4422export function bar() {
4423    return 1;
4424}
4425";
4426        let result = entity_merge(base, ours, theirs, "test.ts");
4427        eprintln!("TS import ours-adds theirs-consolidates: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4428        eprintln!("Content:\n{}", result.content);
4429        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4430        assert!(result.content.contains("type d,"), "ours addition must be present");
4431        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4432    }
4433}