Skip to main content

weave_core/
merge.rs

1use std::collections::{HashMap, HashSet};
2use std::io::Write;
3use std::process::Command;
4use std::sync::{mpsc, LazyLock};
5use std::time::Duration;
6
7use serde::Serialize;
8use sem_core::model::change::ChangeType;
9use sem_core::model::entity::SemanticEntity;
10use sem_core::model::identity::match_entities;
11use sem_core::parser::plugins::create_default_registry;
12use sem_core::parser::registry::ParserRegistry;
13
14/// Static parser registry shared across all merge operations.
15/// Avoids recreating 11 tree-sitter language parsers per merge call.
16static PARSER_REGISTRY: LazyLock<ParserRegistry> = LazyLock::new(create_default_registry);
17
18use crate::conflict::{classify_conflict, ConflictKind, EntityConflict, MarkerFormat, MergeStats};
19use crate::region::{extract_regions, EntityRegion, FileRegion};
20use crate::validate::SemanticWarning;
21use crate::reconstruct::reconstruct;
22
23/// How an individual entity was resolved during merge.
24#[derive(Debug, Clone, Serialize)]
25#[serde(rename_all = "snake_case")]
26pub enum ResolutionStrategy {
27    Unchanged,
28    OursOnly,
29    TheirsOnly,
30    ContentEqual,
31    DiffyMerged,
32    DecoratorMerged,
33    InnerMerged,
34    ConflictBothModified,
35    ConflictModifyDelete,
36    ConflictBothAdded,
37    ConflictRenameRename,
38    AddedOurs,
39    AddedTheirs,
40    Deleted,
41    Renamed { from: String, to: String },
42    Fallback,
43}
44
45/// Audit record for a single entity's merge resolution.
46#[derive(Debug, Clone, Serialize)]
47pub struct EntityAudit {
48    pub name: String,
49    #[serde(rename = "type")]
50    pub entity_type: String,
51    pub resolution: ResolutionStrategy,
52}
53
54/// Result of a merge operation.
55#[derive(Debug)]
56pub struct MergeResult {
57    pub content: String,
58    pub conflicts: Vec<EntityConflict>,
59    pub warnings: Vec<SemanticWarning>,
60    pub stats: MergeStats,
61    pub audit: Vec<EntityAudit>,
62}
63
64impl MergeResult {
65    pub fn is_clean(&self) -> bool {
66        self.conflicts.is_empty()
67            && !self.content.lines().any(|l| l.starts_with("<<<<<<< ours"))
68    }
69}
70
71/// The resolved content for a single entity after merging.
72#[derive(Debug, Clone)]
73pub enum ResolvedEntity {
74    /// Clean resolution — use this content.
75    Clean(EntityRegion),
76    /// Conflict — render conflict markers.
77    Conflict(EntityConflict),
78    /// Inner merge with per-member scoped conflicts.
79    /// Content already contains per-member conflict markers; emit as-is.
80    ScopedConflict {
81        content: String,
82        conflict: EntityConflict,
83    },
84    /// Entity was deleted.
85    Deleted,
86}
87
88/// Perform entity-level 3-way merge.
89///
90/// Falls back to line-level merge (via diffy) when:
91/// - No parser matches the file type
92/// - Parser returns 0 entities for non-empty content
93/// - File exceeds 1MB
94pub fn entity_merge(
95    base: &str,
96    ours: &str,
97    theirs: &str,
98    file_path: &str,
99) -> MergeResult {
100    entity_merge_fmt(base, ours, theirs, file_path, &MarkerFormat::default())
101}
102
103/// Perform entity-level 3-way merge with configurable marker format.
104pub fn entity_merge_fmt(
105    base: &str,
106    ours: &str,
107    theirs: &str,
108    file_path: &str,
109    marker_format: &MarkerFormat,
110) -> MergeResult {
111    let timeout_secs = std::env::var("WEAVE_TIMEOUT")
112        .ok()
113        .and_then(|v| v.parse::<u64>().ok())
114        .unwrap_or(5);
115
116    // Timeout: if entity merge takes too long, diffy is likely hitting
117    // pathological input. Fall back to git merge-file which always terminates.
118    let base_owned = base.to_string();
119    let ours_owned = ours.to_string();
120    let theirs_owned = theirs.to_string();
121    let path_owned = file_path.to_string();
122    let fmt_owned = marker_format.clone();
123
124    let (tx, rx) = mpsc::channel();
125    std::thread::spawn(move || {
126        let result = entity_merge_with_registry(&base_owned, &ours_owned, &theirs_owned, &path_owned, &PARSER_REGISTRY, &fmt_owned);
127        let _ = tx.send(result);
128    });
129
130    match rx.recv_timeout(Duration::from_secs(timeout_secs)) {
131        Ok(result) => result,
132        Err(_) => {
133            eprintln!("weave: merge timed out after {}s for {}, falling back to git merge-file", timeout_secs, file_path);
134            let mut stats = MergeStats::default();
135            stats.used_fallback = true;
136            git_merge_file(base, ours, theirs, &mut stats)
137        }
138    }
139}
140
/// Entity-level 3-way merge of `base`, `ours` and `theirs` using an explicit parser registry.
///
/// Order of checks, as implemented below:
/// 1. If any input already contains conflict markers, return it as a single
///    file-level `BothModified` conflict.
/// 2. Trivial cases (`ours == theirs`, `base == ours`, `base == theirs`) return
///    one side verbatim with an empty audit trail.
/// 3. Inputs flagged by `is_binary` go to `git_merge_file`. Inputs over
///    1_000_000 bytes, files whose plugin is missing or is the `"fallback"`
///    plugin, content that yields no entities, and files with excessive
///    duplicate entities go to `line_level_fallback`.
/// 4. Otherwise entities are extracted, matched against base, resolved one at a
///    time via `resolve_entity`, interstitial regions are merged, and the file
///    is rebuilt with `reconstruct` and passed through `post_merge_cleanup`.
/// 5. If the rebuilt content has more `<<<<<<<` lines than the `git_merge_file`
///    result, the `git_merge_file` result is returned instead.
141pub fn entity_merge_with_registry(
142    base: &str,
143    ours: &str,
144    theirs: &str,
145    file_path: &str,
146    registry: &ParserRegistry,
147    marker_format: &MarkerFormat,
148) -> MergeResult {
149    // Guard: if any input already contains conflict markers (e.g. AU/AA conflicts
150    // where git bakes markers into stage blobs), report as conflict immediately.
151    // We can't do a meaningful 3-way merge on pre-conflicted content.
152    if has_conflict_markers(base) || has_conflict_markers(ours) || has_conflict_markers(theirs) {
153        let mut stats = MergeStats::default();
154        stats.entities_conflicted = 1;
155        stats.used_fallback = true;
156        // Use whichever input has markers as the merged content (preserves
157        // the conflict for the user to resolve manually).
158        let content = if has_conflict_markers(ours) {
159            ours
160        } else if has_conflict_markers(theirs) {
161            theirs
162        } else {
163            base
164        };
165        let complexity = classify_conflict(Some(base), Some(ours), Some(theirs));
166        return MergeResult {
167            content: content.to_string(),
168            conflicts: vec![EntityConflict {
169                entity_name: "(file)".to_string(),
170                entity_type: "file".to_string(),
171                kind: ConflictKind::BothModified,
172                complexity,
173                ours_content: Some(ours.to_string()),
174                theirs_content: Some(theirs.to_string()),
175                base_content: Some(base.to_string()),
176            }],
177            warnings: vec![],
178            stats,
179            audit: vec![],
180        };
181    }
182
183    // Fast path: if ours == theirs, no merge needed
184    if ours == theirs {
185        return MergeResult {
186            content: ours.to_string(),
187            conflicts: vec![],
188            warnings: vec![],
189            stats: MergeStats::default(),
190            audit: vec![],
191        };
192    }
193
194    // Fast path: if base == ours, take theirs entirely
195    if base == ours {
196        return MergeResult {
197            content: theirs.to_string(),
198            conflicts: vec![],
199            warnings: vec![],
200            stats: MergeStats {
201                entities_theirs_only: 1,
202                ..Default::default()
203            },
204            audit: vec![],
205        };
206    }
207
208    // Fast path: if base == theirs, take ours entirely
209    if base == theirs {
210        return MergeResult {
211            content: ours.to_string(),
212            conflicts: vec![],
213            warnings: vec![],
214            stats: MergeStats {
215                entities_ours_only: 1,
216                ..Default::default()
217            },
218            audit: vec![],
219        };
220    }
221
222    // Binary file detection: if any version has null bytes, use git merge-file directly
223    if is_binary(base) || is_binary(ours) || is_binary(theirs) {
224        let mut stats = MergeStats::default();
225        stats.used_fallback = true;
226        return git_merge_file(base, ours, theirs, &mut stats);
227    }
228
229    // Large file fallback
    // (`len()` is the byte length, so the limit is 1_000_000 bytes per input.)
230    if base.len() > 1_000_000 || ours.len() > 1_000_000 || theirs.len() > 1_000_000 {
231        return line_level_fallback(base, ours, theirs, file_path);
232    }
233
234    // If the file type isn't natively supported, the registry returns the fallback
235    // plugin (20-line chunks). Entity merge on arbitrary chunks produces WORSE
236    // results than line-level merge (confirmed on GitButler's .svelte files where
237    // chunk boundaries don't align with structural boundaries). So we skip entity
238    // merge entirely for fallback-plugin files and go straight to line-level merge.
239    let plugin = match registry.get_plugin(file_path) {
240        Some(p) if p.id() != "fallback" => p,
241        _ => return line_level_fallback(base, ours, theirs, file_path),
242    };
243
244    // Extract entities from all three versions. Keep unfiltered lists for inner merge
245    // (child entities provide tree-sitter-based method decomposition for classes).
246    let base_all = plugin.extract_entities(base, file_path);
247    let ours_all = plugin.extract_entities(ours, file_path);
248    let theirs_all = plugin.extract_entities(theirs, file_path);
249
250    // Filter out nested entities for top-level matching and region extraction
251    let base_entities = filter_nested_entities(base_all.clone());
252    let ours_entities = filter_nested_entities(ours_all.clone());
253    let theirs_entities = filter_nested_entities(theirs_all.clone());
254
255    // Fallback if parser returns nothing for non-empty content
256    if base_entities.is_empty() && !base.trim().is_empty() {
257        return line_level_fallback(base, ours, theirs, file_path);
258    }
259    // Allow empty entities if content is actually empty
    // NOTE(review): this falls back only when BOTH ours and theirs yield no
    // entities for non-empty content; if just one side does, entity merge
    // proceeds. Confirm that is the intended condition.
260    if ours_entities.is_empty() && !ours.trim().is_empty() && theirs_entities.is_empty() && !theirs.trim().is_empty() {
261        return line_level_fallback(base, ours, theirs, file_path);
262    }
263
264    // Fallback if too many duplicate entity names. Entity matching is O(n*m) on
265    // same-named entities which can hang on files with many `var app = ...` etc.
266    if has_excessive_duplicates(&base_entities) || has_excessive_duplicates(&ours_entities) || has_excessive_duplicates(&theirs_entities) {
267        return line_level_fallback(base, ours, theirs, file_path);
268    }
269
270    // Extract regions from all three
271    let base_regions = extract_regions(base, &base_entities);
272    let ours_regions = extract_regions(ours, &ours_entities);
273    let theirs_regions = extract_regions(theirs, &theirs_entities);
274
275    // Build region content maps (entity_id → content from file lines, preserving
276    // surrounding syntax like `export` that sem-core's entity.content may strip)
277    let base_region_content = build_region_content_map(&base_regions);
278    let ours_region_content = build_region_content_map(&ours_regions);
279    let theirs_region_content = build_region_content_map(&theirs_regions);
280
281    // Match entities: base↔ours and base↔theirs
282    let ours_changes = match_entities(&base_entities, &ours_entities, file_path, None, None, None);
283    let theirs_changes = match_entities(&base_entities, &theirs_entities, file_path, None, None, None);
284
285    // Build lookup maps
286    let base_entity_map: HashMap<&str, &SemanticEntity> =
287        base_entities.iter().map(|e| (e.id.as_str(), e)).collect();
288    let ours_entity_map: HashMap<&str, &SemanticEntity> =
289        ours_entities.iter().map(|e| (e.id.as_str(), e)).collect();
290    let theirs_entity_map: HashMap<&str, &SemanticEntity> =
291        theirs_entities.iter().map(|e| (e.id.as_str(), e)).collect();
292
293    // Classify what happened to each entity in each branch
294    let mut ours_change_map: HashMap<String, ChangeType> = HashMap::new();
295    for change in &ours_changes.changes {
296        ours_change_map.insert(change.entity_id.clone(), change.change_type);
297    }
298    let mut theirs_change_map: HashMap<String, ChangeType> = HashMap::new();
299    for change in &theirs_changes.changes {
300        theirs_change_map.insert(change.entity_id.clone(), change.change_type);
301    }
302
303    // Detect renames using structural_hash (RefFilter / IntelliMerge-inspired).
304    // When one branch renames an entity, connect the old and new IDs so the merge
305    // treats it as the same entity rather than a delete+add.
306    let ours_rename_to_base = build_rename_map(&base_entities, &ours_entities);
307    let theirs_rename_to_base = build_rename_map(&base_entities, &theirs_entities);
308    // Reverse maps: base_id → renamed_id in that branch
309    let base_to_ours_rename: HashMap<String, String> = ours_rename_to_base
310        .iter()
311        .map(|(new, old)| (old.clone(), new.clone()))
312        .collect();
313    let base_to_theirs_rename: HashMap<String, String> = theirs_rename_to_base
314        .iter()
315        .map(|(new, old)| (old.clone(), new.clone()))
316        .collect();
317
318    // Collect all entity IDs across all versions
319    let mut all_entity_ids: Vec<String> = Vec::new();
320    let mut seen: HashSet<String> = HashSet::new();
321    // Track renamed IDs so we don't process them twice
322    let mut skip_ids: HashSet<String> = HashSet::new();
323    // The "new" IDs from renames should be skipped — they'll be handled via the base ID
324    for new_id in ours_rename_to_base.keys() {
325        skip_ids.insert(new_id.clone());
326    }
327    for new_id in theirs_rename_to_base.keys() {
328        skip_ids.insert(new_id.clone());
329    }
330
331    // Start with ours ordering (skeleton)
332    for entity in &ours_entities {
333        if skip_ids.contains(&entity.id) {
334            continue;
335        }
336        if seen.insert(entity.id.clone()) {
337            all_entity_ids.push(entity.id.clone());
338        }
339    }
340    // Add theirs-only entities
341    for entity in &theirs_entities {
342        if skip_ids.contains(&entity.id) {
343            continue;
344        }
345        if seen.insert(entity.id.clone()) {
346            all_entity_ids.push(entity.id.clone());
347        }
348    }
349    // Add base-only entities (deleted in both → skip, deleted in one → handled below)
350    for entity in &base_entities {
351        if seen.insert(entity.id.clone()) {
352            all_entity_ids.push(entity.id.clone());
353        }
354    }
355
356    let mut stats = MergeStats::default();
357    let mut conflicts: Vec<EntityConflict> = Vec::new();
358    let mut audit: Vec<EntityAudit> = Vec::new();
359    let mut resolved_entities: HashMap<String, ResolvedEntity> = HashMap::new();
360
361    // Detect rename/rename conflicts: same base entity renamed differently in both branches.
362    // These must be flagged before the entity resolution loop, which would otherwise silently
363    // pick ours and also include theirs as an unmatched entity.
364    let mut rename_conflict_ids: HashSet<String> = HashSet::new();
365    for (base_id, ours_new_id) in &base_to_ours_rename {
366        if let Some(theirs_new_id) = base_to_theirs_rename.get(base_id) {
367            if ours_new_id != theirs_new_id {
368                rename_conflict_ids.insert(base_id.clone());
369            }
370        }
371    }
372
373    for entity_id in &all_entity_ids {
374        // Handle rename/rename conflicts: both branches renamed this base entity differently
375        if rename_conflict_ids.contains(entity_id) {
            // Indexing cannot panic here: `rename_conflict_ids` only holds base IDs
            // that are keys of both rename maps (see the detection loop above).
376            let ours_new_id = &base_to_ours_rename[entity_id];
377            let theirs_new_id = &base_to_theirs_rename[entity_id];
378            let base_entity = base_entity_map.get(entity_id.as_str());
379            let ours_entity = ours_entity_map.get(ours_new_id.as_str());
380            let theirs_entity = theirs_entity_map.get(theirs_new_id.as_str());
381            let base_name = base_entity.map(|e| e.name.as_str()).unwrap_or(entity_id);
382            let ours_name = ours_entity.map(|e| e.name.as_str()).unwrap_or(ours_new_id);
383            let theirs_name = theirs_entity.map(|e| e.name.as_str()).unwrap_or(theirs_new_id);
384
            // Prefer the region text taken from the file; fall back to the parser's entity content.
385            let base_rc = base_entity.map(|e| base_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
386            let ours_rc = ours_entity.map(|e| ours_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
387            let theirs_rc = theirs_entity.map(|e| theirs_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
388
389            stats.entities_conflicted += 1;
390            let conflict = EntityConflict {
391                entity_name: base_name.to_string(),
392                entity_type: base_entity.map(|e| e.entity_type.clone()).unwrap_or_default(),
393                kind: ConflictKind::RenameRename {
394                    base_name: base_name.to_string(),
395                    ours_name: ours_name.to_string(),
396                    theirs_name: theirs_name.to_string(),
397                },
398                complexity: crate::conflict::ConflictComplexity::Syntax,
399                ours_content: ours_rc,
400                theirs_content: theirs_rc,
401                base_content: base_rc,
402            };
403            conflicts.push(conflict.clone());
404            audit.push(EntityAudit {
405                name: base_name.to_string(),
406                entity_type: base_entity.map(|e| e.entity_type.clone()).unwrap_or_default(),
407                resolution: ResolutionStrategy::ConflictRenameRename,
408            });
409            let resolution = ResolvedEntity::Conflict(conflict);
410            resolved_entities.insert(entity_id.clone(), resolution.clone());
411            resolved_entities.insert(ours_new_id.clone(), resolution);
412            // Mark theirs renamed ID as Deleted so reconstruct doesn't emit the conflict twice
413            // (once from ours skeleton, once from theirs-only insertion)
414            resolved_entities.insert(theirs_new_id.clone(), ResolvedEntity::Deleted);
415            continue;
416        }
417
418        let in_base = base_entity_map.get(entity_id.as_str());
419        // Follow rename chains: if base entity was renamed in ours/theirs, use renamed version
420        let ours_id = base_to_ours_rename.get(entity_id.as_str()).map(|s| s.as_str()).unwrap_or(entity_id.as_str());
421        let theirs_id = base_to_theirs_rename.get(entity_id.as_str()).map(|s| s.as_str()).unwrap_or(entity_id.as_str());
422        let in_ours = ours_entity_map.get(ours_id).or_else(|| ours_entity_map.get(entity_id.as_str()));
423        let in_theirs = theirs_entity_map.get(theirs_id).or_else(|| theirs_entity_map.get(entity_id.as_str()));
424
425        let ours_change = ours_change_map.get(entity_id);
426        let theirs_change = theirs_change_map.get(entity_id);
427
428        let (resolution, strategy) = resolve_entity(
429            entity_id,
430            in_base,
431            in_ours,
432            in_theirs,
433            ours_change,
434            theirs_change,
435            &base_region_content,
436            &ours_region_content,
437            &theirs_region_content,
438            &base_all,
439            &ours_all,
440            &theirs_all,
441            &mut stats,
442            marker_format,
443        );
444
445        // Build audit entry from entity info
        // (name/type are taken from ours first, then theirs, then base)
446        let entity_name = in_ours.map(|e| e.name.as_str())
447            .or_else(|| in_theirs.map(|e| e.name.as_str()))
448            .or_else(|| in_base.map(|e| e.name.as_str()))
449            .unwrap_or(entity_id)
450            .to_string();
451        let entity_type = in_ours.map(|e| e.entity_type.as_str())
452            .or_else(|| in_theirs.map(|e| e.entity_type.as_str()))
453            .or_else(|| in_base.map(|e| e.entity_type.as_str()))
454            .unwrap_or("")
455            .to_string();
456        audit.push(EntityAudit {
457            name: entity_name,
458            entity_type,
459            resolution: strategy,
460        });
461
462        match &resolution {
463            ResolvedEntity::Conflict(ref c) => conflicts.push(c.clone()),
464            ResolvedEntity::ScopedConflict { conflict, .. } => conflicts.push(conflict.clone()),
465            _ => {}
466        }
467
468        resolved_entities.insert(entity_id.clone(), resolution.clone());
469        // Also store under renamed IDs so reconstruct can find them
470        if let Some(ours_renamed_id) = base_to_ours_rename.get(entity_id.as_str()) {
471            resolved_entities.insert(ours_renamed_id.clone(), resolution.clone());
472        }
473        if let Some(theirs_renamed_id) = base_to_theirs_rename.get(entity_id.as_str()) {
474            resolved_entities.insert(theirs_renamed_id.clone(), resolution);
475        }
476    }
477
478    // Merge interstitial regions
479    let (merged_interstitials, interstitial_conflicts) =
480        merge_interstitials(&base_regions, &ours_regions, &theirs_regions, marker_format);
481    stats.entities_conflicted += interstitial_conflicts.len();
482    conflicts.extend(interstitial_conflicts);
483
484    // Reconstruct the file
485    let content = reconstruct(
486        &ours_regions,
487        &theirs_regions,
488        &theirs_entities,
489        &ours_entity_map,
490        &resolved_entities,
491        &merged_interstitials,
492        marker_format,
493    );
494
495    // Post-merge cleanup: remove duplicate lines and normalize blank lines
496    let content = post_merge_cleanup(&content);
497
498    // Post-merge parse validation: verify the merged result still parses correctly
499    // (MergeBot-inspired safety check — catch syntactically broken merges)
    // Only runs for conflict-free results in which at least one entity was
    // merged from changes on both sides.
500    let mut warnings = vec![];
501    if conflicts.is_empty() && stats.entities_both_changed_merged > 0 {
502        let merged_entities = plugin.extract_entities(&content, file_path);
503        if merged_entities.is_empty() && !content.trim().is_empty() {
504            warnings.push(crate::validate::SemanticWarning {
505                entity_name: "(file)".to_string(),
506                entity_type: "file".to_string(),
507                file_path: file_path.to_string(),
508                kind: crate::validate::WarningKind::ParseFailedAfterMerge,
509                related: vec![],
510            });
511        }
512    }
513
    // `entity_result` owns a clone of `stats`, so anything `git_merge_file`
    // writes into the local `stats` below does not affect `entity_result`.
514    let entity_result = MergeResult {
515        content,
516        conflicts,
517        warnings,
518        stats: stats.clone(),
519        audit,
520    };
521
522    // Floor: never produce more conflict markers than git merge-file.
523    // Entity merge can split one git conflict into multiple per-entity conflicts,
524    // or interstitial merges can produce conflicts not tracked in the conflicts vec.
525    let entity_markers = entity_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
526    if entity_markers > 0 {
527        let git_result = git_merge_file(base, ours, theirs, &mut stats);
528        let git_markers = git_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
529        if entity_markers > git_markers {
530            return git_result;
531        }
532    }
533
534    entity_result
535}
536
537fn resolve_entity(
538    _entity_id: &str,
539    in_base: Option<&&SemanticEntity>,
540    in_ours: Option<&&SemanticEntity>,
541    in_theirs: Option<&&SemanticEntity>,
542    _ours_change: Option<&ChangeType>,
543    _theirs_change: Option<&ChangeType>,
544    base_region_content: &HashMap<&str, &str>,
545    ours_region_content: &HashMap<&str, &str>,
546    theirs_region_content: &HashMap<&str, &str>,
547    base_all: &[SemanticEntity],
548    ours_all: &[SemanticEntity],
549    theirs_all: &[SemanticEntity],
550    stats: &mut MergeStats,
551    marker_format: &MarkerFormat,
552) -> (ResolvedEntity, ResolutionStrategy) {
553    // Helper: get region content (from file lines) for an entity, falling back to entity.content
554    let region_content = |entity: &SemanticEntity, map: &HashMap<&str, &str>| -> String {
555        map.get(entity.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| entity.content.clone())
556    };
557
558    match (in_base, in_ours, in_theirs) {
559        // Entity exists in all three versions
560        (Some(base), Some(ours), Some(theirs)) => {
561            // Check modification status via structural hash AND region content.
562            // Region content may differ even when structural hash is the same
563            // (e.g., doc comment added/changed but function body unchanged).
564            let base_rc_lazy = || region_content(base, base_region_content);
565            let ours_rc_lazy = || region_content(ours, ours_region_content);
566            let theirs_rc_lazy = || region_content(theirs, theirs_region_content);
567
568            let ours_modified = ours.content_hash != base.content_hash
569                || ours_rc_lazy() != base_rc_lazy();
570            let theirs_modified = theirs.content_hash != base.content_hash
571                || theirs_rc_lazy() != base_rc_lazy();
572
573            match (ours_modified, theirs_modified) {
574                (false, false) => {
575                    // Neither changed
576                    stats.entities_unchanged += 1;
577                    (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::Unchanged)
578                }
579                (true, false) => {
580                    // Only ours changed
581                    stats.entities_ours_only += 1;
582                    (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::OursOnly)
583                }
584                (false, true) => {
585                    // Only theirs changed
586                    stats.entities_theirs_only += 1;
587                    (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &region_content(theirs, theirs_region_content))), ResolutionStrategy::TheirsOnly)
588                }
589                (true, true) => {
590                    // Both changed — try intra-entity merge
591                    if ours.content_hash == theirs.content_hash {
592                        // Same change in both — take ours
593                        stats.entities_both_changed_merged += 1;
594                        (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::ContentEqual)
595                    } else {
596                        // Try diffy 3-way merge on region content (preserves full syntax)
597                        let base_rc = region_content(base, base_region_content);
598                        let ours_rc = region_content(ours, ours_region_content);
599                        let theirs_rc = region_content(theirs, theirs_region_content);
600
601                        // Whitespace-aware shortcut: if one side only changed
602                        // whitespace/formatting, take the other side's content changes.
603                        // This handles the common case where one agent reformats while
604                        // another makes semantic changes.
605                        if is_whitespace_only_diff(&base_rc, &ours_rc) {
606                            stats.entities_theirs_only += 1;
607                            return (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &theirs_rc)), ResolutionStrategy::TheirsOnly);
608                        }
609                        if is_whitespace_only_diff(&base_rc, &theirs_rc) {
610                            stats.entities_ours_only += 1;
611                            return (ResolvedEntity::Clean(entity_to_region_with_content(ours, &ours_rc)), ResolutionStrategy::OursOnly);
612                        }
613
614                        match diffy_merge(&base_rc, &ours_rc, &theirs_rc) {
615                            Some(merged) => {
616                                stats.entities_both_changed_merged += 1;
617                                stats.resolved_via_diffy += 1;
618                                (ResolvedEntity::Clean(EntityRegion {
619                                    entity_id: ours.id.clone(),
620                                    entity_name: ours.name.clone(),
621                                    entity_type: ours.entity_type.clone(),
622                                    content: merged,
623                                    start_line: ours.start_line,
624                                    end_line: ours.end_line,
625                                }), ResolutionStrategy::DiffyMerged)
626                            }
627                            None => {
628                                // Strategy 1: decorator/annotation-aware merge
629                                // Decorators are unordered annotations — merge them commutatively
630                                if let Some(merged) = try_decorator_aware_merge(&base_rc, &ours_rc, &theirs_rc) {
631                                    stats.entities_both_changed_merged += 1;
632                                    stats.resolved_via_diffy += 1;
633                                    return (ResolvedEntity::Clean(EntityRegion {
634                                        entity_id: ours.id.clone(),
635                                        entity_name: ours.name.clone(),
636                                        entity_type: ours.entity_type.clone(),
637                                        content: merged,
638                                        start_line: ours.start_line,
639                                        end_line: ours.end_line,
640                                    }), ResolutionStrategy::DecoratorMerged);
641                                }
642
643                                // Strategy 2: inner entity merge for container types
644                                // (LastMerge insight: class members are unordered children)
645                                if is_container_entity_type(&ours.entity_type) {
646                                    let base_children = in_base
647                                        .map(|b| get_child_entities(b, base_all))
648                                        .unwrap_or_default();
649                                    let ours_children = get_child_entities(ours, ours_all);
650                                    let theirs_children = in_theirs
651                                        .map(|t| get_child_entities(t, theirs_all))
652                                        .unwrap_or_default();
653                                    let base_start = in_base.map(|b| b.start_line).unwrap_or(1);
654                                    let ours_start = ours.start_line;
655                                    let theirs_start = in_theirs.map(|t| t.start_line).unwrap_or(1);
656                                    if let Some(inner) = try_inner_entity_merge(
657                                        &base_rc, &ours_rc, &theirs_rc,
658                                        &base_children, &ours_children, &theirs_children,
659                                        base_start, ours_start, theirs_start,
660                                        marker_format,
661                                    ) {
662                                        if inner.has_conflicts {
663                                            // Inner merge produced per-member conflicts:
664                                            // content has scoped markers for just the conflicted
665                                            // members; clean members are merged normally.
666                                            stats.entities_conflicted += 1;
667                                            stats.resolved_via_inner_merge += 1;
668                                            let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), Some(&theirs_rc));
669                                            return (ResolvedEntity::ScopedConflict {
670                                                content: inner.content,
671                                                conflict: EntityConflict {
672                                                    entity_name: ours.name.clone(),
673                                                    entity_type: ours.entity_type.clone(),
674                                                    kind: ConflictKind::BothModified,
675                                                    complexity,
676                                                    ours_content: Some(ours_rc),
677                                                    theirs_content: Some(theirs_rc),
678                                                    base_content: Some(base_rc),
679                                                },
680                                            }, ResolutionStrategy::InnerMerged);
681                                        } else {
682                                            stats.entities_both_changed_merged += 1;
683                                            stats.resolved_via_inner_merge += 1;
684                                            return (ResolvedEntity::Clean(EntityRegion {
685                                                entity_id: ours.id.clone(),
686                                                entity_name: ours.name.clone(),
687                                                entity_type: ours.entity_type.clone(),
688                                                content: inner.content,
689                                                start_line: ours.start_line,
690                                                end_line: ours.end_line,
691                                            }), ResolutionStrategy::InnerMerged);
692                                        }
693                                    }
694                                }
695                                stats.entities_conflicted += 1;
696                                let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), Some(&theirs_rc));
697                                (ResolvedEntity::Conflict(EntityConflict {
698                                    entity_name: ours.name.clone(),
699                                    entity_type: ours.entity_type.clone(),
700                                    kind: ConflictKind::BothModified,
701                                    complexity,
702                                    ours_content: Some(ours_rc),
703                                    theirs_content: Some(theirs_rc),
704                                    base_content: Some(base_rc),
705                                }), ResolutionStrategy::ConflictBothModified)
706                            }
707                        }
708                    }
709                }
710            }
711        }
712
713        // Entity in base and ours, but not theirs → theirs deleted it
714        (Some(_base), Some(ours), None) => {
715            let ours_modified = ours.content_hash != _base.content_hash;
716            if ours_modified {
717                // Modify/delete conflict
718                stats.entities_conflicted += 1;
719                let ours_rc = region_content(ours, ours_region_content);
720                let base_rc = region_content(_base, base_region_content);
721                let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), None);
722                (ResolvedEntity::Conflict(EntityConflict {
723                    entity_name: ours.name.clone(),
724                    entity_type: ours.entity_type.clone(),
725                    kind: ConflictKind::ModifyDelete {
726                        modified_in_ours: true,
727                    },
728                    complexity,
729                    ours_content: Some(ours_rc),
730                    theirs_content: None,
731                    base_content: Some(base_rc),
732                }), ResolutionStrategy::ConflictModifyDelete)
733            } else {
734                // Theirs deleted, ours unchanged → accept deletion
735                stats.entities_deleted += 1;
736                (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
737            }
738        }
739
740        // Entity in base and theirs, but not ours → ours deleted it
741        (Some(_base), None, Some(theirs)) => {
742            let theirs_modified = theirs.content_hash != _base.content_hash;
743            if theirs_modified {
744                // Modify/delete conflict
745                stats.entities_conflicted += 1;
746                let theirs_rc = region_content(theirs, theirs_region_content);
747                let base_rc = region_content(_base, base_region_content);
748                let complexity = classify_conflict(Some(&base_rc), None, Some(&theirs_rc));
749                (ResolvedEntity::Conflict(EntityConflict {
750                    entity_name: theirs.name.clone(),
751                    entity_type: theirs.entity_type.clone(),
752                    kind: ConflictKind::ModifyDelete {
753                        modified_in_ours: false,
754                    },
755                    complexity,
756                    ours_content: None,
757                    theirs_content: Some(theirs_rc),
758                    base_content: Some(base_rc),
759                }), ResolutionStrategy::ConflictModifyDelete)
760            } else {
761                // Ours deleted, theirs unchanged → accept deletion
762                stats.entities_deleted += 1;
763                (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
764            }
765        }
766
767        // Entity only in ours (added by ours)
768        (None, Some(ours), None) => {
769            stats.entities_added_ours += 1;
770            (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::AddedOurs)
771        }
772
773        // Entity only in theirs (added by theirs)
774        (None, None, Some(theirs)) => {
775            stats.entities_added_theirs += 1;
776            (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &region_content(theirs, theirs_region_content))), ResolutionStrategy::AddedTheirs)
777        }
778
779        // Entity in both ours and theirs but not base (both added)
780        (None, Some(ours), Some(theirs)) => {
781            if ours.content_hash == theirs.content_hash {
782                // Same content added by both → take ours
783                stats.entities_added_ours += 1;
784                (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::ContentEqual)
785            } else {
786                // Different content → conflict
787                stats.entities_conflicted += 1;
788                let ours_rc = region_content(ours, ours_region_content);
789                let theirs_rc = region_content(theirs, theirs_region_content);
790                let complexity = classify_conflict(None, Some(&ours_rc), Some(&theirs_rc));
791                (ResolvedEntity::Conflict(EntityConflict {
792                    entity_name: ours.name.clone(),
793                    entity_type: ours.entity_type.clone(),
794                    kind: ConflictKind::BothAdded,
795                    complexity,
796                    ours_content: Some(ours_rc),
797                    theirs_content: Some(theirs_rc),
798                    base_content: None,
799                }), ResolutionStrategy::ConflictBothAdded)
800            }
801        }
802
803        // Entity only in base (deleted by both)
804        (Some(_), None, None) => {
805            stats.entities_deleted += 1;
806            (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
807        }
808
809        // Should not happen
810        (None, None, None) => (ResolvedEntity::Deleted, ResolutionStrategy::Deleted),
811    }
812}
813
814fn entity_to_region_with_content(entity: &SemanticEntity, content: &str) -> EntityRegion {
815    EntityRegion {
816        entity_id: entity.id.clone(),
817        entity_name: entity.name.clone(),
818        entity_type: entity.entity_type.clone(),
819        content: content.to_string(),
820        start_line: entity.start_line,
821        end_line: entity.end_line,
822    }
823}
824
825/// Build a map from entity_id to region content (from file lines).
826/// This preserves surrounding syntax (like `export`) that sem-core's entity.content may strip.
827/// Returns borrowed references since regions live for the merge duration.
828fn build_region_content_map(regions: &[FileRegion]) -> HashMap<&str, &str> {
829    regions
830        .iter()
831        .filter_map(|r| match r {
832            FileRegion::Entity(e) => Some((e.entity_id.as_str(), e.content.as_str())),
833            _ => None,
834        })
835        .collect()
836}
837
/// Report whether `a` and `b` differ by whitespace alone.
///
/// Each line is trimmed and blank lines are dropped before comparing, so changes
/// in indentation, trailing whitespace, and added/removed blank lines are ignored.
/// Identical inputs also return `true`.
fn is_whitespace_only_diff(a: &str, b: &str) -> bool {
    /// Trimmed, non-blank lines of `text`, in order.
    fn significant_lines<'t>(text: &'t str) -> impl Iterator<Item = &'t str> + 't {
        text.lines().map(str::trim).filter(|line| !line.is_empty())
    }

    a == b || significant_lines(a).eq(significant_lines(b))
}
848
/// Check if a line is a decorator or annotation.
///
/// A line counts as a decorator when, after trimming, it starts with `@` and the
/// identifier that follows is not a documentation tag (`@param`, `@return`,
/// `@returns`, `@type`, `@typedef`, `@see`).
///
/// The tag is compared as a whole identifier rather than as a prefix, so real
/// decorators whose names merely begin with a doc-tag word (for example
/// `@parameterized.expand(...)` or `@typing.overload`) are still recognised.
fn is_decorator_line(line: &str) -> bool {
    const DOC_TAGS: [&str; 6] = ["param", "return", "returns", "type", "typedef", "see"];

    let after_at = match line.trim().strip_prefix('@') {
        Some(rest) => rest,
        None => return false,
    };

    // The tag is the leading run of identifier characters after the `@`.
    let tag_len = after_at
        .find(|c: char| !(c.is_alphanumeric() || c == '_'))
        .unwrap_or(after_at.len());
    let tag = &after_at[..tag_len];

    !DOC_TAGS.iter().any(|doc_tag| *doc_tag == tag)
}
859
860/// Split content into (decorators, body) where decorators are leading @-prefixed lines.
861fn split_decorators(content: &str) -> (Vec<&str>, &str) {
862    let mut decorator_end = 0;
863    let mut byte_offset = 0;
864    for line in content.lines() {
865        if is_decorator_line(line) || line.trim().is_empty() {
866            decorator_end += 1;
867            byte_offset += line.len() + 1; // +1 for newline
868        } else {
869            break;
870        }
871    }
872    // Trim trailing empty lines from decorator section
873    let lines: Vec<&str> = content.lines().collect();
874    while decorator_end > 0 && lines.get(decorator_end - 1).map_or(false, |l| l.trim().is_empty()) {
875        byte_offset -= lines[decorator_end - 1].len() + 1;
876        decorator_end -= 1;
877    }
878    let decorators: Vec<&str> = lines[..decorator_end]
879        .iter()
880        .filter(|l| is_decorator_line(l))
881        .copied()
882        .collect();
883    let body = &content[byte_offset.min(content.len())..];
884    (decorators, body)
885}
886
887/// Try decorator-aware merge: when both sides add different decorators/annotations,
888/// merge them commutatively (like imports). Also try merging the bodies separately.
889///
890/// This handles the common pattern where one agent adds @cache and another adds @deprecated
891/// to the same function — they should both be preserved.
892fn try_decorator_aware_merge(base: &str, ours: &str, theirs: &str) -> Option<String> {
893    let (base_decorators, base_body) = split_decorators(base);
894    let (ours_decorators, ours_body) = split_decorators(ours);
895    let (theirs_decorators, theirs_body) = split_decorators(theirs);
896
897    // Only useful if at least one side has decorators
898    if ours_decorators.is_empty() && theirs_decorators.is_empty() {
899        return None;
900    }
901
902    // Merge bodies using diffy (or take unchanged side)
903    let merged_body = if base_body == ours_body && base_body == theirs_body {
904        base_body.to_string()
905    } else if base_body == ours_body {
906        theirs_body.to_string()
907    } else if base_body == theirs_body {
908        ours_body.to_string()
909    } else {
910        // Both changed body — try diffy on just the body
911        diffy_merge(base_body, ours_body, theirs_body)?
912    };
913
914    // Merge decorators commutatively (set union)
915    let base_set: HashSet<&str> = base_decorators.iter().copied().collect();
916    let ours_set: HashSet<&str> = ours_decorators.iter().copied().collect();
917    let theirs_set: HashSet<&str> = theirs_decorators.iter().copied().collect();
918
919    // Deletions
920    let ours_deleted: HashSet<&str> = base_set.difference(&ours_set).copied().collect();
921    let theirs_deleted: HashSet<&str> = base_set.difference(&theirs_set).copied().collect();
922
923    // Start with base decorators, remove deletions
924    let mut merged_decorators: Vec<&str> = base_decorators
925        .iter()
926        .filter(|d| !ours_deleted.contains(**d) && !theirs_deleted.contains(**d))
927        .copied()
928        .collect();
929
930    // Add new decorators from ours (not in base)
931    for d in &ours_decorators {
932        if !base_set.contains(d) && !merged_decorators.contains(d) {
933            merged_decorators.push(d);
934        }
935    }
936    // Add new decorators from theirs (not in base, not already added)
937    for d in &theirs_decorators {
938        if !base_set.contains(d) && !merged_decorators.contains(d) {
939            merged_decorators.push(d);
940        }
941    }
942
943    // Reconstruct
944    let mut result = String::new();
945    for d in &merged_decorators {
946        result.push_str(d);
947        result.push('\n');
948    }
949    result.push_str(&merged_body);
950
951    Some(result)
952}
953
954/// Try 3-way merge on text using diffy. Returns None if there are conflicts.
955fn diffy_merge(base: &str, ours: &str, theirs: &str) -> Option<String> {
956    let result = diffy::merge(base, ours, theirs);
957    match result {
958        Ok(merged) => Some(merged),
959        Err(_conflicted) => None,
960    }
961}
962
963/// Try 3-way merge using git merge-file. Returns None on conflict or error.
964/// This uses a different diff algorithm than diffy and can sometimes merge
965/// cases that diffy cannot (and vice versa).
966fn git_merge_string(base: &str, ours: &str, theirs: &str) -> Option<String> {
967    let dir = tempfile::tempdir().ok()?;
968    let base_path = dir.path().join("base");
969    let ours_path = dir.path().join("ours");
970    let theirs_path = dir.path().join("theirs");
971
972    std::fs::write(&base_path, base).ok()?;
973    std::fs::write(&ours_path, ours).ok()?;
974    std::fs::write(&theirs_path, theirs).ok()?;
975
976    let output = Command::new("git")
977        .arg("merge-file")
978        .arg("-p")
979        .arg(&ours_path)
980        .arg(&base_path)
981        .arg(&theirs_path)
982        .output()
983        .ok()?;
984
985    if output.status.success() {
986        String::from_utf8(output.stdout).ok()
987    } else {
988        None
989    }
990}
991
992/// Merge interstitial regions from all three versions.
993/// Uses commutative (set-based) merge for import blocks — inspired by
994/// LastMerge/Mergiraf's "unordered children" concept.
995/// Falls back to line-level 3-way merge for non-import content.
996fn merge_interstitials(
997    base_regions: &[FileRegion],
998    ours_regions: &[FileRegion],
999    theirs_regions: &[FileRegion],
1000    marker_format: &MarkerFormat,
1001) -> (HashMap<String, String>, Vec<EntityConflict>) {
1002    let base_map: HashMap<&str, &str> = base_regions
1003        .iter()
1004        .filter_map(|r| match r {
1005            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1006            _ => None,
1007        })
1008        .collect();
1009
1010    let ours_map: HashMap<&str, &str> = ours_regions
1011        .iter()
1012        .filter_map(|r| match r {
1013            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1014            _ => None,
1015        })
1016        .collect();
1017
1018    let theirs_map: HashMap<&str, &str> = theirs_regions
1019        .iter()
1020        .filter_map(|r| match r {
1021            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1022            _ => None,
1023        })
1024        .collect();
1025
1026    let mut all_keys: HashSet<&str> = HashSet::new();
1027    all_keys.extend(base_map.keys());
1028    all_keys.extend(ours_map.keys());
1029    all_keys.extend(theirs_map.keys());
1030
1031    let mut merged: HashMap<String, String> = HashMap::new();
1032    let mut interstitial_conflicts: Vec<EntityConflict> = Vec::new();
1033
1034    for key in all_keys {
1035        let base_content = base_map.get(key).copied().unwrap_or("");
1036        let ours_content = ours_map.get(key).copied().unwrap_or("");
1037        let theirs_content = theirs_map.get(key).copied().unwrap_or("");
1038
1039        // If all same, no merge needed
1040        if ours_content == theirs_content {
1041            merged.insert(key.to_string(), ours_content.to_string());
1042        } else if base_content == ours_content {
1043            merged.insert(key.to_string(), theirs_content.to_string());
1044        } else if base_content == theirs_content {
1045            merged.insert(key.to_string(), ours_content.to_string());
1046        } else {
1047            // Both changed — check if this is an import-heavy region
1048            if is_import_region(base_content)
1049                || is_import_region(ours_content)
1050                || is_import_region(theirs_content)
1051            {
1052                // Commutative merge: treat import lines as a set
1053                let result = merge_imports_commutatively(base_content, ours_content, theirs_content);
1054                merged.insert(key.to_string(), result);
1055            } else {
1056                // Regular line-level merge
1057                match diffy::merge(base_content, ours_content, theirs_content) {
1058                    Ok(m) => {
1059                        merged.insert(key.to_string(), m);
1060                    }
1061                    Err(_conflicted) => {
1062                        // Create a proper conflict instead of silently embedding
1063                        // raw conflict markers into the output.
1064                        let complexity = classify_conflict(
1065                            Some(base_content),
1066                            Some(ours_content),
1067                            Some(theirs_content),
1068                        );
1069                        let conflict = EntityConflict {
1070                            entity_name: key.to_string(),
1071                            entity_type: "interstitial".to_string(),
1072                            kind: ConflictKind::BothModified,
1073                            complexity,
1074                            ours_content: Some(ours_content.to_string()),
1075                            theirs_content: Some(theirs_content.to_string()),
1076                            base_content: Some(base_content.to_string()),
1077                        };
1078                        merged.insert(key.to_string(), conflict.to_conflict_markers(marker_format));
1079                        interstitial_conflicts.push(conflict);
1080                    }
1081                }
1082            }
1083        }
1084    }
1085
1086    (merged, interstitial_conflicts)
1087}
1088
1089/// Check if a region is predominantly import/use statements.
1090/// Handles both single-line imports and multi-line import blocks
1091/// (e.g. `import { type a, type b } from "..."` spread across lines).
1092fn is_import_region(content: &str) -> bool {
1093    let lines: Vec<&str> = content
1094        .lines()
1095        .filter(|l| !l.trim().is_empty())
1096        .collect();
1097    if lines.is_empty() {
1098        return false;
1099    }
1100    let mut import_count = 0;
1101    let mut in_multiline_import = false;
1102    for line in &lines {
1103        if in_multiline_import {
1104            import_count += 1;
1105            let trimmed = line.trim();
1106            if trimmed.starts_with('}') || trimmed.ends_with(')') {
1107                in_multiline_import = false;
1108            }
1109        } else if is_import_line(line) {
1110            import_count += 1;
1111            let trimmed = line.trim();
1112            // Detect start of multi-line import: `import {` or `import (` without closing on same line
1113            if (trimmed.contains('{') && !trimmed.contains('}'))
1114                || (trimmed.starts_with("import (") && !trimmed.contains(')'))
1115            {
1116                in_multiline_import = true;
1117            }
1118        }
1119    }
1120    // If >50% of non-empty lines are imports, treat as import region
1121    import_count * 2 > lines.len()
1122}
1123
1124/// Post-merge cleanup: remove consecutive duplicate lines and normalize blank lines.
1125///
1126/// Fixes two classes of merge artifacts:
1127/// 1. Duplicate lines/blocks that appear when both sides add the same content
1128///    (e.g. duplicate typedefs, forward declarations)
1129/// 2. Missing blank lines between entities or declarations, and excessive
1130///    blank lines (3+ consecutive) collapsed to 2
1131fn post_merge_cleanup(content: &str) -> String {
1132    let lines: Vec<&str> = content.lines().collect();
1133    let mut result: Vec<&str> = Vec::with_capacity(lines.len());
1134
1135    // Pass 1: Remove consecutive duplicate lines that look like declarations or imports.
1136    // Only dedup lines that are plausibly merge artifacts (imports, exports, forward decls).
1137    // Preserve intentional duplicates like repeated assertions, assignments, or data lines.
1138    for line in &lines {
1139        if line.trim().is_empty() {
1140            result.push(line);
1141            continue;
1142        }
1143        if let Some(prev) = result.last() {
1144            if !prev.trim().is_empty() && *prev == *line && looks_like_declaration(line) {
1145                continue; // skip consecutive exact duplicate of declaration-like line
1146            }
1147        }
1148        result.push(line);
1149    }
1150
1151    // Pass 2: Collapse 3+ consecutive blank lines to 2 (one separator blank line).
1152    let mut final_lines: Vec<&str> = Vec::with_capacity(result.len());
1153    let mut consecutive_blanks = 0;
1154    for line in &result {
1155        if line.trim().is_empty() {
1156            consecutive_blanks += 1;
1157            if consecutive_blanks <= 2 {
1158                final_lines.push(line);
1159            }
1160        } else {
1161            consecutive_blanks = 0;
1162            final_lines.push(line);
1163        }
1164    }
1165
1166    let mut out = final_lines.join("\n");
1167    if content.ends_with('\n') && !out.ends_with('\n') {
1168        out.push('\n');
1169    }
1170    out
1171}
1172
/// Check whether a line looks like a declaration/import that a merge might
/// duplicate.
///
/// Matches (after trimming) lines that begin with a known import/declaration
/// keyword, plus `pub ...` lines containing `mod `. Returns `false` for lines
/// that could be repeated on purpose (assertions, assignments, data
/// initializers, struct fields, etc.).
fn looks_like_declaration(line: &str) -> bool {
    const DECLARATION_PREFIXES: [&str; 8] = [
        "import ",
        "from ",
        "use ",
        "export ",
        "require(",
        "#include",
        "typedef ",
        "using ",
    ];

    let text = line.trim();
    if DECLARATION_PREFIXES
        .iter()
        .any(|prefix| text.starts_with(*prefix))
    {
        return true;
    }
    text.starts_with("pub ") && text.contains("mod ")
}
1188
/// Check whether a line is a top-level import/use/require statement.
///
/// Lines that begin with a space or tab are rejected outright: indented imports
/// live inside conditional blocks (`if TYPE_CHECKING:` and similar) and must not
/// be treated as part of the top-level import set. Otherwise the trimmed line
/// must start with a recognised import keyword, or be a `const ... require(`
/// binding.
fn is_import_line(line: &str) -> bool {
    const IMPORT_PREFIXES: [&str; 7] = [
        "import ",
        "from ",
        "use ",
        "require(",
        "package ",
        "#include ",
        "using ",
    ];

    if matches!(line.chars().next(), Some(' ') | Some('\t')) {
        return false;
    }

    let text = line.trim();
    let has_import_prefix = IMPORT_PREFIXES
        .iter()
        .any(|prefix| text.starts_with(*prefix));
    let is_require_binding = text.starts_with("const ") && text.contains("require(");

    has_import_prefix || is_require_binding
}
1208
/// A complete import statement (possibly multi-line) as a single unit.
///
/// Built by `parse_import_statements`, which folds a multi-line import block
/// (its opening line plus every following line up to and including the closing
/// `}` / `)` line) into one value.
#[derive(Debug, Clone)]
struct ImportStatement {
    /// The full text of the import, one entry per source line (may span multiple
    /// lines; a multi-line block includes its opening and closing lines).
    lines: Vec<String>,
    /// The source module (e.g. "./foo", "react", "std::io"), as returned by
    /// `import_source_prefix` for the statement text.
    source: String,
    /// For multi-line imports: the individual specifiers (e.g. ["type a", "type b"]),
    /// trimmed and with trailing commas removed. Empty for single-line imports.
    specifiers: Vec<String>,
    /// Whether this is a multi-line import block
    is_multiline: bool,
}
1221
1222/// Parse content into import statements, handling multi-line imports as single units.
1223fn parse_import_statements(content: &str) -> (Vec<ImportStatement>, Vec<String>) {
1224    let mut imports: Vec<ImportStatement> = Vec::new();
1225    let mut non_import_lines: Vec<String> = Vec::new();
1226    let lines: Vec<&str> = content.lines().collect();
1227    let mut i = 0;
1228
1229    while i < lines.len() {
1230        let line = lines[i];
1231
1232        if line.trim().is_empty() {
1233            non_import_lines.push(line.to_string());
1234            i += 1;
1235            continue;
1236        }
1237
1238        if is_import_line(line) {
1239            let trimmed = line.trim();
1240            // Check for multi-line import: `import {` without `}` on same line
1241            let starts_multiline = (trimmed.contains('{') && !trimmed.contains('}'))
1242                || (trimmed.starts_with("import (") && !trimmed.contains(')'));
1243
1244            if starts_multiline {
1245                let mut block_lines = vec![line.to_string()];
1246                let mut specifiers = Vec::new();
1247                let close_char = if trimmed.contains('{') { '}' } else { ')' };
1248                i += 1;
1249
1250                // Collect lines until closing brace/paren
1251                while i < lines.len() {
1252                    let inner = lines[i];
1253                    block_lines.push(inner.to_string());
1254                    let inner_trimmed = inner.trim();
1255
1256                    if inner_trimmed.starts_with(close_char) {
1257                        // This is the closing line (e.g. `} from "./foo"`)
1258                        break;
1259                    } else if !inner_trimmed.is_empty() {
1260                        // This is a specifier line — strip trailing comma
1261                        let spec = inner_trimmed.trim_end_matches(',').trim().to_string();
1262                        if !spec.is_empty() {
1263                            specifiers.push(spec);
1264                        }
1265                    }
1266                    i += 1;
1267                }
1268
1269                let full_text = block_lines.join("\n");
1270                let source = import_source_prefix(&full_text).to_string();
1271                imports.push(ImportStatement {
1272                    lines: block_lines,
1273                    source,
1274                    specifiers,
1275                    is_multiline: true,
1276                });
1277            } else {
1278                // Single-line import
1279                let source = import_source_prefix(line).to_string();
1280                imports.push(ImportStatement {
1281                    lines: vec![line.to_string()],
1282                    source,
1283                    specifiers: Vec::new(),
1284                    is_multiline: false,
1285                });
1286            }
1287        } else {
1288            non_import_lines.push(line.to_string());
1289        }
1290        i += 1;
1291    }
1292
1293    (imports, non_import_lines)
1294}
1295
1296/// Merge import blocks commutatively (as unordered sets), preserving grouping.
1297///
1298/// Handles both single-line imports and multi-line import blocks.
1299/// For multi-line imports from the same source, merges specifiers as a set.
1300/// Single-line imports are merged as before: set union with deletions.
1301fn merge_imports_commutatively(base: &str, ours: &str, theirs: &str) -> String {
1302    let (base_imports, _) = parse_import_statements(base);
1303    let (ours_imports, _) = parse_import_statements(ours);
1304    let (theirs_imports, _) = parse_import_statements(theirs);
1305
1306    let has_multiline = base_imports.iter().any(|i| i.is_multiline)
1307        || ours_imports.iter().any(|i| i.is_multiline)
1308        || theirs_imports.iter().any(|i| i.is_multiline);
1309
1310    if has_multiline {
1311        return merge_imports_with_multiline(base, ours, theirs,
1312            &base_imports, &ours_imports, &theirs_imports);
1313    }
1314
1315    // Original single-line-only logic
1316    let base_lines: HashSet<&str> = base.lines().filter(|l| is_import_line(l)).collect();
1317    let ours_lines: HashSet<&str> = ours.lines().filter(|l| is_import_line(l)).collect();
1318
1319    let theirs_deleted: HashSet<&str> = base_lines.difference(
1320        &theirs.lines().filter(|l| is_import_line(l)).collect::<HashSet<&str>>()
1321    ).copied().collect();
1322
1323    let theirs_added: Vec<&str> = theirs
1324        .lines()
1325        .filter(|l| is_import_line(l) && !base_lines.contains(l) && !ours_lines.contains(l))
1326        .collect();
1327
1328    let mut groups: Vec<Vec<&str>> = Vec::new();
1329    let mut current_group: Vec<&str> = Vec::new();
1330
1331    for line in ours.lines() {
1332        if line.trim().is_empty() {
1333            if !current_group.is_empty() {
1334                groups.push(current_group);
1335                current_group = Vec::new();
1336            }
1337        } else if is_import_line(line) {
1338            if theirs_deleted.contains(line) {
1339                continue;
1340            }
1341            current_group.push(line);
1342        } else {
1343            current_group.push(line);
1344        }
1345    }
1346    if !current_group.is_empty() {
1347        groups.push(current_group);
1348    }
1349
1350    for add in &theirs_added {
1351        let prefix = import_source_prefix(add);
1352        let mut best_group = if groups.is_empty() { 0 } else { groups.len() - 1 };
1353        for (i, group) in groups.iter().enumerate() {
1354            if group.iter().any(|l| {
1355                is_import_line(l) && import_source_prefix(l) == prefix
1356            }) {
1357                best_group = i;
1358                break;
1359            }
1360        }
1361        if best_group < groups.len() {
1362            groups[best_group].push(add);
1363        } else {
1364            groups.push(vec![add]);
1365        }
1366    }
1367
1368    let mut result_lines: Vec<&str> = Vec::new();
1369    for (i, group) in groups.iter().enumerate() {
1370        if i > 0 {
1371            result_lines.push("");
1372        }
1373        result_lines.extend(group);
1374    }
1375
1376    let mut result = result_lines.join("\n");
1377    let ours_trailing = ours.len() - ours.trim_end_matches('\n').len();
1378    let result_trailing = result.len() - result.trim_end_matches('\n').len();
1379    for _ in result_trailing..ours_trailing {
1380        result.push('\n');
1381    }
1382    result
1383}
1384
1385/// Merge imports when multi-line import blocks are involved.
1386/// Matches imports by source module, merges specifiers as a set.
1387fn merge_imports_with_multiline(
1388    _base_raw: &str,
1389    ours_raw: &str,
1390    _theirs_raw: &str,
1391    base_imports: &[ImportStatement],
1392    ours_imports: &[ImportStatement],
1393    theirs_imports: &[ImportStatement],
1394) -> String {
1395    // Build source → specifier sets for base and theirs
1396    let base_specs: HashMap<&str, HashSet<&str>> = base_imports.iter().map(|imp| {
1397        let specs: HashSet<&str> = imp.specifiers.iter().map(|s| s.as_str()).collect();
1398        (imp.source.as_str(), specs)
1399    }).collect();
1400
1401    let theirs_specs: HashMap<&str, HashSet<&str>> = theirs_imports.iter().map(|imp| {
1402        let specs: HashSet<&str> = imp.specifiers.iter().map(|s| s.as_str()).collect();
1403        (imp.source.as_str(), specs)
1404    }).collect();
1405
1406    // Single-line import tracking: base lines and theirs-deleted
1407    let base_single: HashSet<String> = base_imports.iter()
1408        .filter(|i| !i.is_multiline)
1409        .map(|i| i.lines[0].clone())
1410        .collect();
1411    let theirs_single: HashSet<String> = theirs_imports.iter()
1412        .filter(|i| !i.is_multiline)
1413        .map(|i| i.lines[0].clone())
1414        .collect();
1415    let theirs_deleted_single: HashSet<&str> = base_single.iter()
1416        .filter(|l| !theirs_single.contains(l.as_str()))
1417        .map(|l| l.as_str())
1418        .collect();
1419
1420    // Process ours imports, merging in theirs specifiers
1421    let mut result_parts: Vec<String> = Vec::new();
1422    let mut handled_theirs_sources: HashSet<&str> = HashSet::new();
1423
1424    // Walk through ours_raw to preserve formatting (blank lines, comments)
1425    let lines: Vec<&str> = ours_raw.lines().collect();
1426    let mut i = 0;
1427    let mut ours_imp_idx = 0;
1428
1429    while i < lines.len() {
1430        let line = lines[i];
1431
1432        if line.trim().is_empty() {
1433            result_parts.push(line.to_string());
1434            i += 1;
1435            continue;
1436        }
1437
1438        if is_import_line(line) {
1439            let trimmed = line.trim();
1440            let starts_multiline = (trimmed.contains('{') && !trimmed.contains('}'))
1441                || (trimmed.starts_with("import (") && !trimmed.contains(')'));
1442
1443            if starts_multiline && ours_imp_idx < ours_imports.len() {
1444                let imp = &ours_imports[ours_imp_idx];
1445                // Find the matching import by source
1446                let source = imp.source.as_str();
1447                handled_theirs_sources.insert(source);
1448
1449                // Merge specifiers: ours + theirs additions - theirs deletions
1450                let base_spec_set = base_specs.get(source).cloned().unwrap_or_default();
1451                let theirs_spec_set = theirs_specs.get(source).cloned().unwrap_or_default();
1452                // Added by theirs: in theirs but not in base
1453                let theirs_added: HashSet<&str> = theirs_spec_set.difference(&base_spec_set).copied().collect();
1454                // Deleted by theirs: in base but not in theirs
1455                let theirs_removed: HashSet<&str> = base_spec_set.difference(&theirs_spec_set).copied().collect();
1456
1457                // Final set: ours (in original order) + theirs_added - theirs_removed
1458                let mut final_specs: Vec<&str> = imp.specifiers.iter()
1459                    .map(|s| s.as_str())
1460                    .filter(|s| !theirs_removed.contains(s))
1461                    .collect();
1462                for added in &theirs_added {
1463                    if !final_specs.contains(added) {
1464                        final_specs.push(added);
1465                    }
1466                }
1467
1468                // Detect indentation from the original block
1469                let indent = if imp.lines.len() > 1 {
1470                    let second = &imp.lines[1];
1471                    &second[..second.len() - second.trim_start().len()]
1472                } else {
1473                    "     "
1474                };
1475
1476                // Reconstruct multi-line import
1477                result_parts.push(imp.lines[0].clone()); // `import {`
1478                for spec in &final_specs {
1479                    result_parts.push(format!("{}{},", indent, spec));
1480                }
1481                // Closing line from ours
1482                if let Some(last) = imp.lines.last() {
1483                    result_parts.push(last.clone());
1484                }
1485
1486                // Skip past the original multi-line block in ours_raw
1487                let close_char = if trimmed.contains('{') { '}' } else { ')' };
1488                i += 1;
1489                while i < lines.len() {
1490                    if lines[i].trim().starts_with(close_char) {
1491                        i += 1;
1492                        break;
1493                    }
1494                    i += 1;
1495                }
1496                ours_imp_idx += 1;
1497                continue;
1498            } else {
1499                // Single-line import
1500                if ours_imp_idx < ours_imports.len() {
1501                    let imp = &ours_imports[ours_imp_idx];
1502                    handled_theirs_sources.insert(imp.source.as_str());
1503                    ours_imp_idx += 1;
1504                }
1505                // Check if theirs deleted this single-line import
1506                if !theirs_deleted_single.contains(line) {
1507                    result_parts.push(line.to_string());
1508                }
1509            }
1510        } else {
1511            result_parts.push(line.to_string());
1512        }
1513        i += 1;
1514    }
1515
1516    // Add any new imports from theirs that have new sources
1517    for imp in theirs_imports {
1518        if handled_theirs_sources.contains(imp.source.as_str()) {
1519            continue;
1520        }
1521        // Check if this source exists in base (if so, it was handled above)
1522        if base_specs.contains_key(imp.source.as_str()) {
1523            continue;
1524        }
1525        // Truly new import from theirs
1526        for line in &imp.lines {
1527            result_parts.push(line.clone());
1528        }
1529    }
1530
1531    let mut result = result_parts.join("\n");
1532    let ours_trailing = ours_raw.len() - ours_raw.trim_end_matches('\n').len();
1533    let result_trailing = result.len() - result.trim_end_matches('\n').len();
1534    for _ in result_trailing..ours_trailing {
1535        result.push('\n');
1536    }
1537    result
1538}
1539
/// Extract the source/module an import refers to, for group matching.
///
/// Examples:
/// - `from collections import OrderedDict` -> `collections`
/// - `import React from 'react'` -> `react`
/// - `use std::collections::HashMap;` -> `std` (first path segment only)
///
/// `line` may contain a whole multi-line import; each physical line is checked
/// in order and the first recognised form wins (e.g. the `} from "./foo"`
/// closing line of a JS/TS block). If no form is recognised, the trimmed input
/// is returned as-is.
fn import_source_prefix(line: &str) -> &str {
    /// Text between the first quote character and the next occurrence of that
    /// *same* quote character, so `"it's"` yields `it's` rather than `it`.
    fn quoted(text: &str) -> Option<&str> {
        let open = text.find(|c: char| c == '\'' || c == '"')?;
        let quote = text[open..].chars().next()?;
        let rest = &text[open + 1..];
        rest.find(quote).map(|close| &rest[..close])
    }

    for l in line.lines() {
        let trimmed = l.trim();

        // Python: "from X import Y" -> X
        if let Some(rest) = trimmed.strip_prefix("from ") {
            return rest.split_whitespace().next().unwrap_or("");
        }

        // JS/TS: `import X from 'Y'` or the closing line `} from 'Y'` -> Y.
        // Lines of these shapes without a quoted source fall through.
        let quoted_form = trimmed.starts_with("import ")
            || (trimmed.starts_with('}') && trimmed.contains("from "));
        if quoted_form {
            if let Some(source) = quoted(trimmed) {
                return source;
            }
        }

        // Rust: "use X::Y;" -> X
        if let Some(rest) = trimmed.strip_prefix("use ") {
            return rest.split("::").next().unwrap_or("").trim_end_matches(';');
        }
    }
    line.trim()
}
1578
1579/// Fallback to line-level 3-way merge when entity extraction isn't possible.
1580///
1581/// Uses Sesame-inspired separator preprocessing (arXiv:2407.18888) to get
1582/// finer-grained alignment before line-level merge. Inserts newlines around
1583/// syntactic separators ({, }, ;) so that changes in different code blocks
1584/// align independently, reducing spurious conflicts.
1585///
1586/// Sesame expansion is skipped for data formats (JSON, YAML, TOML, lock files)
1587/// where `{`, `}`, `;` are structural content rather than code separators.
1588/// Expanding them destroys alignment and produces far more conflicts (confirmed
1589/// on GitButler: YAML went from 68 git markers to 192 weave markers with Sesame).
1590fn line_level_fallback(base: &str, ours: &str, theirs: &str, file_path: &str) -> MergeResult {
1591    let mut stats = MergeStats::default();
1592    stats.used_fallback = true;
1593
1594    // Skip Sesame preprocessing for data formats where {/}/; are content, not separators
1595    let skip = skip_sesame(file_path);
1596
1597    if skip {
1598        // Use git merge-file for data formats so we match git's output exactly.
1599        // diffy::merge uses a different diff algorithm that can produce more
1600        // conflict markers on structured data like lock files.
1601        return git_merge_file(base, ours, theirs, &mut stats);
1602    }
1603
1604    // Try Sesame expansion + diffy first, then compare against git merge-file.
1605    // Use whichever produces fewer conflict markers so we're never worse than git.
1606    let base_expanded = expand_separators(base);
1607    let ours_expanded = expand_separators(ours);
1608    let theirs_expanded = expand_separators(theirs);
1609
1610    let sesame_result = match diffy::merge(&base_expanded, &ours_expanded, &theirs_expanded) {
1611        Ok(merged) => {
1612            let content = collapse_separators(&merged, base);
1613            Some(MergeResult {
1614                content: post_merge_cleanup(&content),
1615                conflicts: vec![],
1616                warnings: vec![],
1617                stats: stats.clone(),
1618                audit: vec![],
1619            })
1620        }
1621        Err(_) => {
1622            // Sesame expansion conflicted, try plain diffy
1623            match diffy::merge(base, ours, theirs) {
1624                Ok(merged) => Some(MergeResult {
1625                    content: merged,
1626                    conflicts: vec![],
1627                    warnings: vec![],
1628                    stats: stats.clone(),
1629                    audit: vec![],
1630                }),
1631                Err(conflicted) => {
1632                    let _markers = conflicted.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1633                    let mut s = stats.clone();
1634                    s.entities_conflicted = 1;
1635                    Some(MergeResult {
1636                        content: conflicted,
1637                        conflicts: vec![EntityConflict {
1638                            entity_name: "(file)".to_string(),
1639                            entity_type: "file".to_string(),
1640                            kind: ConflictKind::BothModified,
1641                            complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1642                            ours_content: Some(ours.to_string()),
1643                            theirs_content: Some(theirs.to_string()),
1644                            base_content: Some(base.to_string()),
1645                        }],
1646                        warnings: vec![],
1647                        stats: s,
1648                        audit: vec![],
1649                    })
1650                }
1651            }
1652        }
1653    };
1654
1655    // Get git merge-file result as our floor
1656    let git_result = git_merge_file(base, ours, theirs, &mut stats);
1657
1658    // Compare: use sesame result only if it has fewer or equal markers
1659    match sesame_result {
1660        Some(sesame) if sesame.conflicts.is_empty() && !git_result.conflicts.is_empty() => {
1661            // Sesame resolved cleanly, git didn't: use sesame
1662            sesame
1663        }
1664        Some(sesame) if !sesame.conflicts.is_empty() && !git_result.conflicts.is_empty() => {
1665            // Both conflicted: use whichever has fewer markers
1666            let sesame_markers = sesame.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1667            let git_markers = git_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1668            if sesame_markers <= git_markers { sesame } else { git_result }
1669        }
1670        _ => git_result,
1671    }
1672}
1673
1674/// Shell out to `git merge-file` for an exact match with git's line-level merge.
1675///
1676/// We use this instead of `diffy::merge` for data formats (lock files, JSON, YAML, TOML)
1677/// where weave can't improve on git. `diffy` uses a different diff algorithm that can
1678/// produce more conflict markers on structured data (e.g. 22 markers vs git's 19 on uv.lock).
1679fn git_merge_file(base: &str, ours: &str, theirs: &str, stats: &mut MergeStats) -> MergeResult {
1680    let dir = match tempfile::tempdir() {
1681        Ok(d) => d,
1682        Err(_) => return diffy_fallback(base, ours, theirs, stats),
1683    };
1684
1685    let base_path = dir.path().join("base");
1686    let ours_path = dir.path().join("ours");
1687    let theirs_path = dir.path().join("theirs");
1688
1689    let write_ok = (|| -> std::io::Result<()> {
1690        std::fs::File::create(&base_path)?.write_all(base.as_bytes())?;
1691        std::fs::File::create(&ours_path)?.write_all(ours.as_bytes())?;
1692        std::fs::File::create(&theirs_path)?.write_all(theirs.as_bytes())?;
1693        Ok(())
1694    })();
1695
1696    if write_ok.is_err() {
1697        return diffy_fallback(base, ours, theirs, stats);
1698    }
1699
1700    // git merge-file writes result to the first file (ours) in place
1701    let output = Command::new("git")
1702        .arg("merge-file")
1703        .arg("-p") // print to stdout instead of modifying ours in place
1704        .arg("--diff3") // include ||||||| base section for jj compatibility
1705        .arg("-L").arg("ours")
1706        .arg("-L").arg("base")
1707        .arg("-L").arg("theirs")
1708        .arg(&ours_path)
1709        .arg(&base_path)
1710        .arg(&theirs_path)
1711        .output();
1712
1713    match output {
1714        Ok(result) => {
1715            let content = String::from_utf8_lossy(&result.stdout).into_owned();
1716            if result.status.success() {
1717                // Exit 0 = clean merge
1718                MergeResult {
1719                    content: post_merge_cleanup(&content),
1720                    conflicts: vec![],
1721                    warnings: vec![],
1722                    stats: stats.clone(),
1723                    audit: vec![],
1724                }
1725            } else {
1726                // Exit >0 = conflicts (exit code = number of conflicts)
1727                stats.entities_conflicted = 1;
1728                MergeResult {
1729                    content,
1730                    conflicts: vec![EntityConflict {
1731                        entity_name: "(file)".to_string(),
1732                        entity_type: "file".to_string(),
1733                        kind: ConflictKind::BothModified,
1734                        complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1735                        ours_content: Some(ours.to_string()),
1736                        theirs_content: Some(theirs.to_string()),
1737                        base_content: Some(base.to_string()),
1738                    }],
1739                    warnings: vec![],
1740                    stats: stats.clone(),
1741                    audit: vec![],
1742                }
1743            }
1744        }
1745        // git not available, fall back to diffy
1746        Err(_) => diffy_fallback(base, ours, theirs, stats),
1747    }
1748}
1749
1750/// Fallback to diffy::merge when git merge-file is unavailable.
1751fn diffy_fallback(base: &str, ours: &str, theirs: &str, stats: &mut MergeStats) -> MergeResult {
1752    match diffy::merge(base, ours, theirs) {
1753        Ok(merged) => {
1754            let content = post_merge_cleanup(&merged);
1755            MergeResult {
1756                content,
1757                conflicts: vec![],
1758                warnings: vec![],
1759                stats: stats.clone(),
1760                audit: vec![],
1761            }
1762        }
1763        Err(conflicted) => {
1764            stats.entities_conflicted = 1;
1765            MergeResult {
1766                content: conflicted,
1767                conflicts: vec![EntityConflict {
1768                    entity_name: "(file)".to_string(),
1769                    entity_type: "file".to_string(),
1770                    kind: ConflictKind::BothModified,
1771                    complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1772                    ours_content: Some(ours.to_string()),
1773                    theirs_content: Some(theirs.to_string()),
1774                    base_content: Some(base.to_string()),
1775                }],
1776                warnings: vec![],
1777                stats: stats.clone(),
1778                audit: vec![],
1779            }
1780        }
1781    }
1782}
1783
1784/// Filter out entities that are nested inside other entities.
1785///
1786/// When a class contains methods which contain local variables, sem-core may extract
1787/// all of them as entities. But for merge purposes, nested entities are part of their
1788/// parent — we handle them via inner entity merge. Keeping them causes false conflicts
1789/// (e.g. two methods both declaring `const user` would appear as BothAdded).
1790/// Check if entity list has too many duplicate names, which causes matching to hang.
1791fn has_excessive_duplicates(entities: &[SemanticEntity]) -> bool {
1792    let threshold = std::env::var("WEAVE_MAX_DUPLICATES")
1793        .ok()
1794        .and_then(|v| v.parse::<usize>().ok())
1795        .unwrap_or(10);
1796    let mut counts: HashMap<&str, usize> = HashMap::new();
1797    for e in entities {
1798        *counts.entry(&e.name).or_default() += 1;
1799    }
1800    counts.values().any(|&c| c >= threshold)
1801}
1802
1803/// Filter out entities that are nested inside other entities.
1804/// O(n log n) via sort + stack, replacing the previous O(n^2) approach.
1805fn filter_nested_entities(mut entities: Vec<SemanticEntity>) -> Vec<SemanticEntity> {
1806    if entities.len() <= 1 {
1807        return entities;
1808    }
1809
1810    // Sort by start_line ASC, then by end_line DESC (widest span first).
1811    // A parent entity always appears before its children in this order.
1812    entities.sort_by(|a, b| {
1813        a.start_line.cmp(&b.start_line).then(b.end_line.cmp(&a.end_line))
1814    });
1815
1816    // Stack-based filter: track the end_line of the current outermost entity.
1817    let mut result: Vec<SemanticEntity> = Vec::with_capacity(entities.len());
1818    let mut max_end: usize = 0;
1819
1820    for entity in entities {
1821        if entity.start_line > max_end || max_end == 0 {
1822            // Not nested: new top-level entity
1823            max_end = entity.end_line;
1824            result.push(entity);
1825        } else if entity.start_line == result.last().map_or(0, |e| e.start_line)
1826            && entity.end_line == result.last().map_or(0, |e| e.end_line)
1827        {
1828            // Exact same span (e.g. decorated_definition wrapping function_definition)
1829            result.push(entity);
1830        }
1831        // else: strictly nested, skip
1832    }
1833
1834    result
1835}
1836
1837/// Get child entities of a parent, sorted by start line.
1838fn get_child_entities<'a>(
1839    parent: &SemanticEntity,
1840    all_entities: &'a [SemanticEntity],
1841) -> Vec<&'a SemanticEntity> {
1842    let mut children: Vec<&SemanticEntity> = all_entities
1843        .iter()
1844        .filter(|e| e.parent_id.as_deref() == Some(&parent.id))
1845        .collect();
1846    children.sort_by_key(|e| e.start_line);
1847    children
1848}
1849
1850/// Compute a body hash for rename detection: the entity content with the entity
1851/// name replaced at word boundaries by a placeholder, so entities with identical
1852/// bodies but different names produce the same hash.
1853///
1854/// Uses word-boundary matching to avoid partial replacements (e.g. replacing
1855/// "get" inside "getAll"). Works across all languages since it operates on
1856/// the content string, not language-specific AST features.
1857fn body_hash(entity: &SemanticEntity) -> u64 {
1858    use std::collections::hash_map::DefaultHasher;
1859    use std::hash::{Hash, Hasher};
1860    let normalized = replace_at_word_boundaries(&entity.content, &entity.name, "__ENTITY__");
1861    let mut hasher = DefaultHasher::new();
1862    normalized.hash(&mut hasher);
1863    hasher.finish()
1864}
1865
/// Replace `needle` with `replacement` only at word boundaries.
///
/// A match counts only when the characters immediately before and after it
/// (if any) are not identifier characters. ASCII letters, digits and `_` are
/// identifier characters, and so are non-ASCII letters and digits, so a name
/// embedded in a Unicode identifier (e.g. "ve" inside "naïve") is left alone.
///
/// Scanning advances one character at a time after a rejected match, so an
/// occurrence that overlaps a rejected one is still considered. An empty
/// `needle` returns `content` unchanged.
fn replace_at_word_boundaries(content: &str, needle: &str, replacement: &str) -> String {
    /// Character-level identifier test; ASCII is delegated to `is_ident_char`.
    fn is_word_char(c: char) -> bool {
        if c.is_ascii() {
            is_ident_char(c as u8)
        } else {
            c.is_alphanumeric()
        }
    }

    if needle.is_empty() {
        return content.to_string();
    }

    let mut result = String::with_capacity(content.len());
    // Last character of the input consumed so far (None at the very start).
    let mut prev: Option<char> = None;
    // Unconsumed tail of the input; always starts on a character boundary.
    let mut rest = content;

    while let Some(ch) = rest.chars().next() {
        if rest.starts_with(needle) {
            let after = &rest[needle.len()..];
            let before_ok = !prev.is_some_and(is_word_char);
            let after_ok = !after.chars().next().is_some_and(is_word_char);
            if before_ok && after_ok {
                result.push_str(replacement);
                // The character preceding whatever follows is the needle's last one.
                prev = needle.chars().next_back();
                rest = after;
                continue;
            }
        }
        result.push(ch);
        prev = Some(ch);
        rest = &rest[ch.len_utf8()..];
    }
    result
}

/// Whether `b` is an ASCII identifier byte: a letter, a digit or `_`.
fn is_ident_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}
1910
1911/// Build a rename map from new_id → base_id using confidence-scored matching.
1912///
1913/// Detects when an entity in the branch has the same body as an entity
1914/// in base but a different name/ID, indicating it was renamed.
1915/// Uses body_hash (name-stripped content hash) and structural_hash with
1916/// confidence scoring to resolve ambiguous matches correctly.
1917fn build_rename_map(
1918    base_entities: &[SemanticEntity],
1919    branch_entities: &[SemanticEntity],
1920) -> HashMap<String, String> {
1921    let mut rename_map: HashMap<String, String> = HashMap::new();
1922
1923    let base_ids: HashSet<&str> = base_entities.iter().map(|e| e.id.as_str()).collect();
1924
1925    // Build body_hash → base entities (multiple can have same hash)
1926    let mut base_by_body: HashMap<u64, Vec<&SemanticEntity>> = HashMap::new();
1927    for entity in base_entities {
1928        base_by_body.entry(body_hash(entity)).or_default().push(entity);
1929    }
1930
1931    // Also keep structural_hash index as fallback
1932    let mut base_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
1933    for entity in base_entities {
1934        if let Some(ref sh) = entity.structural_hash {
1935            base_by_structural.entry(sh.as_str()).or_default().push(entity);
1936        }
1937    }
1938
1939    // Collect all candidate (branch_entity, base_entity, confidence) triples
1940    struct RenameCandidate<'a> {
1941        branch: &'a SemanticEntity,
1942        base: &'a SemanticEntity,
1943        confidence: f64,
1944    }
1945    let mut candidates: Vec<RenameCandidate> = Vec::new();
1946
1947    for branch_entity in branch_entities {
1948        if base_ids.contains(branch_entity.id.as_str()) {
1949            continue;
1950        }
1951
1952        let bh = body_hash(branch_entity);
1953
1954        // Body hash matches
1955        if let Some(base_entities_for_hash) = base_by_body.get(&bh) {
1956            for &base_entity in base_entities_for_hash {
1957                let same_type = base_entity.entity_type == branch_entity.entity_type;
1958                let same_parent = base_entity.parent_id == branch_entity.parent_id;
1959                let confidence = match (same_type, same_parent) {
1960                    (true, true) => 0.95,
1961                    (true, false) => 0.8,
1962                    (false, _) => 0.6,
1963                };
1964                candidates.push(RenameCandidate { branch: branch_entity, base: base_entity, confidence });
1965            }
1966        }
1967
1968        // Structural hash fallback (lower confidence)
1969        if let Some(ref sh) = branch_entity.structural_hash {
1970            if let Some(base_entities_for_sh) = base_by_structural.get(sh.as_str()) {
1971                for &base_entity in base_entities_for_sh {
1972                    // Skip if already covered by body hash match
1973                    if candidates.iter().any(|c| c.branch.id == branch_entity.id && c.base.id == base_entity.id) {
1974                        continue;
1975                    }
1976                    candidates.push(RenameCandidate { branch: branch_entity, base: base_entity, confidence: 0.6 });
1977                }
1978            }
1979        }
1980    }
1981
1982    // Sort by confidence descending, assign greedily
1983    candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
1984
1985    let mut used_base_ids: HashSet<String> = HashSet::new();
1986    let mut used_branch_ids: HashSet<String> = HashSet::new();
1987
1988    for candidate in &candidates {
1989        if candidate.confidence < 0.6 {
1990            break;
1991        }
1992        if used_base_ids.contains(&candidate.base.id) || used_branch_ids.contains(&candidate.branch.id) {
1993            continue;
1994        }
1995        // Don't rename if the base entity's ID still exists in branch (it wasn't actually renamed)
1996        let base_id_in_branch = branch_entities.iter().any(|e| e.id == candidate.base.id);
1997        if base_id_in_branch {
1998            continue;
1999        }
2000        rename_map.insert(candidate.branch.id.clone(), candidate.base.id.clone());
2001        used_base_ids.insert(candidate.base.id.clone());
2002        used_branch_ids.insert(candidate.branch.id.clone());
2003    }
2004
2005    rename_map
2006}
2007
/// Whether `entity_type` names a container that may benefit from inner entity merge.
///
/// The comparison is exact and case-sensitive against a fixed list of type names.
fn is_container_entity_type(entity_type: &str) -> bool {
    const CONTAINER_TYPES: &[&str] = &[
        "class",
        "interface",
        "enum",
        "impl",
        "trait",
        "module",
        "impl_item",
        "trait_item",
        "struct",
        "union",
        "namespace",
        "struct_item",
        "struct_specifier",
        "variable",
        "export",
    ];
    CONTAINER_TYPES.iter().any(|known| *known == entity_type)
}
2017
/// A named member chunk extracted from a class/container body.
///
/// `children_to_chunks` builds one of these per child entity of a container.
#[derive(Debug, Clone)]
struct MemberChunk {
    /// The member name (method name, field name, etc.)
    name: String,
    /// Full content of the member including its body. When built by
    /// `children_to_chunks` this may also include directly preceding
    /// decorator/annotation/comment lines.
    content: String,
}
2026
/// Result of an inner entity merge attempt.
///
/// Pairs the merged text with a flag saying whether that text still contains
/// unresolved member-level conflicts.
struct InnerMergeResult {
    /// Merged content (may contain per-member conflict markers)
    content: String,
    /// Whether any members had conflicts
    has_conflicts: bool,
}
2034
2035/// Convert sem-core child entities to MemberChunks for inner merge.
2036///
2037/// Uses child entity line positions to extract content from the container text,
2038/// including any leading decorators/annotations that tree-sitter attaches as
2039/// sibling nodes rather than part of the method node.
2040fn children_to_chunks(
2041    children: &[&SemanticEntity],
2042    container_content: &str,
2043    container_start_line: usize,
2044) -> Vec<MemberChunk> {
2045    if children.is_empty() {
2046        return Vec::new();
2047    }
2048
2049    let lines: Vec<&str> = container_content.lines().collect();
2050    let mut chunks = Vec::new();
2051
2052    for (i, child) in children.iter().enumerate() {
2053        let child_start_idx = child.start_line.saturating_sub(container_start_line);
2054        // +1 because end_line is inclusive but we need an exclusive upper bound for slicing
2055        let child_end_idx = child.end_line.saturating_sub(container_start_line) + 1;
2056
2057        if child_end_idx > lines.len() + 1 || child_start_idx >= lines.len() {
2058            // Position out of range, fall back to entity content
2059            chunks.push(MemberChunk {
2060                name: child.name.clone(),
2061                content: child.content.clone(),
2062            });
2063            continue;
2064        }
2065        let child_end_idx = child_end_idx.min(lines.len());
2066
2067        // Determine the earliest line we can claim (after previous child's end, or body start)
2068        let floor = if i > 0 {
2069            children[i - 1].end_line.saturating_sub(container_start_line) + 1
2070        } else {
2071            // First child: start after the container header line (the `{` or `:` line)
2072            // Find the line containing `{` or ending with `:`
2073            let header_end = lines
2074                .iter()
2075                .position(|l| l.contains('{') || l.trim().ends_with(':'))
2076                .map(|p| p + 1)
2077                .unwrap_or(0);
2078            header_end
2079        };
2080
2081        // Scan backwards from child_start_idx to include decorators/annotations/comments
2082        let mut content_start = child_start_idx;
2083        while content_start > floor {
2084            let prev = content_start - 1;
2085            let trimmed = lines[prev].trim();
2086            if trimmed.starts_with('@')
2087                || trimmed.starts_with("#[")
2088                || trimmed.starts_with("//")
2089                || trimmed.starts_with("///")
2090                || trimmed.starts_with("/**")
2091                || trimmed.starts_with("* ")
2092                || trimmed == "*/"
2093            {
2094                content_start = prev;
2095            } else if trimmed.is_empty() && content_start > floor + 1 {
2096                // Allow one blank line between decorator and method
2097                content_start = prev;
2098            } else {
2099                break;
2100            }
2101        }
2102
2103        // Skip leading blank lines
2104        while content_start < child_start_idx && lines[content_start].trim().is_empty() {
2105            content_start += 1;
2106        }
2107
2108        let chunk_content: String = lines[content_start..child_end_idx].join("\n");
2109        chunks.push(MemberChunk {
2110            name: child.name.clone(),
2111            content: chunk_content,
2112        });
2113    }
2114
2115    chunks
2116}
2117
2118/// Generate a scoped conflict marker for a single member within a container merge.
2119fn scoped_conflict_marker(
2120    name: &str,
2121    base: Option<&str>,
2122    ours: Option<&str>,
2123    theirs: Option<&str>,
2124    ours_deleted: bool,
2125    theirs_deleted: bool,
2126    fmt: &MarkerFormat,
2127) -> String {
2128    let open = "<".repeat(fmt.marker_length);
2129    let sep = "=".repeat(fmt.marker_length);
2130    let close = ">".repeat(fmt.marker_length);
2131
2132    let o = ours.unwrap_or("");
2133    let t = theirs.unwrap_or("");
2134
2135    // Narrow conflict markers to just the differing lines
2136    let ours_lines: Vec<&str> = o.lines().collect();
2137    let theirs_lines: Vec<&str> = t.lines().collect();
2138    let (prefix_len, suffix_len) = if ours.is_some() && theirs.is_some() {
2139        crate::conflict::narrow_conflict_lines(&ours_lines, &theirs_lines)
2140    } else {
2141        (0, 0)
2142    };
2143    let has_narrowing = prefix_len > 0 || suffix_len > 0;
2144    let ours_mid = &ours_lines[prefix_len..ours_lines.len() - suffix_len];
2145    let theirs_mid = &theirs_lines[prefix_len..theirs_lines.len() - suffix_len];
2146
2147    let mut out = String::new();
2148
2149    // Emit common prefix as clean text
2150    if has_narrowing {
2151        for line in &ours_lines[..prefix_len] {
2152            out.push_str(line);
2153            out.push('\n');
2154        }
2155    }
2156
2157    // Opening marker
2158    if fmt.enhanced {
2159        if ours_deleted {
2160            out.push_str(&format!("{} ours ({} deleted)\n", open, name));
2161        } else {
2162            out.push_str(&format!("{} ours ({})\n", open, name));
2163        }
2164    } else {
2165        out.push_str(&format!("{} ours\n", open));
2166    }
2167
2168    // Ours content (narrowed or full)
2169    if ours.is_some() {
2170        if has_narrowing {
2171            for line in ours_mid {
2172                out.push_str(line);
2173                out.push('\n');
2174            }
2175        } else {
2176            out.push_str(o);
2177            if !o.ends_with('\n') {
2178                out.push('\n');
2179            }
2180        }
2181    }
2182
2183    // Base section for diff3 format (standard mode only)
2184    if !fmt.enhanced {
2185        let base_marker = "|".repeat(fmt.marker_length);
2186        out.push_str(&format!("{} base\n", base_marker));
2187        let b = base.unwrap_or("");
2188        if has_narrowing {
2189            let base_lines: Vec<&str> = b.lines().collect();
2190            let base_prefix = prefix_len.min(base_lines.len());
2191            let base_suffix = suffix_len.min(base_lines.len().saturating_sub(base_prefix));
2192            for line in &base_lines[base_prefix..base_lines.len() - base_suffix] {
2193                out.push_str(line);
2194                out.push('\n');
2195            }
2196        } else {
2197            out.push_str(b);
2198            if !b.is_empty() && !b.ends_with('\n') {
2199                out.push('\n');
2200            }
2201        }
2202    }
2203
2204    // Separator
2205    out.push_str(&format!("{}\n", sep));
2206
2207    // Theirs content (narrowed or full)
2208    if theirs.is_some() {
2209        if has_narrowing {
2210            for line in theirs_mid {
2211                out.push_str(line);
2212                out.push('\n');
2213            }
2214        } else {
2215            out.push_str(t);
2216            if !t.ends_with('\n') {
2217                out.push('\n');
2218            }
2219        }
2220    }
2221
2222    // Closing marker
2223    if fmt.enhanced {
2224        if theirs_deleted {
2225            out.push_str(&format!("{} theirs ({} deleted)\n", close, name));
2226        } else {
2227            out.push_str(&format!("{} theirs ({})\n", close, name));
2228        }
2229    } else {
2230        out.push_str(&format!("{} theirs\n", close));
2231    }
2232
2233    // Emit common suffix as clean text
2234    if has_narrowing {
2235        for line in &ours_lines[ours_lines.len() - suffix_len..] {
2236            out.push_str(line);
2237            out.push('\n');
2238        }
2239    }
2240
2241    out
2242}
2243
2244/// Try recursive inner entity merge for container types (classes, impls, etc.).
2245///
2246/// Inspired by LastMerge (arXiv:2507.19687): class members are "unordered children" —
2247/// reordering them is not a conflict. We chunk the class body into members, match by
2248/// name, and merge each member independently.
2249///
2250/// Returns Some(result) if chunking succeeded, None if we can't parse the container.
2251/// The result may contain per-member conflict markers (scoped conflicts).
2252fn try_inner_entity_merge(
2253    base: &str,
2254    ours: &str,
2255    theirs: &str,
2256    base_children: &[&SemanticEntity],
2257    ours_children: &[&SemanticEntity],
2258    theirs_children: &[&SemanticEntity],
2259    base_start_line: usize,
2260    ours_start_line: usize,
2261    theirs_start_line: usize,
2262    marker_format: &MarkerFormat,
2263) -> Option<InnerMergeResult> {
2264    // Try sem-core child entities first (tree-sitter-accurate boundaries),
2265    // fall back to indentation heuristic if children aren't available.
2266    // When children_to_chunks produces chunks, try indentation as a fallback
2267    // if the tree-sitter chunks lead to conflicts (the indentation heuristic
2268    // can include trailing context that helps diffy merge adjacent changes).
2269    let use_children = !ours_children.is_empty() || !theirs_children.is_empty();
2270    let (base_chunks, ours_chunks, theirs_chunks) = if use_children {
2271        (
2272            children_to_chunks(base_children, base, base_start_line),
2273            children_to_chunks(ours_children, ours, ours_start_line),
2274            children_to_chunks(theirs_children, theirs, theirs_start_line),
2275        )
2276    } else {
2277        (
2278            extract_member_chunks(base)?,
2279            extract_member_chunks(ours)?,
2280            extract_member_chunks(theirs)?,
2281        )
2282    };
2283
2284    // Need at least 1 member to attempt inner merge
2285    // (Even single-member containers benefit from decorator-aware merge)
2286    if base_chunks.is_empty() && ours_chunks.is_empty() && theirs_chunks.is_empty() {
2287        return None;
2288    }
2289
2290    // Build name → content maps
2291    let base_map: HashMap<&str, &str> = base_chunks
2292        .iter()
2293        .map(|c| (c.name.as_str(), c.content.as_str()))
2294        .collect();
2295    let ours_map: HashMap<&str, &str> = ours_chunks
2296        .iter()
2297        .map(|c| (c.name.as_str(), c.content.as_str()))
2298        .collect();
2299    let theirs_map: HashMap<&str, &str> = theirs_chunks
2300        .iter()
2301        .map(|c| (c.name.as_str(), c.content.as_str()))
2302        .collect();
2303
2304    // Collect all member names
2305    let mut all_names: Vec<String> = Vec::new();
2306    let mut seen: HashSet<String> = HashSet::new();
2307    // Use ours ordering as skeleton
2308    for chunk in &ours_chunks {
2309        if seen.insert(chunk.name.clone()) {
2310            all_names.push(chunk.name.clone());
2311        }
2312    }
2313    // Add theirs-only members
2314    for chunk in &theirs_chunks {
2315        if seen.insert(chunk.name.clone()) {
2316            all_names.push(chunk.name.clone());
2317        }
2318    }
2319
2320    // Extract header/footer (class declaration line and closing brace)
2321    let (ours_header, ours_footer) = extract_container_wrapper(ours)?;
2322
2323    let mut merged_members: Vec<String> = Vec::new();
2324    let mut has_conflict = false;
2325
2326    for name in &all_names {
2327        let in_base = base_map.get(name.as_str());
2328        let in_ours = ours_map.get(name.as_str());
2329        let in_theirs = theirs_map.get(name.as_str());
2330
2331        match (in_base, in_ours, in_theirs) {
2332            // In all three
2333            (Some(b), Some(o), Some(t)) => {
2334                if o == t {
2335                    merged_members.push(o.to_string());
2336                } else if b == o {
2337                    merged_members.push(t.to_string());
2338                } else if b == t {
2339                    merged_members.push(o.to_string());
2340                } else {
2341                    // Both changed differently: try diffy, then git merge-file, then decorator merge
2342                    if let Some(merged) = diffy_merge(b, o, t) {
2343                        merged_members.push(merged);
2344                    } else if let Some(merged) = git_merge_string(b, o, t) {
2345                        merged_members.push(merged);
2346                    } else if let Some(merged) = try_decorator_aware_merge(b, o, t) {
2347                        merged_members.push(merged);
2348                    } else {
2349                        // Emit per-member conflict markers
2350                        has_conflict = true;
2351                        merged_members.push(scoped_conflict_marker(name, Some(b), Some(o), Some(t), false, false, marker_format));
2352                    }
2353                }
2354            }
2355            // Deleted by theirs, ours unchanged or not in base
2356            (Some(b), Some(o), None) => {
2357                if *b == *o {
2358                    // Ours unchanged, theirs deleted → accept deletion
2359                } else {
2360                    // Ours modified, theirs deleted → per-member conflict
2361                    has_conflict = true;
2362                    merged_members.push(scoped_conflict_marker(name, Some(b), Some(o), None, false, true, marker_format));
2363                }
2364            }
2365            // Deleted by ours, theirs unchanged or not in base
2366            (Some(b), None, Some(t)) => {
2367                if *b == *t {
2368                    // Theirs unchanged, ours deleted → accept deletion
2369                } else {
2370                    // Theirs modified, ours deleted → per-member conflict
2371                    has_conflict = true;
2372                    merged_members.push(scoped_conflict_marker(name, Some(b), None, Some(t), true, false, marker_format));
2373                }
2374            }
2375            // Added by ours only
2376            (None, Some(o), None) => {
2377                merged_members.push(o.to_string());
2378            }
2379            // Added by theirs only
2380            (None, None, Some(t)) => {
2381                merged_members.push(t.to_string());
2382            }
2383            // Added by both with different content
2384            (None, Some(o), Some(t)) => {
2385                if o == t {
2386                    merged_members.push(o.to_string());
2387                } else {
2388                    has_conflict = true;
2389                    merged_members.push(scoped_conflict_marker(name, None, Some(o), Some(t), false, false, marker_format));
2390                }
2391            }
2392            // Deleted by both
2393            (Some(_), None, None) => {}
2394            (None, None, None) => {}
2395        }
2396    }
2397
2398    // Reconstruct: header + merged members + footer
2399    let mut result = String::new();
2400    result.push_str(ours_header);
2401    if !ours_header.ends_with('\n') {
2402        result.push('\n');
2403    }
2404
2405    // Detect if members are single-line (fields, variants) vs multi-line (methods)
2406    let has_multiline_members = merged_members.iter().any(|m| m.contains('\n'));
2407
2408    for (i, member) in merged_members.iter().enumerate() {
2409        result.push_str(member);
2410        if !member.ends_with('\n') {
2411            result.push('\n');
2412        }
2413        // Add blank line between multi-line members (methods) but not single-line (fields, variants)
2414        if i < merged_members.len() - 1 && has_multiline_members && !member.ends_with("\n\n") {
2415            result.push('\n');
2416        }
2417    }
2418
2419    result.push_str(ours_footer);
2420    if !ours_footer.ends_with('\n') && ours.ends_with('\n') {
2421        result.push('\n');
2422    }
2423
2424    // If children_to_chunks led to conflicts, retry with indentation heuristic.
2425    // The indentation approach includes trailing blank lines in chunks, giving
2426    // diffy more context to merge adjacent changes from different branches.
2427    if has_conflict && use_children {
2428        if let (Some(bc), Some(oc), Some(tc)) = (
2429            extract_member_chunks(base),
2430            extract_member_chunks(ours),
2431            extract_member_chunks(theirs),
2432        ) {
2433            if !bc.is_empty() || !oc.is_empty() || !tc.is_empty() {
2434                let fallback = try_inner_merge_with_chunks(
2435                    &bc, &oc, &tc, ours, ours_header, ours_footer,
2436                    has_multiline_members, marker_format,
2437                );
2438                if let Some(fb) = fallback {
2439                    if !fb.has_conflicts {
2440                        return Some(fb);
2441                    }
2442                }
2443            }
2444        }
2445    }
2446
2447    Some(InnerMergeResult {
2448        content: result,
2449        has_conflicts: has_conflict,
2450    })
2451}
2452
2453/// Inner merge helper using pre-extracted chunks. Used for indentation-heuristic fallback.
2454fn try_inner_merge_with_chunks(
2455    base_chunks: &[MemberChunk],
2456    ours_chunks: &[MemberChunk],
2457    theirs_chunks: &[MemberChunk],
2458    ours: &str,
2459    ours_header: &str,
2460    ours_footer: &str,
2461    has_multiline_hint: bool,
2462    marker_format: &MarkerFormat,
2463) -> Option<InnerMergeResult> {
2464    let base_map: HashMap<&str, &str> = base_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2465    let ours_map: HashMap<&str, &str> = ours_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2466    let theirs_map: HashMap<&str, &str> = theirs_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2467
2468    let mut all_names: Vec<String> = Vec::new();
2469    let mut seen: HashSet<String> = HashSet::new();
2470    for chunk in ours_chunks {
2471        if seen.insert(chunk.name.clone()) {
2472            all_names.push(chunk.name.clone());
2473        }
2474    }
2475    for chunk in theirs_chunks {
2476        if seen.insert(chunk.name.clone()) {
2477            all_names.push(chunk.name.clone());
2478        }
2479    }
2480
2481    let mut merged_members: Vec<String> = Vec::new();
2482    let mut has_conflict = false;
2483
2484    for name in &all_names {
2485        let in_base = base_map.get(name.as_str());
2486        let in_ours = ours_map.get(name.as_str());
2487        let in_theirs = theirs_map.get(name.as_str());
2488
2489        match (in_base, in_ours, in_theirs) {
2490            (Some(b), Some(o), Some(t)) => {
2491                if o == t {
2492                    merged_members.push(o.to_string());
2493                } else if b == o {
2494                    merged_members.push(t.to_string());
2495                } else if b == t {
2496                    merged_members.push(o.to_string());
2497                } else if let Some(merged) = diffy_merge(b, o, t) {
2498                    merged_members.push(merged);
2499                } else if let Some(merged) = git_merge_string(b, o, t) {
2500                    merged_members.push(merged);
2501                } else {
2502                    has_conflict = true;
2503                    merged_members.push(scoped_conflict_marker(name, Some(b), Some(o), Some(t), false, false, marker_format));
2504                }
2505            }
2506            (Some(b), Some(o), None) => {
2507                if *b != *o { merged_members.push(o.to_string()); }
2508            }
2509            (Some(b), None, Some(t)) => {
2510                if *b != *t { merged_members.push(t.to_string()); }
2511            }
2512            (None, Some(o), None) => merged_members.push(o.to_string()),
2513            (None, None, Some(t)) => merged_members.push(t.to_string()),
2514            (None, Some(o), Some(t)) => {
2515                if o == t {
2516                    merged_members.push(o.to_string());
2517                } else {
2518                    has_conflict = true;
2519                    merged_members.push(scoped_conflict_marker(name, None, Some(o), Some(t), false, false, marker_format));
2520                }
2521            }
2522            (Some(_), None, None) | (None, None, None) => {}
2523        }
2524    }
2525
2526    let has_multiline_members = has_multiline_hint || merged_members.iter().any(|m| m.contains('\n'));
2527    let mut result = String::new();
2528    result.push_str(ours_header);
2529    if !ours_header.ends_with('\n') { result.push('\n'); }
2530    for (i, member) in merged_members.iter().enumerate() {
2531        result.push_str(member);
2532        if !member.ends_with('\n') { result.push('\n'); }
2533        if i < merged_members.len() - 1 && has_multiline_members && !member.ends_with("\n\n") {
2534            result.push('\n');
2535        }
2536    }
2537    result.push_str(ours_footer);
2538    if !ours_footer.ends_with('\n') && ours.ends_with('\n') { result.push('\n'); }
2539
2540    Some(InnerMergeResult {
2541        content: result,
2542        has_conflicts: has_conflict,
2543    })
2544}
2545
/// Extract the header (declaration up to the opening line) and footer (closing
/// brace line onward) from a container.
///
/// Supports both brace-delimited (JS/TS/Java/Rust/C) and indentation-based (Python)
/// containers. A container counts as Python-style when some line starts with
/// `class ` or `def ` and ends with `:`, and no line contains `{`.
///
/// Returns `(header, footer)` as slices of `content`:
/// - brace style: header runs through the first line containing `{`; footer starts
///   at the last line that is exactly `}` or `};` (ignoring surrounding whitespace);
/// - Python style: header runs through the first line ending with `:`; footer is
///   empty.
///
/// Returns `None` when `content` has fewer than two lines or the required header /
/// footer line cannot be found.
fn extract_container_wrapper(content: &str) -> Option<(&str, &str)> {
    // Every piece keeps its own terminator ("\n" or "\r\n"), so summing piece
    // lengths yields exact byte offsets into `content` for any line-ending style.
    // (Assuming one terminator byte per line mis-slices CRLF input.)
    let lines: Vec<&str> = content.split_inclusive('\n').collect();
    if lines.len() < 2 {
        return None;
    }

    // Byte offset at which line `idx` starts.
    let offset_of = |idx: usize| -> usize { lines[..idx].iter().map(|l| l.len()).sum() };

    let has_python_header = lines.iter().any(|l| {
        let trimmed = l.trim();
        (trimmed.starts_with("class ") || trimmed.starts_with("def ")) && trimmed.ends_with(':')
    });
    let has_brace = lines.iter().any(|l| l.contains('{'));

    if has_python_header && !has_brace {
        // Python: header is the `class Foo:` line; there is no closing line.
        let header_idx = lines.iter().position(|l| l.trim().ends_with(':'))?;
        let header = &content[..offset_of(header_idx + 1)];
        let footer = &content[content.len()..];
        Some((header, footer))
    } else {
        // Brace-delimited: header through the first `{` line, footer from the last
        // closing-brace line.
        let header_idx = lines.iter().position(|l| l.contains('{'))?;
        let footer_idx = lines
            .iter()
            .rposition(|l| matches!(l.trim(), "}" | "};"))?;
        let header = &content[..offset_of(header_idx + 1)];
        let footer = &content[offset_of(footer_idx)..];
        Some((header, footer))
    }
}
2595
2596/// Extract named member chunks from a container body.
2597///
2598/// Identifies member boundaries by indentation: members start at the first
2599/// indentation level inside the container. Each member extends until the next
2600/// member starts or the container closes.
2601fn extract_member_chunks(content: &str) -> Option<Vec<MemberChunk>> {
2602    let lines: Vec<&str> = content.lines().collect();
2603    if lines.len() < 2 {
2604        return None;
2605    }
2606
2607    // Check if Python-style (indentation-based)
2608    let is_python_style = lines.iter().any(|l| {
2609        let trimmed = l.trim();
2610        (trimmed.starts_with("class ") || trimmed.starts_with("def "))
2611            && trimmed.ends_with(':')
2612    }) && !lines.iter().any(|l| l.contains('{'));
2613
2614    // Find the body range
2615    let body_start = if is_python_style {
2616        lines.iter().position(|l| l.trim().ends_with(':'))? + 1
2617    } else {
2618        lines.iter().position(|l| l.contains('{'))? + 1
2619    };
2620    let body_end = if is_python_style {
2621        // Python: body extends to end of content
2622        lines.len()
2623    } else {
2624        lines.iter().rposition(|l| {
2625            let trimmed = l.trim();
2626            trimmed == "}" || trimmed == "};"
2627        })?
2628    };
2629
2630    if body_start >= body_end {
2631        return None;
2632    }
2633
2634    // Determine member indentation level by looking at first non-empty body line
2635    let member_indent = lines[body_start..body_end]
2636        .iter()
2637        .find(|l| !l.trim().is_empty())
2638        .map(|l| l.len() - l.trim_start().len())?;
2639
2640    let mut chunks: Vec<MemberChunk> = Vec::new();
2641    let mut current_chunk_lines: Vec<&str> = Vec::new();
2642    let mut current_name: Option<String> = None;
2643
2644    for line in &lines[body_start..body_end] {
2645        let trimmed = line.trim();
2646        if trimmed.is_empty() {
2647            // Blank lines: if we have a current chunk, include them
2648            if current_name.is_some() {
2649                // Only include if not trailing blanks
2650                current_chunk_lines.push(line);
2651            }
2652            continue;
2653        }
2654
2655        let indent = line.len() - line.trim_start().len();
2656
2657        // Is this a new member declaration at the member indent level?
2658        // Exclude closing braces, comments, and decorators/annotations
2659        if indent == member_indent
2660            && !trimmed.starts_with("//")
2661            && !trimmed.starts_with("/*")
2662            && !trimmed.starts_with("*")
2663            && !trimmed.starts_with("#")
2664            && !trimmed.starts_with("@")
2665            && trimmed != "}"
2666            && trimmed != "};"
2667            && trimmed != ","
2668        {
2669            // Save previous chunk
2670            if let Some(name) = current_name.take() {
2671                // Trim trailing blank lines
2672                while current_chunk_lines.last().map_or(false, |l| l.trim().is_empty()) {
2673                    current_chunk_lines.pop();
2674                }
2675                if !current_chunk_lines.is_empty() {
2676                    chunks.push(MemberChunk {
2677                        name,
2678                        content: current_chunk_lines.join("\n"),
2679                    });
2680                }
2681                current_chunk_lines.clear();
2682            }
2683
2684            // Start new chunk — extract member name
2685            let name = extract_member_name(trimmed);
2686            current_name = Some(name);
2687            current_chunk_lines.push(line);
2688        } else if current_name.is_some() {
2689            // Continuation of current member (body lines, nested blocks)
2690            current_chunk_lines.push(line);
2691        } else {
2692            // Content before first member (decorators, comments for first member)
2693            // Attach to next member
2694            current_chunk_lines.push(line);
2695        }
2696    }
2697
2698    // Save last chunk
2699    if let Some(name) = current_name {
2700        while current_chunk_lines.last().map_or(false, |l| l.trim().is_empty()) {
2701            current_chunk_lines.pop();
2702        }
2703        if !current_chunk_lines.is_empty() {
2704            chunks.push(MemberChunk {
2705                name,
2706                content: current_chunk_lines.join("\n"),
2707            });
2708        }
2709    }
2710
2711    if chunks.is_empty() {
2712        None
2713    } else {
2714        Some(chunks)
2715    }
2716}
2717
/// Extract a member name from a declaration line.
///
/// Heuristics, tried in order on the trimmed line (after skipping a leading Rust
/// restricted-visibility qualifier such as `pub(crate)`):
/// 1. Go method with receiver (`func (c *Calc) Add(`): identifier before the second
///    `(`.
/// 2. Anything with parentheses: identifier immediately before the first `(`
///    (Java `public int add(`, Rust `pub fn add(`, Python `def add(`, ...).
/// 3. Otherwise: strip leading modifier keywords and take the first identifier
///    (fields, properties, enum variants).
///
/// If none of these yields an identifier, the first 20 characters of the trimmed
/// line are returned so the member still gets a (best-effort) key.
fn extract_member_name(line: &str) -> String {
    // Longest suffix of `text` made of alphanumerics and `_` (possibly empty).
    fn trailing_identifier(text: &str) -> &str {
        let head = text.trim_end_matches(|c: char| c.is_alphanumeric() || c == '_');
        &text[head.len()..]
    }

    // Skips a leading `pub(crate)` / `pub(super)` / `pub(self)` / `pub(in path)`.
    // Without this, the `(` of the qualifier is mistaken for a parameter list and
    // every such member is named "pub". Anything that does not look exactly like a
    // visibility qualifier followed by an item is returned untouched.
    fn strip_restricted_visibility(decl: &str) -> &str {
        let Some(after_open) = decl.strip_prefix("pub(") else {
            return decl;
        };
        let Some(close) = after_open.find(')') else {
            return decl;
        };
        let scope = after_open[..close].trim();
        let rest = after_open[close + 1..].trim_start();
        let is_scope = matches!(scope, "crate" | "super" | "self") || scope.starts_with("in ");
        let starts_item = rest
            .chars()
            .next()
            .map_or(false, |c| c.is_alphabetic() || c == '_');
        if is_scope && starts_item {
            rest
        } else {
            decl
        }
    }

    let trimmed = line.trim();
    let decl = strip_restricted_visibility(trimmed);

    // Go method receiver: `func (c *Calculator) Add(` -> skip the receiver, then
    // take the identifier before the next `(`.
    if decl.starts_with("func ") && decl.get(5..6) == Some("(") {
        if let Some(recv_close) = decl.find(')') {
            let after_recv = &decl[recv_close + 1..];
            if let Some(paren_pos) = after_recv.find('(') {
                let name = trailing_identifier(after_recv[..paren_pos].trim());
                if !name.is_empty() {
                    return name.to_string();
                }
            }
        }
    }

    // Strategy 1: for declarations with parentheses, the name is the identifier
    // immediately before the first `(`.
    if let Some(paren_pos) = decl.find('(') {
        let name = trailing_identifier(decl[..paren_pos].trim_end());
        if !name.is_empty() {
            return name.to_string();
        }
    }

    // Strategy 2: for fields/properties/variants, strip modifier keywords (one pass,
    // in this order) and take the first identifier.
    let mut rest = decl;
    for keyword in &[
        "export ", "public ", "private ", "protected ", "static ",
        "abstract ", "async ", "override ", "readonly ",
        "pub ", "pub(crate) ", "fn ", "def ", "get ", "set ",
    ] {
        if let Some(stripped) = rest.strip_prefix(keyword) {
            rest = stripped;
        }
    }
    if let Some(stripped) = rest.strip_prefix("fn ") {
        rest = stripped;
    }

    let name: String = rest
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();

    if name.is_empty() {
        // Last resort: use the start of the line itself as the key.
        trimmed.chars().take(20).collect()
    } else {
        name
    }
}
2790
/// Check if content looks binary (contains null bytes in first 8KB).
fn is_binary(content: &str) -> bool {
    content.bytes().take(8192).any(|byte| byte == 0)
}

/// Check if content already contains git conflict markers.
/// This happens with AU/AA conflicts where git stores markers in stage blobs.
///
/// Only checks that both a `<<<<<<<` and a `>>>>>>>` run occur somewhere in the
/// text; it does not require them to be at the start of a line.
fn has_conflict_markers(content: &str) -> bool {
    ["<<<<<<<", ">>>>>>>"]
        .iter()
        .all(|marker| content.contains(marker))
}

/// Returns true for data/config file formats where Sesame separator expansion
/// (`{`, `}`, `;`) is counterproductive because those chars are structural
/// content rather than code block separators.
///
/// The decision is made from the (case-insensitive) file name suffix only.
///
/// Note: template files like .svelte/.vue are NOT included here because their
/// embedded `<script>` sections contain real code where Sesame helps.
fn skip_sesame(file_path: &str) -> bool {
    const SKIPPED_EXTENSIONS: [&str; 20] = [
        // Data/config formats
        ".json", ".yaml", ".yml", ".toml", ".lock", ".xml", ".csv", ".tsv",
        ".ini", ".cfg", ".conf", ".properties", ".env",
        // Markup/document formats
        ".md", ".markdown", ".txt", ".rst", ".svg", ".html", ".htm",
    ];

    let path_lower = file_path.to_lowercase();
    SKIPPED_EXTENSIONS
        .iter()
        .any(|ext| path_lower.ends_with(ext))
}
2819
/// Expand syntactic separators into separate lines for finer merge alignment.
/// Inspired by Sesame (arXiv:2407.18888): isolating separators lets line-based
/// merge tools see block boundaries as independent change units.
///
/// Every `{`, `}` and `;` found outside a quoted region is placed on a line of its
/// own: a newline is inserted before it (unless the output is empty or already ends
/// with a newline) and always after it. Quoted regions start at `"`, `'` or `` ` ``
/// and end at the next unescaped occurrence of the same character; a backslash
/// inside a quoted region escapes the following character. Comments and
/// language-specific quoting rules are not interpreted.
fn expand_separators(content: &str) -> String {
    let mut out = String::with_capacity(content.len() * 2);
    // Quote character of the region currently open, if any.
    let mut open_quote: Option<char> = None;
    // Set right after a backslash inside a quoted region.
    let mut escaped = false;

    for ch in content.chars() {
        if escaped {
            out.push(ch);
            escaped = false;
            continue;
        }

        match open_quote {
            Some(quote) => {
                if ch == '\\' {
                    escaped = true;
                } else if ch == quote {
                    open_quote = None;
                }
                out.push(ch);
            }
            None => match ch {
                '"' | '\'' | '`' => {
                    open_quote = Some(ch);
                    out.push(ch);
                }
                '{' | '}' | ';' => {
                    // Isolate the separator on its own line.
                    if !out.is_empty() && !out.ends_with('\n') {
                        out.push('\n');
                    }
                    out.push(ch);
                    out.push('\n');
                }
                _ => out.push(ch),
            },
        }
    }

    out
}
2869
/// Post-process merged text that went through separator expansion.
///
/// Every line of `merged` is written back terminated by a single `\n` (so `\r\n`
/// endings become `\n` and a missing final newline is added), and surplus trailing
/// blank lines are trimmed so the result ends with at most one newline.
///
/// NOTE(review): separator-only lines (`{`, `}`, `;`) are emitted unchanged. The
/// previous implementation contained a branch meant to re-join them with the
/// preceding line, but it could never execute: the output buffer was always empty
/// or newline-terminated when the branch condition was evaluated. That dead branch
/// has been removed without changing the output; real re-joining, if wanted, has to
/// be designed together with the expansion step and its callers.
///
/// `_base` is accepted for interface compatibility and is currently unused.
fn collapse_separators(merged: &str, _base: &str) -> String {
    let mut result = String::with_capacity(merged.len() + 1);

    for line in merged.lines() {
        result.push_str(line);
        result.push('\n');
    }

    // Trim any trailing extra newlines to match original style.
    while result.ends_with("\n\n") {
        result.pop();
    }

    result
}
2915
2916#[cfg(test)]
2917mod tests {
2918    use super::*;
2919
2920    #[test]
2921    fn test_replace_at_word_boundaries() {
2922        // Should replace standalone occurrences
2923        assert_eq!(replace_at_word_boundaries("fn get() {}", "get", "__E__"), "fn __E__() {}");
2924        // Should NOT replace inside longer identifiers
2925        assert_eq!(replace_at_word_boundaries("fn getAll() {}", "get", "__E__"), "fn getAll() {}");
2926        assert_eq!(replace_at_word_boundaries("fn _get() {}", "get", "__E__"), "fn _get() {}");
2927        // Should replace multiple standalone occurrences
2928        assert_eq!(
2929            replace_at_word_boundaries("pub enum Source { Source }", "Source", "__E__"),
2930            "pub enum __E__ { __E__ }"
2931        );
2932        // Should not replace substring at start/end of identifiers
2933        assert_eq!(
2934            replace_at_word_boundaries("SourceManager isSource", "Source", "__E__"),
2935            "SourceManager isSource"
2936        );
2937        // Should handle multi-byte UTF-8 characters (emojis) without panicking
2938        assert_eq!(
2939            replace_at_word_boundaries("❌ get ✅", "get", "__E__"),
2940            "❌ __E__ ✅"
2941        );
2942        assert_eq!(
2943            replace_at_word_boundaries("fn 名前() { get }", "get", "__E__"),
2944            "fn 名前() { __E__ }"
2945        );
2946        // Emoji-only content with no needle match should pass through unchanged
2947        assert_eq!(
2948            replace_at_word_boundaries("🎉🚀✨", "get", "__E__"),
2949            "🎉🚀✨"
2950        );
2951    }
2952
2953    #[test]
2954    fn test_fast_path_identical() {
2955        let content = "hello world";
2956        let result = entity_merge(content, content, content, "test.ts");
2957        assert!(result.is_clean());
2958        assert_eq!(result.content, content);
2959    }
2960
2961    #[test]
2962    fn test_fast_path_only_ours_changed() {
2963        let base = "hello";
2964        let ours = "hello world";
2965        let result = entity_merge(base, ours, base, "test.ts");
2966        assert!(result.is_clean());
2967        assert_eq!(result.content, ours);
2968    }
2969
2970    #[test]
2971    fn test_fast_path_only_theirs_changed() {
2972        let base = "hello";
2973        let theirs = "hello world";
2974        let result = entity_merge(base, base, theirs, "test.ts");
2975        assert!(result.is_clean());
2976        assert_eq!(result.content, theirs);
2977    }
2978
2979    #[test]
2980    fn test_different_functions_no_conflict() {
2981        // Core value prop: two agents add different functions to the same file
2982        let base = r#"export function existing() {
2983    return 1;
2984}
2985"#;
2986        let ours = r#"export function existing() {
2987    return 1;
2988}
2989
2990export function agentA() {
2991    return "added by agent A";
2992}
2993"#;
2994        let theirs = r#"export function existing() {
2995    return 1;
2996}
2997
2998export function agentB() {
2999    return "added by agent B";
3000}
3001"#;
3002        let result = entity_merge(base, ours, theirs, "test.ts");
3003        assert!(
3004            result.is_clean(),
3005            "Should auto-resolve: different functions added. Conflicts: {:?}",
3006            result.conflicts
3007        );
3008        assert!(
3009            result.content.contains("agentA"),
3010            "Should contain agentA function"
3011        );
3012        assert!(
3013            result.content.contains("agentB"),
3014            "Should contain agentB function"
3015        );
3016    }
3017
3018    #[test]
3019    fn test_same_function_modified_by_both_conflict() {
3020        let base = r#"export function shared() {
3021    return "original";
3022}
3023"#;
3024        let ours = r#"export function shared() {
3025    return "modified by ours";
3026}
3027"#;
3028        let theirs = r#"export function shared() {
3029    return "modified by theirs";
3030}
3031"#;
3032        let result = entity_merge(base, ours, theirs, "test.ts");
3033        // This should be a conflict since both modified the same function incompatibly
3034        assert!(
3035            !result.is_clean(),
3036            "Should conflict when both modify same function differently"
3037        );
3038        assert_eq!(result.conflicts.len(), 1);
3039        assert_eq!(result.conflicts[0].entity_name, "shared");
3040    }
3041
3042    #[test]
3043    fn test_fallback_for_unknown_filetype() {
3044        // Non-adjacent changes should merge cleanly with line-level merge
3045        let base = "line 1\nline 2\nline 3\nline 4\nline 5\n";
3046        let ours = "line 1 modified\nline 2\nline 3\nline 4\nline 5\n";
3047        let theirs = "line 1\nline 2\nline 3\nline 4\nline 5 modified\n";
3048        let result = entity_merge(base, ours, theirs, "test.xyz");
3049        assert!(
3050            result.is_clean(),
3051            "Non-adjacent changes should merge cleanly. Conflicts: {:?}",
3052            result.conflicts,
3053        );
3054    }
3055
3056    #[test]
3057    fn test_line_level_fallback() {
3058        // Non-adjacent changes merge cleanly in 3-way merge
3059        let base = "a\nb\nc\nd\ne\n";
3060        let ours = "A\nb\nc\nd\ne\n";
3061        let theirs = "a\nb\nc\nd\nE\n";
3062        let result = line_level_fallback(base, ours, theirs, "test.rs");
3063        assert!(result.is_clean());
3064        assert!(result.stats.used_fallback);
3065        assert_eq!(result.content, "A\nb\nc\nd\nE\n");
3066    }
3067
3068    #[test]
3069    fn test_line_level_fallback_conflict() {
3070        // Same line changed differently → conflict
3071        let base = "a\nb\nc\n";
3072        let ours = "X\nb\nc\n";
3073        let theirs = "Y\nb\nc\n";
3074        let result = line_level_fallback(base, ours, theirs, "test.rs");
3075        assert!(!result.is_clean());
3076        assert!(result.stats.used_fallback);
3077    }
3078
3079    #[test]
3080    fn test_expand_separators() {
3081        let code = "function foo() { return 1; }";
3082        let expanded = expand_separators(code);
3083        // Separators should be on their own lines
3084        assert!(expanded.contains("{\n"), "Opening brace should have newline after");
3085        assert!(expanded.contains(";\n"), "Semicolons should have newline after");
3086        assert!(expanded.contains("\n}"), "Closing brace should have newline before");
3087    }
3088
3089    #[test]
3090    fn test_expand_separators_preserves_strings() {
3091        let code = r#"let x = "hello { world };";"#;
3092        let expanded = expand_separators(code);
3093        // Separators inside strings should NOT be expanded
3094        assert!(
3095            expanded.contains("\"hello { world };\""),
3096            "Separators in strings should be preserved: {}",
3097            expanded
3098        );
3099    }
3100
3101    #[test]
3102    fn test_is_import_region() {
3103        assert!(is_import_region("import foo from 'foo';\nimport bar from 'bar';\n"));
3104        assert!(is_import_region("use std::io;\nuse std::fs;\n"));
3105        assert!(!is_import_region("let x = 1;\nlet y = 2;\n"));
3106        // Mixed: 1 import + 2 non-imports → not import region
3107        assert!(!is_import_region("import foo from 'foo';\nlet x = 1;\nlet y = 2;\n"));
3108        // Empty → not import region
3109        assert!(!is_import_region(""));
3110    }
3111
3112    #[test]
3113    fn test_is_import_line() {
3114        // JS/TS
3115        assert!(is_import_line("import foo from 'foo';"));
3116        assert!(is_import_line("import { bar } from 'bar';"));
3117        assert!(is_import_line("from typing import List"));
3118        // Rust
3119        assert!(is_import_line("use std::io::Read;"));
3120        // C/C++
3121        assert!(is_import_line("#include <stdio.h>"));
3122        // Node require
3123        assert!(is_import_line("const fs = require('fs');"));
3124        // Not imports
3125        assert!(!is_import_line("let x = 1;"));
3126        assert!(!is_import_line("function foo() {}"));
3127    }
3128
3129    #[test]
3130    fn test_commutative_import_merge_both_add_different() {
3131        // The key scenario: both branches add different imports
3132        let base = "import a from 'a';\nimport b from 'b';\n";
3133        let ours = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
3134        let theirs = "import a from 'a';\nimport b from 'b';\nimport d from 'd';\n";
3135        let result = merge_imports_commutatively(base, ours, theirs);
3136        assert!(result.contains("import a from 'a';"));
3137        assert!(result.contains("import b from 'b';"));
3138        assert!(result.contains("import c from 'c';"));
3139        assert!(result.contains("import d from 'd';"));
3140    }
3141
3142    #[test]
3143    fn test_commutative_import_merge_one_removes() {
3144        // Ours removes an import, theirs keeps it → removed
3145        let base = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
3146        let ours = "import a from 'a';\nimport c from 'c';\n";
3147        let theirs = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
3148        let result = merge_imports_commutatively(base, ours, theirs);
3149        assert!(result.contains("import a from 'a';"));
3150        assert!(!result.contains("import b from 'b';"), "Removed import should stay removed");
3151        assert!(result.contains("import c from 'c';"));
3152    }
3153
3154    #[test]
3155    fn test_commutative_import_merge_both_add_same() {
3156        // Both add the same import → should appear only once
3157        let base = "import a from 'a';\n";
3158        let ours = "import a from 'a';\nimport b from 'b';\n";
3159        let theirs = "import a from 'a';\nimport b from 'b';\n";
3160        let result = merge_imports_commutatively(base, ours, theirs);
3161        let count = result.matches("import b from 'b';").count();
3162        assert_eq!(count, 1, "Duplicate import should be deduplicated");
3163    }
3164
3165    #[test]
3166    fn test_inner_entity_merge_different_methods() {
3167        // Two agents modify different methods in the same class
3168        // This would normally conflict with diffy because the changes are adjacent
3169        let base = r#"export class Calculator {
3170    add(a: number, b: number): number {
3171        return a + b;
3172    }
3173
3174    subtract(a: number, b: number): number {
3175        return a - b;
3176    }
3177}
3178"#;
3179        let ours = r#"export class Calculator {
3180    add(a: number, b: number): number {
3181        // Added logging
3182        console.log("adding", a, b);
3183        return a + b;
3184    }
3185
3186    subtract(a: number, b: number): number {
3187        return a - b;
3188    }
3189}
3190"#;
3191        let theirs = r#"export class Calculator {
3192    add(a: number, b: number): number {
3193        return a + b;
3194    }
3195
3196    subtract(a: number, b: number): number {
3197        // Added validation
3198        if (b > a) throw new Error("negative");
3199        return a - b;
3200    }
3201}
3202"#;
3203        let result = entity_merge(base, ours, theirs, "test.ts");
3204        assert!(
3205            result.is_clean(),
3206            "Different methods modified should auto-merge via inner entity merge. Conflicts: {:?}",
3207            result.conflicts,
3208        );
3209        assert!(result.content.contains("console.log"), "Should contain ours changes");
3210        assert!(result.content.contains("negative"), "Should contain theirs changes");
3211    }
3212
3213    #[test]
3214    fn test_inner_entity_merge_both_add_different_methods() {
3215        // Both branches add different methods to the same class
3216        let base = r#"export class Calculator {
3217    add(a: number, b: number): number {
3218        return a + b;
3219    }
3220}
3221"#;
3222        let ours = r#"export class Calculator {
3223    add(a: number, b: number): number {
3224        return a + b;
3225    }
3226
3227    multiply(a: number, b: number): number {
3228        return a * b;
3229    }
3230}
3231"#;
3232        let theirs = r#"export class Calculator {
3233    add(a: number, b: number): number {
3234        return a + b;
3235    }
3236
3237    divide(a: number, b: number): number {
3238        return a / b;
3239    }
3240}
3241"#;
3242        let result = entity_merge(base, ours, theirs, "test.ts");
3243        assert!(
3244            result.is_clean(),
3245            "Both adding different methods should auto-merge. Conflicts: {:?}",
3246            result.conflicts,
3247        );
3248        assert!(result.content.contains("multiply"), "Should contain ours's new method");
3249        assert!(result.content.contains("divide"), "Should contain theirs's new method");
3250    }
3251
3252    #[test]
3253    fn test_inner_entity_merge_same_method_modified_still_conflicts() {
3254        // Both modify the same method differently → should still conflict
3255        let base = r#"export class Calculator {
3256    add(a: number, b: number): number {
3257        return a + b;
3258    }
3259
3260    subtract(a: number, b: number): number {
3261        return a - b;
3262    }
3263}
3264"#;
3265        let ours = r#"export class Calculator {
3266    add(a: number, b: number): number {
3267        return a + b + 1;
3268    }
3269
3270    subtract(a: number, b: number): number {
3271        return a - b;
3272    }
3273}
3274"#;
3275        let theirs = r#"export class Calculator {
3276    add(a: number, b: number): number {
3277        return a + b + 2;
3278    }
3279
3280    subtract(a: number, b: number): number {
3281        return a - b;
3282    }
3283}
3284"#;
3285        let result = entity_merge(base, ours, theirs, "test.ts");
3286        assert!(
3287            !result.is_clean(),
3288            "Both modifying same method differently should still conflict"
3289        );
3290    }
3291
3292    #[test]
3293    fn test_extract_member_chunks() {
3294        let class_body = r#"export class Foo {
3295    bar() {
3296        return 1;
3297    }
3298
3299    baz() {
3300        return 2;
3301    }
3302}
3303"#;
3304        let chunks = extract_member_chunks(class_body).unwrap();
3305        assert_eq!(chunks.len(), 2, "Should find 2 members, found {:?}", chunks.iter().map(|c| &c.name).collect::<Vec<_>>());
3306        assert_eq!(chunks[0].name, "bar");
3307        assert_eq!(chunks[1].name, "baz");
3308    }
3309
3310    #[test]
3311    fn test_extract_member_name() {
3312        assert_eq!(extract_member_name("add(a, b) {"), "add");
3313        assert_eq!(extract_member_name("fn add(&self, a: i32) -> i32 {"), "add");
3314        assert_eq!(extract_member_name("def add(self, a, b):"), "add");
3315        assert_eq!(extract_member_name("public static getValue(): number {"), "getValue");
3316        assert_eq!(extract_member_name("async fetchData() {"), "fetchData");
3317    }
3318
3319    #[test]
3320    fn test_commutative_import_merge_rust_use() {
3321        let base = "use std::io;\nuse std::fs;\n";
3322        let ours = "use std::io;\nuse std::fs;\nuse std::path::Path;\n";
3323        let theirs = "use std::io;\nuse std::fs;\nuse std::collections::HashMap;\n";
3324        let result = merge_imports_commutatively(base, ours, theirs);
3325        assert!(result.contains("use std::path::Path;"));
3326        assert!(result.contains("use std::collections::HashMap;"));
3327        assert!(result.contains("use std::io;"));
3328        assert!(result.contains("use std::fs;"));
3329    }
3330
3331    #[test]
3332    fn test_is_whitespace_only_diff_true() {
3333        // Same content, different indentation
3334        assert!(is_whitespace_only_diff(
3335            "    return 1;\n    return 2;\n",
3336            "      return 1;\n      return 2;\n"
3337        ));
3338        // Same content, extra blank lines
3339        assert!(is_whitespace_only_diff(
3340            "return 1;\nreturn 2;\n",
3341            "return 1;\n\nreturn 2;\n"
3342        ));
3343    }
3344
3345    #[test]
3346    fn test_is_whitespace_only_diff_false() {
3347        // Different content
3348        assert!(!is_whitespace_only_diff(
3349            "    return 1;\n",
3350            "    return 2;\n"
3351        ));
3352        // Added code
3353        assert!(!is_whitespace_only_diff(
3354            "return 1;\n",
3355            "return 1;\nconsole.log('x');\n"
3356        ));
3357    }
3358
3359    #[test]
3360    fn test_ts_interface_both_add_different_fields() {
3361        let base = "interface Config {\n    name: string;\n}\n";
3362        let ours = "interface Config {\n    name: string;\n    age: number;\n}\n";
3363        let theirs = "interface Config {\n    name: string;\n    email: string;\n}\n";
3364        let result = entity_merge(base, ours, theirs, "test.ts");
3365        eprintln!("TS interface: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3366        eprintln!("Content: {:?}", result.content);
3367        assert!(
3368            result.is_clean(),
3369            "Both adding different fields to TS interface should merge. Conflicts: {:?}",
3370            result.conflicts,
3371        );
3372        assert!(result.content.contains("age"));
3373        assert!(result.content.contains("email"));
3374    }
3375
3376    #[test]
3377    fn test_rust_enum_both_add_different_variants() {
3378        let base = "enum Color {\n    Red,\n    Blue,\n}\n";
3379        let ours = "enum Color {\n    Red,\n    Blue,\n    Green,\n}\n";
3380        let theirs = "enum Color {\n    Red,\n    Blue,\n    Yellow,\n}\n";
3381        let result = entity_merge(base, ours, theirs, "test.rs");
3382        eprintln!("Rust enum: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3383        eprintln!("Content: {:?}", result.content);
3384        assert!(
3385            result.is_clean(),
3386            "Both adding different enum variants should merge. Conflicts: {:?}",
3387            result.conflicts,
3388        );
3389        assert!(result.content.contains("Green"));
3390        assert!(result.content.contains("Yellow"));
3391    }
3392
3393    #[test]
3394    fn test_python_both_add_different_decorators() {
3395        // Both add different decorators to the same function
3396        let base = "def foo():\n    return 1\n\ndef bar():\n    return 2\n";
3397        let ours = "@cache\ndef foo():\n    return 1\n\ndef bar():\n    return 2\n";
3398        let theirs = "@deprecated\ndef foo():\n    return 1\n\ndef bar():\n    return 2\n";
3399        let result = entity_merge(base, ours, theirs, "test.py");
3400        assert!(
3401            result.is_clean(),
3402            "Both adding different decorators should merge. Conflicts: {:?}",
3403            result.conflicts,
3404        );
3405        assert!(result.content.contains("@cache"));
3406        assert!(result.content.contains("@deprecated"));
3407        assert!(result.content.contains("def foo()"));
3408    }
3409
3410    #[test]
3411    fn test_decorator_plus_body_change() {
3412        // One adds decorator, other modifies body — should merge both
3413        let base = "def foo():\n    return 1\n";
3414        let ours = "@cache\ndef foo():\n    return 1\n";
3415        let theirs = "def foo():\n    return 42\n";
3416        let result = entity_merge(base, ours, theirs, "test.py");
3417        assert!(
3418            result.is_clean(),
3419            "Decorator + body change should merge. Conflicts: {:?}",
3420            result.conflicts,
3421        );
3422        assert!(result.content.contains("@cache"));
3423        assert!(result.content.contains("return 42"));
3424    }
3425
3426    #[test]
3427    fn test_ts_class_decorator_merge() {
3428        // TypeScript decorators on class methods — both add different decorators
3429        let base = "class Foo {\n    bar() {\n        return 1;\n    }\n}\n";
3430        let ours = "class Foo {\n    @Injectable()\n    bar() {\n        return 1;\n    }\n}\n";
3431        let theirs = "class Foo {\n    @Deprecated()\n    bar() {\n        return 1;\n    }\n}\n";
3432        let result = entity_merge(base, ours, theirs, "test.ts");
3433        assert!(
3434            result.is_clean(),
3435            "Both adding different decorators to same method should merge. Conflicts: {:?}",
3436            result.conflicts,
3437        );
3438        assert!(result.content.contains("@Injectable()"));
3439        assert!(result.content.contains("@Deprecated()"));
3440        assert!(result.content.contains("bar()"));
3441    }
3442
3443    #[test]
3444    fn test_non_adjacent_intra_function_changes() {
3445        let base = r#"export function process(data: any) {
3446    const validated = validate(data);
3447    const transformed = transform(validated);
3448    const saved = save(transformed);
3449    return saved;
3450}
3451"#;
3452        let ours = r#"export function process(data: any) {
3453    const validated = validate(data);
3454    const transformed = transform(validated);
3455    const saved = save(transformed);
3456    console.log("saved", saved);
3457    return saved;
3458}
3459"#;
3460        let theirs = r#"export function process(data: any) {
3461    console.log("input", data);
3462    const validated = validate(data);
3463    const transformed = transform(validated);
3464    const saved = save(transformed);
3465    return saved;
3466}
3467"#;
3468        let result = entity_merge(base, ours, theirs, "test.ts");
3469        assert!(
3470            result.is_clean(),
3471            "Non-adjacent changes within same function should merge via diffy. Conflicts: {:?}",
3472            result.conflicts,
3473        );
3474        assert!(result.content.contains("console.log(\"saved\""));
3475        assert!(result.content.contains("console.log(\"input\""));
3476    }
3477
3478    #[test]
3479    fn test_method_reordering_with_modification() {
3480        // Agent A reorders methods in class, Agent B modifies one method
3481        // Inner entity merge matches by name, so reordering should be transparent
3482        let base = r#"class Service {
3483    getUser(id: string) {
3484        return db.find(id);
3485    }
3486
3487    createUser(data: any) {
3488        return db.create(data);
3489    }
3490
3491    deleteUser(id: string) {
3492        return db.delete(id);
3493    }
3494}
3495"#;
3496        // Ours: reorder methods (move deleteUser before createUser)
3497        let ours = r#"class Service {
3498    getUser(id: string) {
3499        return db.find(id);
3500    }
3501
3502    deleteUser(id: string) {
3503        return db.delete(id);
3504    }
3505
3506    createUser(data: any) {
3507        return db.create(data);
3508    }
3509}
3510"#;
3511        // Theirs: modify getUser
3512        let theirs = r#"class Service {
3513    getUser(id: string) {
3514        console.log("fetching", id);
3515        return db.find(id);
3516    }
3517
3518    createUser(data: any) {
3519        return db.create(data);
3520    }
3521
3522    deleteUser(id: string) {
3523        return db.delete(id);
3524    }
3525}
3526"#;
3527        let result = entity_merge(base, ours, theirs, "test.ts");
3528        eprintln!("Method reorder: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3529        eprintln!("Content:\n{}", result.content);
3530        assert!(
3531            result.is_clean(),
3532            "Method reordering + modification should merge. Conflicts: {:?}",
3533            result.conflicts,
3534        );
3535        assert!(result.content.contains("console.log(\"fetching\""), "Should contain theirs modification");
3536        assert!(result.content.contains("deleteUser"), "Should have deleteUser");
3537        assert!(result.content.contains("createUser"), "Should have createUser");
3538    }
3539
3540    #[test]
3541    fn test_doc_comment_plus_body_change() {
3542        // One side adds JSDoc comment, other modifies function body
3543        // Doc comments are part of the entity region — they should merge with body changes
3544        let base = r#"export function calculate(a: number, b: number): number {
3545    return a + b;
3546}
3547"#;
3548        let ours = r#"/**
3549 * Calculate the sum of two numbers.
3550 * @param a - First number
3551 * @param b - Second number
3552 */
3553export function calculate(a: number, b: number): number {
3554    return a + b;
3555}
3556"#;
3557        let theirs = r#"export function calculate(a: number, b: number): number {
3558    const result = a + b;
3559    console.log("result:", result);
3560    return result;
3561}
3562"#;
3563        let result = entity_merge(base, ours, theirs, "test.ts");
3564        eprintln!("Doc comment + body: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3565        eprintln!("Content:\n{}", result.content);
3566        // This tests whether weave can merge doc comment additions with body changes
3567    }
3568
3569    #[test]
3570    fn test_both_add_different_guard_clauses() {
3571        // Both add different guard clauses at the start of a function
3572        let base = r#"export function processOrder(order: Order): Result {
3573    const total = calculateTotal(order);
3574    return { success: true, total };
3575}
3576"#;
3577        let ours = r#"export function processOrder(order: Order): Result {
3578    if (!order) throw new Error("Order required");
3579    const total = calculateTotal(order);
3580    return { success: true, total };
3581}
3582"#;
3583        let theirs = r#"export function processOrder(order: Order): Result {
3584    if (order.items.length === 0) throw new Error("Empty order");
3585    const total = calculateTotal(order);
3586    return { success: true, total };
3587}
3588"#;
3589        let result = entity_merge(base, ours, theirs, "test.ts");
3590        eprintln!("Guard clauses: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3591        eprintln!("Content:\n{}", result.content);
3592        // Both add at same position — diffy may struggle since they're at the same insertion point
3593    }
3594
3595    #[test]
3596    fn test_both_modify_different_enum_variants() {
3597        // One modifies a variant's value, other adds new variants
3598        let base = r#"enum Status {
3599    Active = "active",
3600    Inactive = "inactive",
3601    Pending = "pending",
3602}
3603"#;
3604        let ours = r#"enum Status {
3605    Active = "active",
3606    Inactive = "disabled",
3607    Pending = "pending",
3608}
3609"#;
3610        let theirs = r#"enum Status {
3611    Active = "active",
3612    Inactive = "inactive",
3613    Pending = "pending",
3614    Deleted = "deleted",
3615}
3616"#;
3617        let result = entity_merge(base, ours, theirs, "test.ts");
3618        eprintln!("Enum modify+add: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3619        eprintln!("Content:\n{}", result.content);
3620        assert!(
3621            result.is_clean(),
3622            "Modify variant + add new variant should merge. Conflicts: {:?}",
3623            result.conflicts,
3624        );
3625        assert!(result.content.contains("\"disabled\""), "Should have modified Inactive");
3626        assert!(result.content.contains("Deleted"), "Should have new Deleted variant");
3627    }
3628
3629    #[test]
3630    fn test_config_object_field_additions() {
3631        // Both add different fields to a config object (exported const)
3632        let base = r#"export const config = {
3633    timeout: 5000,
3634    retries: 3,
3635};
3636"#;
3637        let ours = r#"export const config = {
3638    timeout: 5000,
3639    retries: 3,
3640    maxConnections: 10,
3641};
3642"#;
3643        let theirs = r#"export const config = {
3644    timeout: 5000,
3645    retries: 3,
3646    logLevel: "info",
3647};
3648"#;
3649        let result = entity_merge(base, ours, theirs, "test.ts");
3650        eprintln!("Config fields: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3651        eprintln!("Content:\n{}", result.content);
3652        // This tests whether inner entity merge handles object literals
3653        // (it probably won't since object fields aren't extracted as members the same way)
3654    }
3655
3656    #[test]
3657    fn test_rust_impl_block_both_add_methods() {
3658        // Both add different methods to a Rust impl block
3659        let base = r#"impl Calculator {
3660    fn add(&self, a: i32, b: i32) -> i32 {
3661        a + b
3662    }
3663}
3664"#;
3665        let ours = r#"impl Calculator {
3666    fn add(&self, a: i32, b: i32) -> i32 {
3667        a + b
3668    }
3669
3670    fn multiply(&self, a: i32, b: i32) -> i32 {
3671        a * b
3672    }
3673}
3674"#;
3675        let theirs = r#"impl Calculator {
3676    fn add(&self, a: i32, b: i32) -> i32 {
3677        a + b
3678    }
3679
3680    fn divide(&self, a: i32, b: i32) -> i32 {
3681        a / b
3682    }
3683}
3684"#;
3685        let result = entity_merge(base, ours, theirs, "test.rs");
3686        eprintln!("Rust impl: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3687        eprintln!("Content:\n{}", result.content);
3688        assert!(
3689            result.is_clean(),
3690            "Both adding methods to Rust impl should merge. Conflicts: {:?}",
3691            result.conflicts,
3692        );
3693        assert!(result.content.contains("multiply"), "Should have multiply");
3694        assert!(result.content.contains("divide"), "Should have divide");
3695    }
3696
3697    #[test]
3698    fn test_rust_impl_same_trait_different_types() {
3699        // Two impl blocks for the same trait but different types.
3700        // Each branch modifies a different impl. Both should be preserved.
3701        // Regression: sem-core <0.3.10 named both "Stream", causing collision.
3702        let base = r#"struct Foo;
3703struct Bar;
3704
3705impl Stream for Foo {
3706    type Item = i32;
3707    fn poll_next(&self) -> Option<i32> {
3708        Some(1)
3709    }
3710}
3711
3712impl Stream for Bar {
3713    type Item = String;
3714    fn poll_next(&self) -> Option<String> {
3715        Some("hello".into())
3716    }
3717}
3718
3719fn other() {}
3720"#;
3721        let ours = r#"struct Foo;
3722struct Bar;
3723
3724impl Stream for Foo {
3725    type Item = i32;
3726    fn poll_next(&self) -> Option<i32> {
3727        let x = compute();
3728        Some(x + 1)
3729    }
3730}
3731
3732impl Stream for Bar {
3733    type Item = String;
3734    fn poll_next(&self) -> Option<String> {
3735        Some("hello".into())
3736    }
3737}
3738
3739fn other() {}
3740"#;
3741        let theirs = r#"struct Foo;
3742struct Bar;
3743
3744impl Stream for Foo {
3745    type Item = i32;
3746    fn poll_next(&self) -> Option<i32> {
3747        Some(1)
3748    }
3749}
3750
3751impl Stream for Bar {
3752    type Item = String;
3753    fn poll_next(&self) -> Option<String> {
3754        let s = format!("hello {}", name);
3755        Some(s)
3756    }
3757}
3758
3759fn other() {}
3760"#;
3761        let result = entity_merge(base, ours, theirs, "test.rs");
3762        assert!(
3763            result.is_clean(),
3764            "Same trait, different types should not conflict. Conflicts: {:?}",
3765            result.conflicts,
3766        );
3767        assert!(result.content.contains("impl Stream for Foo"), "Should have Foo impl");
3768        assert!(result.content.contains("impl Stream for Bar"), "Should have Bar impl");
3769        assert!(result.content.contains("compute()"), "Should have ours' Foo change");
3770        assert!(result.content.contains("format!"), "Should have theirs' Bar change");
3771    }
3772
3773    #[test]
3774    fn test_rust_doc_comment_plus_body_change() {
3775        // One side adds Rust doc comment, other modifies body
3776        // Comment bundling ensures the doc comment is part of the entity
3777        let base = r#"fn add(a: i32, b: i32) -> i32 {
3778    a + b
3779}
3780
3781fn subtract(a: i32, b: i32) -> i32 {
3782    a - b
3783}
3784"#;
3785        let ours = r#"/// Adds two numbers together.
3786fn add(a: i32, b: i32) -> i32 {
3787    a + b
3788}
3789
3790fn subtract(a: i32, b: i32) -> i32 {
3791    a - b
3792}
3793"#;
3794        let theirs = r#"fn add(a: i32, b: i32) -> i32 {
3795    a + b
3796}
3797
3798fn subtract(a: i32, b: i32) -> i32 {
3799    a - b - 1
3800}
3801"#;
3802        let result = entity_merge(base, ours, theirs, "test.rs");
3803        assert!(
3804            result.is_clean(),
3805            "Rust doc comment + body change should merge. Conflicts: {:?}",
3806            result.conflicts,
3807        );
3808        assert!(result.content.contains("/// Adds two numbers"), "Should have ours doc comment");
3809        assert!(result.content.contains("a - b - 1"), "Should have theirs body change");
3810    }
3811
3812    #[test]
3813    fn test_both_add_different_doc_comments() {
3814        // Both add doc comments to different functions — should merge cleanly
3815        let base = r#"fn add(a: i32, b: i32) -> i32 {
3816    a + b
3817}
3818
3819fn subtract(a: i32, b: i32) -> i32 {
3820    a - b
3821}
3822"#;
3823        let ours = r#"/// Adds two numbers.
3824fn add(a: i32, b: i32) -> i32 {
3825    a + b
3826}
3827
3828fn subtract(a: i32, b: i32) -> i32 {
3829    a - b
3830}
3831"#;
3832        let theirs = r#"fn add(a: i32, b: i32) -> i32 {
3833    a + b
3834}
3835
3836/// Subtracts b from a.
3837fn subtract(a: i32, b: i32) -> i32 {
3838    a - b
3839}
3840"#;
3841        let result = entity_merge(base, ours, theirs, "test.rs");
3842        assert!(
3843            result.is_clean(),
3844            "Both adding doc comments to different functions should merge. Conflicts: {:?}",
3845            result.conflicts,
3846        );
3847        assert!(result.content.contains("/// Adds two numbers"), "Should have add's doc comment");
3848        assert!(result.content.contains("/// Subtracts b from a"), "Should have subtract's doc comment");
3849    }
3850
3851    #[test]
3852    fn test_go_import_block_both_add_different() {
3853        // Go uses import (...) blocks — both add different imports
3854        let base = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3855        let ours = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"strings\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3856        let theirs = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"io\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3857        let result = entity_merge(base, ours, theirs, "main.go");
3858        eprintln!("Go import block: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3859        eprintln!("Content:\n{}", result.content);
3860        // This tests whether Go import blocks (a single entity) get inner-merged
3861    }
3862
3863    #[test]
3864    fn test_python_class_both_add_methods() {
3865        // Python class — both add different methods
3866        let base = "class Calculator:\n    def add(self, a, b):\n        return a + b\n";
3867        let ours = "class Calculator:\n    def add(self, a, b):\n        return a + b\n\n    def multiply(self, a, b):\n        return a * b\n";
3868        let theirs = "class Calculator:\n    def add(self, a, b):\n        return a + b\n\n    def divide(self, a, b):\n        return a / b\n";
3869        let result = entity_merge(base, ours, theirs, "test.py");
3870        eprintln!("Python class: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3871        eprintln!("Content:\n{}", result.content);
3872        assert!(
3873            result.is_clean(),
3874            "Both adding methods to Python class should merge. Conflicts: {:?}",
3875            result.conflicts,
3876        );
3877        assert!(result.content.contains("multiply"), "Should have multiply");
3878        assert!(result.content.contains("divide"), "Should have divide");
3879    }
3880
3881    #[test]
3882    fn test_interstitial_conflict_not_silently_embedded() {
3883        // Regression test: when interstitial content between entities has a
3884        // both-modified conflict, merge_interstitials must report it as a real
3885        // conflict instead of silently embedding raw diffy markers and claiming
3886        // is_clean=true.
3887        //
3888        // Scenario: a barrel export file (index.ts) with comments between
3889        // export statements. Both sides modify the SAME interstitial comment
3890        // block differently. The exports are the entities; the comment between
3891        // them is interstitial content that goes through merge_interstitials
3892        // → diffy, which cannot auto-merge conflicting edits.
3893        let base = r#"export { alpha } from "./alpha";
3894
3895// Section: data utilities
3896// TODO: add more exports here
3897
3898export { beta } from "./beta";
3899"#;
3900        let ours = r#"export { alpha } from "./alpha";
3901
3902// Section: data utilities (sorting)
3903// Sorting helpers for list views
3904
3905export { beta } from "./beta";
3906"#;
3907        let theirs = r#"export { alpha } from "./alpha";
3908
3909// Section: data utilities (filtering)
3910// Filtering helpers for search views
3911
3912export { beta } from "./beta";
3913"#;
3914        let result = entity_merge(base, ours, theirs, "index.ts");
3915
3916        // The key assertions:
3917        // 1. If the content has conflict markers, is_clean() MUST be false
3918        let has_markers = result.content.contains("<<<<<<<") || result.content.contains(">>>>>>>");
3919        if has_markers {
3920            assert!(
3921                !result.is_clean(),
3922                "BUG: is_clean()=true but merged content has conflict markers!\n\
3923                 stats: {}\nconflicts: {:?}\ncontent:\n{}",
3924                result.stats, result.conflicts, result.content
3925            );
3926            assert!(
3927                result.stats.entities_conflicted > 0,
3928                "entities_conflicted should be > 0 when markers are present"
3929            );
3930        }
3931
3932        // 2. If it was resolved cleanly, no markers should exist
3933        if result.is_clean() {
3934            assert!(
3935                !has_markers,
3936                "Clean merge should not contain conflict markers!\ncontent:\n{}",
3937                result.content
3938            );
3939        }
3940    }
3941
3942    #[test]
3943    fn test_pre_conflicted_input_not_treated_as_clean() {
3944        // Regression test for AU/AA conflicts: git can store conflict markers
3945        // directly into stage blobs. Weave must not return is_clean=true.
3946        let base = "";
3947        let theirs = "";
3948        let ours = r#"/**
3949 * MIT License
3950 */
3951
3952<<<<<<<< HEAD:src/lib/exports/index.ts
3953export { renderDocToBuffer } from "./doc-exporter";
3954export type { ExportOptions, ExportMetadata, RenderContext } from "./types";
3955========
3956export * from "./editor";
3957export * from "./types";
3958>>>>>>>> feature:packages/core/src/editor/index.ts
3959"#;
3960        let result = entity_merge(base, ours, theirs, "index.ts");
3961
3962        assert!(
3963            !result.is_clean(),
3964            "Pre-conflicted input must not be reported as clean!\n\
3965             stats: {}\nconflicts: {:?}",
3966            result.stats, result.conflicts,
3967        );
3968        assert!(result.stats.entities_conflicted > 0);
3969        assert!(!result.conflicts.is_empty());
3970    }
3971
3972    #[test]
3973    fn test_multi_line_signature_classified_as_syntax() {
3974        // Multi-line parameter list: changing a param should be Syntax, not Functional
3975        let base = "function process(\n    a: number,\n    b: string\n) {\n    return a;\n}\n";
3976        let ours = "function process(\n    a: number,\n    b: string,\n    c: boolean\n) {\n    return a;\n}\n";
3977        let theirs = "function process(\n    a: number,\n    b: number\n) {\n    return a;\n}\n";
3978        let complexity = crate::conflict::classify_conflict(Some(base), Some(ours), Some(theirs));
3979        assert_eq!(
3980            complexity,
3981            crate::conflict::ConflictComplexity::Syntax,
3982            "Multi-line signature change should be classified as Syntax, got {:?}",
3983            complexity
3984        );
3985    }
3986
3987    #[test]
3988    fn test_grouped_import_merge_preserves_groups() {
3989        let base = "import os\nimport sys\n\nfrom collections import OrderedDict\nfrom typing import List\n";
3990        let ours = "import os\nimport sys\nimport json\n\nfrom collections import OrderedDict\nfrom typing import List\n";
3991        let theirs = "import os\nimport sys\n\nfrom collections import OrderedDict\nfrom collections import defaultdict\nfrom typing import List\n";
3992        let result = merge_imports_commutatively(base, ours, theirs);
3993        // json should be in the first group (stdlib), defaultdict in the second (collections)
3994        let lines: Vec<&str> = result.lines().collect();
3995        let json_idx = lines.iter().position(|l| l.contains("json"));
3996        let blank_idx = lines.iter().position(|l| l.trim().is_empty());
3997        let defaultdict_idx = lines.iter().position(|l| l.contains("defaultdict"));
3998        assert!(json_idx.is_some(), "json import should be present");
3999        assert!(blank_idx.is_some(), "blank line separator should be present");
4000        assert!(defaultdict_idx.is_some(), "defaultdict import should be present");
4001        // json should come before the blank line, defaultdict after
4002        assert!(json_idx.unwrap() < blank_idx.unwrap(), "json should be in first group");
4003        assert!(defaultdict_idx.unwrap() > blank_idx.unwrap(), "defaultdict should be in second group");
4004    }
4005
4006    #[test]
4007    fn test_configurable_duplicate_threshold() {
4008        // Create entities with 15 same-name entities
4009        let entities: Vec<SemanticEntity> = (0..15).map(|i| SemanticEntity {
4010            id: format!("test::function::test_{}", i),
4011            file_path: "test.ts".to_string(),
4012            entity_type: "function".to_string(),
4013            name: "test".to_string(),
4014            parent_id: None,
4015            content: format!("function test() {{ return {}; }}", i),
4016            content_hash: format!("hash_{}", i),
4017            structural_hash: None,
4018            start_line: i * 3 + 1,
4019            end_line: i * 3 + 3,
4020            metadata: None,
4021        }).collect();
4022        // Default threshold (10): should trigger
4023        assert!(has_excessive_duplicates(&entities));
4024        // Set threshold to 20: should not trigger
4025        std::env::set_var("WEAVE_MAX_DUPLICATES", "20");
4026        assert!(!has_excessive_duplicates(&entities));
4027        std::env::remove_var("WEAVE_MAX_DUPLICATES");
4028    }
4029
4030    #[test]
4031    fn test_ts_multiline_import_consolidation() {
4032        // Issue #24: when incoming consolidates two imports into one multi-line import,
4033        // the `import {` opening line can get dropped.
4034        let base = "\
4035import type { Foo } from \"./foo\"
4036import {
4037     type a,
4038     type b,
4039     type c,
4040} from \"./foo\"
4041
4042export function bar() {
4043    return 1;
4044}
4045";
4046        let ours = base;
4047        let theirs = "\
4048import {
4049     type Foo,
4050     type a,
4051     type b,
4052     type c,
4053} from \"./foo\"
4054
4055export function bar() {
4056    return 1;
4057}
4058";
4059        let result = entity_merge(base, ours, theirs, "test.ts");
4060        eprintln!("TS import consolidation: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4061        eprintln!("Content:\n{}", result.content);
4062        // Theirs is the only change, result should match theirs exactly
4063        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4064        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4065        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4066        assert!(!result.content.contains("import type { Foo }"), "old separate import should be removed");
4067    }
4068
4069    #[test]
4070    fn test_ts_multiline_import_both_modify() {
4071        // Issue #24 variant: both sides modify the import block
4072        let base = "\
4073import type { Foo } from \"./foo\"
4074import {
4075     type a,
4076     type b,
4077     type c,
4078} from \"./foo\"
4079
4080export function bar() {
4081    return 1;
4082}
4083";
4084        // Ours: consolidates imports + adds type d
4085        let ours = "\
4086import {
4087     type Foo,
4088     type a,
4089     type b,
4090     type c,
4091     type d,
4092} from \"./foo\"
4093
4094export function bar() {
4095    return 1;
4096}
4097";
4098        // Theirs: consolidates imports + adds type e
4099        let theirs = "\
4100import {
4101     type Foo,
4102     type a,
4103     type b,
4104     type c,
4105     type e,
4106} from \"./foo\"
4107
4108export function bar() {
4109    return 1;
4110}
4111";
4112        let result = entity_merge(base, ours, theirs, "test.ts");
4113        eprintln!("TS import both modify: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4114        eprintln!("Content:\n{}", result.content);
4115        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4116        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4117        assert!(result.content.contains("type d,"), "ours addition must be present");
4118        assert!(result.content.contains("type e,"), "theirs addition must be present");
4119        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4120    }
4121
4122    #[test]
4123    fn test_ts_multiline_import_no_entities() {
4124        // Issue #24: file with only imports, no other entities
4125        let base = "\
4126import type { Foo } from \"./foo\"
4127import {
4128     type a,
4129     type b,
4130     type c,
4131} from \"./foo\"
4132";
4133        let ours = base;
4134        let theirs = "\
4135import {
4136     type Foo,
4137     type a,
4138     type b,
4139     type c,
4140} from \"./foo\"
4141";
4142        let result = entity_merge(base, ours, theirs, "test.ts");
4143        eprintln!("TS import no entities: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4144        eprintln!("Content:\n{}", result.content);
4145        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4146        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4147    }
4148
4149    #[test]
4150    fn test_ts_multiline_import_export_variable() {
4151        // Issue #24: import block near an export variable entity
4152        let base = "\
4153import type { Foo } from \"./foo\"
4154import {
4155     type a,
4156     type b,
4157     type c,
4158} from \"./foo\"
4159
4160export const X = 1;
4161
4162export function bar() {
4163    return 1;
4164}
4165";
4166        let ours = "\
4167import type { Foo } from \"./foo\"
4168import {
4169     type a,
4170     type b,
4171     type c,
4172     type d,
4173} from \"./foo\"
4174
4175export const X = 1;
4176
4177export function bar() {
4178    return 1;
4179}
4180";
4181        let theirs = "\
4182import {
4183     type Foo,
4184     type a,
4185     type b,
4186     type c,
4187} from \"./foo\"
4188
4189export const X = 2;
4190
4191export function bar() {
4192    return 1;
4193}
4194";
4195        let result = entity_merge(base, ours, theirs, "test.ts");
4196        eprintln!("TS import + export var: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4197        eprintln!("Content:\n{}", result.content);
4198        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4199    }
4200
4201    #[test]
4202    fn test_ts_multiline_import_adjacent_to_entity() {
4203        // Issue #24: import block directly adjacent to entity (no blank line)
4204        let base = "\
4205import type { Foo } from \"./foo\"
4206import {
4207     type a,
4208     type b,
4209     type c,
4210} from \"./foo\"
4211export function bar() {
4212    return 1;
4213}
4214";
4215        let ours = base;
4216        let theirs = "\
4217import {
4218     type Foo,
4219     type a,
4220     type b,
4221     type c,
4222} from \"./foo\"
4223export function bar() {
4224    return 1;
4225}
4226";
4227        let result = entity_merge(base, ours, theirs, "test.ts");
4228        eprintln!("TS import adjacent: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4229        eprintln!("Content:\n{}", result.content);
4230        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4231        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4232    }
4233
4234    #[test]
4235    fn test_ts_multiline_import_both_consolidate_differently() {
4236        // Issue #24: both sides consolidate imports but add different specifiers
4237        let base = "\
4238import type { Foo } from \"./foo\"
4239import {
4240     type a,
4241     type b,
4242} from \"./foo\"
4243
4244export function bar() {
4245    return 1;
4246}
4247";
4248        let ours = "\
4249import {
4250     type Foo,
4251     type a,
4252     type b,
4253     type c,
4254} from \"./foo\"
4255
4256export function bar() {
4257    return 1;
4258}
4259";
4260        let theirs = "\
4261import {
4262     type Foo,
4263     type a,
4264     type b,
4265     type d,
4266} from \"./foo\"
4267
4268export function bar() {
4269    return 1;
4270}
4271";
4272        let result = entity_merge(base, ours, theirs, "test.ts");
4273        eprintln!("TS both consolidate: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4274        eprintln!("Content:\n{}", result.content);
4275        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4276        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4277        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4278    }
4279
4280    #[test]
4281    fn test_ts_multiline_import_ours_adds_theirs_consolidates() {
4282        // Issue #24 variant: ours adds new import, theirs consolidates
4283        let base = "\
4284import type { Foo } from \"./foo\"
4285import {
4286     type a,
4287     type b,
4288     type c,
4289} from \"./foo\"
4290
4291export function bar() {
4292    return 1;
4293}
4294";
4295        // Ours: adds a new specifier to the multiline import
4296        let ours = "\
4297import type { Foo } from \"./foo\"
4298import {
4299     type a,
4300     type b,
4301     type c,
4302     type d,
4303} from \"./foo\"
4304
4305export function bar() {
4306    return 1;
4307}
4308";
4309        // Theirs: consolidates into one import
4310        let theirs = "\
4311import {
4312     type Foo,
4313     type a,
4314     type b,
4315     type c,
4316} from \"./foo\"
4317
4318export function bar() {
4319    return 1;
4320}
4321";
4322        let result = entity_merge(base, ours, theirs, "test.ts");
4323        eprintln!("TS import ours-adds theirs-consolidates: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4324        eprintln!("Content:\n{}", result.content);
4325        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4326        assert!(result.content.contains("type d,"), "ours addition must be present");
4327        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4328    }
4329}