weave_core/
merge.rs

1use std::collections::{HashMap, HashSet};
2use std::io::Write;
3use std::process::Command;
4use std::sync::{mpsc, LazyLock};
5use std::time::Duration;
6
7use serde::Serialize;
8use sem_core::model::change::ChangeType;
9use sem_core::model::entity::SemanticEntity;
10use sem_core::model::identity::match_entities;
11use sem_core::parser::plugins::create_default_registry;
12use sem_core::parser::registry::ParserRegistry;
13
14/// Static parser registry shared across all merge operations.
15/// Avoids recreating 11 tree-sitter language parsers per merge call.
16static PARSER_REGISTRY: LazyLock<ParserRegistry> = LazyLock::new(create_default_registry);
17
18use crate::conflict::{classify_conflict, ConflictKind, EntityConflict, MarkerFormat, MergeStats};
19use crate::region::{extract_regions, EntityRegion, FileRegion};
20use crate::validate::SemanticWarning;
21use crate::reconstruct::reconstruct;
22
23/// How an individual entity was resolved during merge.
24#[derive(Debug, Clone, Serialize)]
25#[serde(rename_all = "snake_case")]
26pub enum ResolutionStrategy {
27    Unchanged,
28    OursOnly,
29    TheirsOnly,
30    ContentEqual,
31    DiffyMerged,
32    DecoratorMerged,
33    InnerMerged,
34    ConflictBothModified,
35    ConflictModifyDelete,
36    ConflictBothAdded,
37    ConflictRenameRename,
38    AddedOurs,
39    AddedTheirs,
40    Deleted,
41    Renamed { from: String, to: String },
42    Fallback,
43}
44
45/// Audit record for a single entity's merge resolution.
46#[derive(Debug, Clone, Serialize)]
47pub struct EntityAudit {
48    pub name: String,
49    #[serde(rename = "type")]
50    pub entity_type: String,
51    pub resolution: ResolutionStrategy,
52}
53
54/// Result of a merge operation.
55#[derive(Debug)]
56pub struct MergeResult {
57    pub content: String,
58    pub conflicts: Vec<EntityConflict>,
59    pub warnings: Vec<SemanticWarning>,
60    pub stats: MergeStats,
61    pub audit: Vec<EntityAudit>,
62}
63
64impl MergeResult {
65    pub fn is_clean(&self) -> bool {
66        self.conflicts.is_empty()
67            && !self.content.lines().any(|l| l.starts_with("<<<<<<< ours"))
68    }
69}
70
71/// The resolved content for a single entity after merging.
72#[derive(Debug, Clone)]
73pub enum ResolvedEntity {
74    /// Clean resolution — use this content.
75    Clean(EntityRegion),
76    /// Conflict — render conflict markers.
77    Conflict(EntityConflict),
78    /// Inner merge with per-member scoped conflicts.
79    /// Content already contains per-member conflict markers; emit as-is.
80    ScopedConflict {
81        content: String,
82        conflict: EntityConflict,
83    },
84    /// Entity was deleted.
85    Deleted,
86}
87
88/// Perform entity-level 3-way merge.
89///
90/// Falls back to line-level merge (via diffy) when:
91/// - No parser matches the file type
92/// - Parser returns 0 entities for non-empty content
93/// - File exceeds 1MB
94pub fn entity_merge(
95    base: &str,
96    ours: &str,
97    theirs: &str,
98    file_path: &str,
99) -> MergeResult {
100    entity_merge_fmt(base, ours, theirs, file_path, &MarkerFormat::default())
101}
102
103/// Perform entity-level 3-way merge with configurable marker format.
104pub fn entity_merge_fmt(
105    base: &str,
106    ours: &str,
107    theirs: &str,
108    file_path: &str,
109    marker_format: &MarkerFormat,
110) -> MergeResult {
111    let timeout_secs = std::env::var("WEAVE_TIMEOUT")
112        .ok()
113        .and_then(|v| v.parse::<u64>().ok())
114        .unwrap_or(5);
115
116    // Timeout: if entity merge takes too long, diffy is likely hitting
117    // pathological input. Fall back to git merge-file which always terminates.
118    let base_owned = base.to_string();
119    let ours_owned = ours.to_string();
120    let theirs_owned = theirs.to_string();
121    let path_owned = file_path.to_string();
122    let fmt_owned = marker_format.clone();
123
124    let (tx, rx) = mpsc::channel();
125    std::thread::spawn(move || {
126        let result = entity_merge_with_registry(&base_owned, &ours_owned, &theirs_owned, &path_owned, &PARSER_REGISTRY, &fmt_owned);
127        let _ = tx.send(result);
128    });
129
130    match rx.recv_timeout(Duration::from_secs(timeout_secs)) {
131        Ok(result) => result,
132        Err(_) => {
133            eprintln!("weave: merge timed out after {}s for {}, falling back to git merge-file", timeout_secs, file_path);
134            let mut stats = MergeStats::default();
135            stats.used_fallback = true;
136            git_merge_file(base, ours, theirs, &mut stats)
137        }
138    }
139}
140
141pub fn entity_merge_with_registry(
142    base: &str,
143    ours: &str,
144    theirs: &str,
145    file_path: &str,
146    registry: &ParserRegistry,
147    marker_format: &MarkerFormat,
148) -> MergeResult {
149    // Guard: if any input already contains conflict markers (e.g. AU/AA conflicts
150    // where git bakes markers into stage blobs), report as conflict immediately.
151    // We can't do a meaningful 3-way merge on pre-conflicted content.
152    if has_conflict_markers(base) || has_conflict_markers(ours) || has_conflict_markers(theirs) {
153        let mut stats = MergeStats::default();
154        stats.entities_conflicted = 1;
155        stats.used_fallback = true;
156        // Use whichever input has markers as the merged content (preserves
157        // the conflict for the user to resolve manually).
158        let content = if has_conflict_markers(ours) {
159            ours
160        } else if has_conflict_markers(theirs) {
161            theirs
162        } else {
163            base
164        };
165        let complexity = classify_conflict(Some(base), Some(ours), Some(theirs));
166        return MergeResult {
167            content: content.to_string(),
168            conflicts: vec![EntityConflict {
169                entity_name: "(file)".to_string(),
170                entity_type: "file".to_string(),
171                kind: ConflictKind::BothModified,
172                complexity,
173                ours_content: Some(ours.to_string()),
174                theirs_content: Some(theirs.to_string()),
175                base_content: Some(base.to_string()),
176            }],
177            warnings: vec![],
178            stats,
179            audit: vec![],
180        };
181    }
182
183    // Fast path: if ours == theirs, no merge needed
184    if ours == theirs {
185        return MergeResult {
186            content: ours.to_string(),
187            conflicts: vec![],
188            warnings: vec![],
189            stats: MergeStats::default(),
190            audit: vec![],
191        };
192    }
193
194    // Fast path: if base == ours, take theirs entirely
195    if base == ours {
196        return MergeResult {
197            content: theirs.to_string(),
198            conflicts: vec![],
199            warnings: vec![],
200            stats: MergeStats {
201                entities_theirs_only: 1,
202                ..Default::default()
203            },
204            audit: vec![],
205        };
206    }
207
208    // Fast path: if base == theirs, take ours entirely
209    if base == theirs {
210        return MergeResult {
211            content: ours.to_string(),
212            conflicts: vec![],
213            warnings: vec![],
214            stats: MergeStats {
215                entities_ours_only: 1,
216                ..Default::default()
217            },
218            audit: vec![],
219        };
220    }
221
222    // Binary file detection: if any version has null bytes, use git merge-file directly
223    if is_binary(base) || is_binary(ours) || is_binary(theirs) {
224        let mut stats = MergeStats::default();
225        stats.used_fallback = true;
226        return git_merge_file(base, ours, theirs, &mut stats);
227    }
228
229    // Large file fallback
230    if base.len() > 1_000_000 || ours.len() > 1_000_000 || theirs.len() > 1_000_000 {
231        return line_level_fallback(base, ours, theirs, file_path);
232    }
233
234    // If the file type isn't natively supported, the registry returns the fallback
235    // plugin (20-line chunks). Entity merge on arbitrary chunks produces WORSE
236    // results than line-level merge (confirmed on GitButler's .svelte files where
237    // chunk boundaries don't align with structural boundaries). So we skip entity
238    // merge entirely for fallback-plugin files and go straight to line-level merge.
239    let plugin = match registry.get_plugin(file_path) {
240        Some(p) if p.id() != "fallback" => p,
241        _ => return line_level_fallback(base, ours, theirs, file_path),
242    };
243
244    // Extract entities from all three versions. Keep unfiltered lists for inner merge
245    // (child entities provide tree-sitter-based method decomposition for classes).
246    let base_all = plugin.extract_entities(base, file_path);
247    let ours_all = plugin.extract_entities(ours, file_path);
248    let theirs_all = plugin.extract_entities(theirs, file_path);
249
250    // Filter out nested entities for top-level matching and region extraction
251    let base_entities = filter_nested_entities(base_all.clone());
252    let ours_entities = filter_nested_entities(ours_all.clone());
253    let theirs_entities = filter_nested_entities(theirs_all.clone());
254
255    // Fallback if parser returns nothing for non-empty content
256    if base_entities.is_empty() && !base.trim().is_empty() {
257        return line_level_fallback(base, ours, theirs, file_path);
258    }
259    // Allow empty entities if content is actually empty
260    if ours_entities.is_empty() && !ours.trim().is_empty() && theirs_entities.is_empty() && !theirs.trim().is_empty() {
261        return line_level_fallback(base, ours, theirs, file_path);
262    }
263
264    // Fallback if too many duplicate entity names. Entity matching is O(n*m) on
265    // same-named entities which can hang on files with many `var app = ...` etc.
266    if has_excessive_duplicates(&base_entities) || has_excessive_duplicates(&ours_entities) || has_excessive_duplicates(&theirs_entities) {
267        return line_level_fallback(base, ours, theirs, file_path);
268    }
269
270    // Extract regions from all three
271    let base_regions = extract_regions(base, &base_entities);
272    let ours_regions = extract_regions(ours, &ours_entities);
273    let theirs_regions = extract_regions(theirs, &theirs_entities);
274
275    // Build region content maps (entity_id → content from file lines, preserving
276    // surrounding syntax like `export` that sem-core's entity.content may strip)
277    let base_region_content = build_region_content_map(&base_regions);
278    let ours_region_content = build_region_content_map(&ours_regions);
279    let theirs_region_content = build_region_content_map(&theirs_regions);
280
281    // Match entities: base↔ours and base↔theirs
282    let ours_changes = match_entities(&base_entities, &ours_entities, file_path, None, None, None);
283    let theirs_changes = match_entities(&base_entities, &theirs_entities, file_path, None, None, None);
284
285    // Build lookup maps
286    let base_entity_map: HashMap<&str, &SemanticEntity> =
287        base_entities.iter().map(|e| (e.id.as_str(), e)).collect();
288    let ours_entity_map: HashMap<&str, &SemanticEntity> =
289        ours_entities.iter().map(|e| (e.id.as_str(), e)).collect();
290    let theirs_entity_map: HashMap<&str, &SemanticEntity> =
291        theirs_entities.iter().map(|e| (e.id.as_str(), e)).collect();
292
293    // Classify what happened to each entity in each branch
294    let mut ours_change_map: HashMap<String, ChangeType> = HashMap::new();
295    for change in &ours_changes.changes {
296        ours_change_map.insert(change.entity_id.clone(), change.change_type);
297    }
298    let mut theirs_change_map: HashMap<String, ChangeType> = HashMap::new();
299    for change in &theirs_changes.changes {
300        theirs_change_map.insert(change.entity_id.clone(), change.change_type);
301    }
302
303    // Detect renames using structural_hash (RefFilter / IntelliMerge-inspired).
304    // When one branch renames an entity, connect the old and new IDs so the merge
305    // treats it as the same entity rather than a delete+add.
306    let ours_rename_to_base = build_rename_map(&base_entities, &ours_entities);
307    let theirs_rename_to_base = build_rename_map(&base_entities, &theirs_entities);
308    // Reverse maps: base_id → renamed_id in that branch
309    let base_to_ours_rename: HashMap<String, String> = ours_rename_to_base
310        .iter()
311        .map(|(new, old)| (old.clone(), new.clone()))
312        .collect();
313    let base_to_theirs_rename: HashMap<String, String> = theirs_rename_to_base
314        .iter()
315        .map(|(new, old)| (old.clone(), new.clone()))
316        .collect();
317
318    // Collect all entity IDs across all versions
319    let mut all_entity_ids: Vec<String> = Vec::new();
320    let mut seen: HashSet<String> = HashSet::new();
321    // Track renamed IDs so we don't process them twice
322    let mut skip_ids: HashSet<String> = HashSet::new();
323    // The "new" IDs from renames should be skipped — they'll be handled via the base ID
324    for new_id in ours_rename_to_base.keys() {
325        skip_ids.insert(new_id.clone());
326    }
327    for new_id in theirs_rename_to_base.keys() {
328        skip_ids.insert(new_id.clone());
329    }
330
331    // Start with ours ordering (skeleton)
332    for entity in &ours_entities {
333        if skip_ids.contains(&entity.id) {
334            continue;
335        }
336        if seen.insert(entity.id.clone()) {
337            all_entity_ids.push(entity.id.clone());
338        }
339    }
340    // Add theirs-only entities
341    for entity in &theirs_entities {
342        if skip_ids.contains(&entity.id) {
343            continue;
344        }
345        if seen.insert(entity.id.clone()) {
346            all_entity_ids.push(entity.id.clone());
347        }
348    }
349    // Add base-only entities (deleted in both → skip, deleted in one → handled below)
350    for entity in &base_entities {
351        if seen.insert(entity.id.clone()) {
352            all_entity_ids.push(entity.id.clone());
353        }
354    }
355
356    let mut stats = MergeStats::default();
357    let mut conflicts: Vec<EntityConflict> = Vec::new();
358    let mut audit: Vec<EntityAudit> = Vec::new();
359    let mut resolved_entities: HashMap<String, ResolvedEntity> = HashMap::new();
360
361    // Detect rename/rename conflicts: same base entity renamed differently in both branches.
362    // These must be flagged before the entity resolution loop, which would otherwise silently
363    // pick ours and also include theirs as an unmatched entity.
364    let mut rename_conflict_ids: HashSet<String> = HashSet::new();
365    for (base_id, ours_new_id) in &base_to_ours_rename {
366        if let Some(theirs_new_id) = base_to_theirs_rename.get(base_id) {
367            if ours_new_id != theirs_new_id {
368                rename_conflict_ids.insert(base_id.clone());
369            }
370        }
371    }
372
373    for entity_id in &all_entity_ids {
374        // Handle rename/rename conflicts: both branches renamed this base entity differently
375        if rename_conflict_ids.contains(entity_id) {
376            let ours_new_id = &base_to_ours_rename[entity_id];
377            let theirs_new_id = &base_to_theirs_rename[entity_id];
378            let base_entity = base_entity_map.get(entity_id.as_str());
379            let ours_entity = ours_entity_map.get(ours_new_id.as_str());
380            let theirs_entity = theirs_entity_map.get(theirs_new_id.as_str());
381            let base_name = base_entity.map(|e| e.name.as_str()).unwrap_or(entity_id);
382            let ours_name = ours_entity.map(|e| e.name.as_str()).unwrap_or(ours_new_id);
383            let theirs_name = theirs_entity.map(|e| e.name.as_str()).unwrap_or(theirs_new_id);
384
385            let base_rc = base_entity.map(|e| base_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
386            let ours_rc = ours_entity.map(|e| ours_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
387            let theirs_rc = theirs_entity.map(|e| theirs_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
388
389            stats.entities_conflicted += 1;
390            let conflict = EntityConflict {
391                entity_name: base_name.to_string(),
392                entity_type: base_entity.map(|e| e.entity_type.clone()).unwrap_or_default(),
393                kind: ConflictKind::RenameRename {
394                    base_name: base_name.to_string(),
395                    ours_name: ours_name.to_string(),
396                    theirs_name: theirs_name.to_string(),
397                },
398                complexity: crate::conflict::ConflictComplexity::Syntax,
399                ours_content: ours_rc,
400                theirs_content: theirs_rc,
401                base_content: base_rc,
402            };
403            conflicts.push(conflict.clone());
404            audit.push(EntityAudit {
405                name: base_name.to_string(),
406                entity_type: base_entity.map(|e| e.entity_type.clone()).unwrap_or_default(),
407                resolution: ResolutionStrategy::ConflictRenameRename,
408            });
409            let resolution = ResolvedEntity::Conflict(conflict);
410            resolved_entities.insert(entity_id.clone(), resolution.clone());
411            resolved_entities.insert(ours_new_id.clone(), resolution);
412            // Mark theirs renamed ID as Deleted so reconstruct doesn't emit the conflict twice
413            // (once from ours skeleton, once from theirs-only insertion)
414            resolved_entities.insert(theirs_new_id.clone(), ResolvedEntity::Deleted);
415            continue;
416        }
417
418        let in_base = base_entity_map.get(entity_id.as_str());
419        // Follow rename chains: if base entity was renamed in ours/theirs, use renamed version
420        let ours_id = base_to_ours_rename.get(entity_id.as_str()).map(|s| s.as_str()).unwrap_or(entity_id.as_str());
421        let theirs_id = base_to_theirs_rename.get(entity_id.as_str()).map(|s| s.as_str()).unwrap_or(entity_id.as_str());
422        let in_ours = ours_entity_map.get(ours_id).or_else(|| ours_entity_map.get(entity_id.as_str()));
423        let in_theirs = theirs_entity_map.get(theirs_id).or_else(|| theirs_entity_map.get(entity_id.as_str()));
424
425        let ours_change = ours_change_map.get(entity_id);
426        let theirs_change = theirs_change_map.get(entity_id);
427
428        let (resolution, strategy) = resolve_entity(
429            entity_id,
430            in_base,
431            in_ours,
432            in_theirs,
433            ours_change,
434            theirs_change,
435            &base_region_content,
436            &ours_region_content,
437            &theirs_region_content,
438            &base_all,
439            &ours_all,
440            &theirs_all,
441            &mut stats,
442            marker_format,
443        );
444
445        // Build audit entry from entity info
446        let entity_name = in_ours.map(|e| e.name.as_str())
447            .or_else(|| in_theirs.map(|e| e.name.as_str()))
448            .or_else(|| in_base.map(|e| e.name.as_str()))
449            .unwrap_or(entity_id)
450            .to_string();
451        let entity_type = in_ours.map(|e| e.entity_type.as_str())
452            .or_else(|| in_theirs.map(|e| e.entity_type.as_str()))
453            .or_else(|| in_base.map(|e| e.entity_type.as_str()))
454            .unwrap_or("")
455            .to_string();
456        audit.push(EntityAudit {
457            name: entity_name,
458            entity_type,
459            resolution: strategy,
460        });
461
462        match &resolution {
463            ResolvedEntity::Conflict(ref c) => conflicts.push(c.clone()),
464            ResolvedEntity::ScopedConflict { conflict, .. } => conflicts.push(conflict.clone()),
465            _ => {}
466        }
467
468        resolved_entities.insert(entity_id.clone(), resolution.clone());
469        // Also store under renamed IDs so reconstruct can find them
470        if let Some(ours_renamed_id) = base_to_ours_rename.get(entity_id.as_str()) {
471            resolved_entities.insert(ours_renamed_id.clone(), resolution.clone());
472        }
473        if let Some(theirs_renamed_id) = base_to_theirs_rename.get(entity_id.as_str()) {
474            resolved_entities.insert(theirs_renamed_id.clone(), resolution);
475        }
476    }
477
478    // Merge interstitial regions
479    let (merged_interstitials, interstitial_conflicts) =
480        merge_interstitials(&base_regions, &ours_regions, &theirs_regions, marker_format);
481    stats.entities_conflicted += interstitial_conflicts.len();
482    conflicts.extend(interstitial_conflicts);
483
484    // Reconstruct the file
485    let content = reconstruct(
486        &ours_regions,
487        &theirs_regions,
488        &theirs_entities,
489        &ours_entity_map,
490        &resolved_entities,
491        &merged_interstitials,
492        marker_format,
493    );
494
495    // Post-merge cleanup: remove duplicate lines and normalize blank lines
496    let content = post_merge_cleanup(&content);
497
498    // Post-merge parse validation: verify the merged result still parses correctly
499    // (MergeBot-inspired safety check — catch syntactically broken merges)
500    let mut warnings = vec![];
501    if conflicts.is_empty() && stats.entities_both_changed_merged > 0 {
502        let merged_entities = plugin.extract_entities(&content, file_path);
503        if merged_entities.is_empty() && !content.trim().is_empty() {
504            warnings.push(crate::validate::SemanticWarning {
505                entity_name: "(file)".to_string(),
506                entity_type: "file".to_string(),
507                file_path: file_path.to_string(),
508                kind: crate::validate::WarningKind::ParseFailedAfterMerge,
509                related: vec![],
510            });
511        }
512    }
513
514    let entity_result = MergeResult {
515        content,
516        conflicts,
517        warnings,
518        stats: stats.clone(),
519        audit,
520    };
521
522    // Floor: never produce more conflict markers than git merge-file.
523    // Entity merge can split one git conflict into multiple per-entity conflicts,
524    // or interstitial merges can produce conflicts not tracked in the conflicts vec.
525    let entity_markers = entity_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
526    if entity_markers > 0 {
527        let git_result = git_merge_file(base, ours, theirs, &mut stats);
528        let git_markers = git_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
529        if entity_markers > git_markers {
530            return git_result;
531        }
532    }
533
534    // Safety net: detect silent data loss from inner merge.
535    // If the merged result is significantly shorter than both inputs and has no
536    // conflict markers, the inner merge likely garbled content. Fall back to git.
537    if entity_markers == 0 {
538        let merged_len = entity_result.content.len();
539        let min_input_len = ours.len().min(theirs.len());
540        // If result is less than 80% of the shorter input, something went wrong
541        if min_input_len > 200 && merged_len < min_input_len * 80 / 100 {
542            return git_merge_file(base, ours, theirs, &mut stats);
543        }
544    }
545
546    entity_result
547}
548
549fn resolve_entity(
550    _entity_id: &str,
551    in_base: Option<&&SemanticEntity>,
552    in_ours: Option<&&SemanticEntity>,
553    in_theirs: Option<&&SemanticEntity>,
554    _ours_change: Option<&ChangeType>,
555    _theirs_change: Option<&ChangeType>,
556    base_region_content: &HashMap<&str, &str>,
557    ours_region_content: &HashMap<&str, &str>,
558    theirs_region_content: &HashMap<&str, &str>,
559    base_all: &[SemanticEntity],
560    ours_all: &[SemanticEntity],
561    theirs_all: &[SemanticEntity],
562    stats: &mut MergeStats,
563    marker_format: &MarkerFormat,
564) -> (ResolvedEntity, ResolutionStrategy) {
565    // Helper: get region content (from file lines) for an entity, falling back to entity.content
566    let region_content = |entity: &SemanticEntity, map: &HashMap<&str, &str>| -> String {
567        map.get(entity.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| entity.content.clone())
568    };
569
570    match (in_base, in_ours, in_theirs) {
571        // Entity exists in all three versions
572        (Some(base), Some(ours), Some(theirs)) => {
573            // Check modification status via structural hash AND region content.
574            // Region content may differ even when structural hash is the same
575            // (e.g., doc comment added/changed but function body unchanged).
576            let base_rc_lazy = || region_content(base, base_region_content);
577            let ours_rc_lazy = || region_content(ours, ours_region_content);
578            let theirs_rc_lazy = || region_content(theirs, theirs_region_content);
579
580            let ours_modified = ours.content_hash != base.content_hash
581                || ours_rc_lazy() != base_rc_lazy();
582            let theirs_modified = theirs.content_hash != base.content_hash
583                || theirs_rc_lazy() != base_rc_lazy();
584
585            match (ours_modified, theirs_modified) {
586                (false, false) => {
587                    // Neither changed
588                    stats.entities_unchanged += 1;
589                    (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::Unchanged)
590                }
591                (true, false) => {
592                    // Only ours changed
593                    stats.entities_ours_only += 1;
594                    (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::OursOnly)
595                }
596                (false, true) => {
597                    // Only theirs changed
598                    stats.entities_theirs_only += 1;
599                    (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &region_content(theirs, theirs_region_content))), ResolutionStrategy::TheirsOnly)
600                }
601                (true, true) => {
602                    // Both changed — try intra-entity merge
603                    if ours.content_hash == theirs.content_hash {
604                        // Same change in both — take ours
605                        stats.entities_both_changed_merged += 1;
606                        (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::ContentEqual)
607                    } else {
608                        // Try diffy 3-way merge on region content (preserves full syntax)
609                        let base_rc = region_content(base, base_region_content);
610                        let ours_rc = region_content(ours, ours_region_content);
611                        let theirs_rc = region_content(theirs, theirs_region_content);
612
613                        // Whitespace-aware shortcut: if one side only changed
614                        // whitespace/formatting, take the other side's content changes.
615                        // This handles the common case where one agent reformats while
616                        // another makes semantic changes.
617                        if is_whitespace_only_diff(&base_rc, &ours_rc) {
618                            stats.entities_theirs_only += 1;
619                            return (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &theirs_rc)), ResolutionStrategy::TheirsOnly);
620                        }
621                        if is_whitespace_only_diff(&base_rc, &theirs_rc) {
622                            stats.entities_ours_only += 1;
623                            return (ResolvedEntity::Clean(entity_to_region_with_content(ours, &ours_rc)), ResolutionStrategy::OursOnly);
624                        }
625
626                        match diffy_merge(&base_rc, &ours_rc, &theirs_rc) {
627                            Some(merged) => {
628                                stats.entities_both_changed_merged += 1;
629                                stats.resolved_via_diffy += 1;
630                                (ResolvedEntity::Clean(EntityRegion {
631                                    entity_id: ours.id.clone(),
632                                    entity_name: ours.name.clone(),
633                                    entity_type: ours.entity_type.clone(),
634                                    content: merged,
635                                    start_line: ours.start_line,
636                                    end_line: ours.end_line,
637                                }), ResolutionStrategy::DiffyMerged)
638                            }
639                            None => {
640                                // Strategy 1: decorator/annotation-aware merge
641                                // Decorators are unordered annotations — merge them commutatively
642                                if let Some(merged) = try_decorator_aware_merge(&base_rc, &ours_rc, &theirs_rc) {
643                                    stats.entities_both_changed_merged += 1;
644                                    stats.resolved_via_diffy += 1;
645                                    return (ResolvedEntity::Clean(EntityRegion {
646                                        entity_id: ours.id.clone(),
647                                        entity_name: ours.name.clone(),
648                                        entity_type: ours.entity_type.clone(),
649                                        content: merged,
650                                        start_line: ours.start_line,
651                                        end_line: ours.end_line,
652                                    }), ResolutionStrategy::DecoratorMerged);
653                                }
654
655                                // Strategy 2: inner entity merge for container types
656                                // (LastMerge insight: class members are unordered children)
657                                if is_container_entity_type(&ours.entity_type) {
658                                    let base_children = in_base
659                                        .map(|b| get_child_entities(b, base_all))
660                                        .unwrap_or_default();
661                                    let ours_children = get_child_entities(ours, ours_all);
662                                    let theirs_children = in_theirs
663                                        .map(|t| get_child_entities(t, theirs_all))
664                                        .unwrap_or_default();
665                                    let base_start = in_base.map(|b| b.start_line).unwrap_or(1);
666                                    let ours_start = ours.start_line;
667                                    let theirs_start = in_theirs.map(|t| t.start_line).unwrap_or(1);
668                                    if let Some(inner) = try_inner_entity_merge(
669                                        &base_rc, &ours_rc, &theirs_rc,
670                                        &base_children, &ours_children, &theirs_children,
671                                        base_start, ours_start, theirs_start,
672                                        marker_format,
673                                    ) {
674                                        if inner.has_conflicts {
675                                            // Inner merge produced per-member conflicts:
676                                            // content has scoped markers for just the conflicted
677                                            // members; clean members are merged normally.
678                                            stats.entities_conflicted += 1;
679                                            stats.resolved_via_inner_merge += 1;
680                                            let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), Some(&theirs_rc));
681                                            return (ResolvedEntity::ScopedConflict {
682                                                content: inner.content,
683                                                conflict: EntityConflict {
684                                                    entity_name: ours.name.clone(),
685                                                    entity_type: ours.entity_type.clone(),
686                                                    kind: ConflictKind::BothModified,
687                                                    complexity,
688                                                    ours_content: Some(ours_rc),
689                                                    theirs_content: Some(theirs_rc),
690                                                    base_content: Some(base_rc),
691                                                },
692                                            }, ResolutionStrategy::InnerMerged);
693                                        } else {
694                                            stats.entities_both_changed_merged += 1;
695                                            stats.resolved_via_inner_merge += 1;
696                                            return (ResolvedEntity::Clean(EntityRegion {
697                                                entity_id: ours.id.clone(),
698                                                entity_name: ours.name.clone(),
699                                                entity_type: ours.entity_type.clone(),
700                                                content: inner.content,
701                                                start_line: ours.start_line,
702                                                end_line: ours.end_line,
703                                            }), ResolutionStrategy::InnerMerged);
704                                        }
705                                    }
706                                }
707                                stats.entities_conflicted += 1;
708                                let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), Some(&theirs_rc));
709                                (ResolvedEntity::Conflict(EntityConflict {
710                                    entity_name: ours.name.clone(),
711                                    entity_type: ours.entity_type.clone(),
712                                    kind: ConflictKind::BothModified,
713                                    complexity,
714                                    ours_content: Some(ours_rc),
715                                    theirs_content: Some(theirs_rc),
716                                    base_content: Some(base_rc),
717                                }), ResolutionStrategy::ConflictBothModified)
718                            }
719                        }
720                    }
721                }
722            }
723        }
724
725        // Entity in base and ours, but not theirs → theirs deleted it
726        (Some(_base), Some(ours), None) => {
727            let ours_modified = ours.content_hash != _base.content_hash;
728            if ours_modified {
729                // Modify/delete conflict
730                stats.entities_conflicted += 1;
731                let ours_rc = region_content(ours, ours_region_content);
732                let base_rc = region_content(_base, base_region_content);
733                let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), None);
734                (ResolvedEntity::Conflict(EntityConflict {
735                    entity_name: ours.name.clone(),
736                    entity_type: ours.entity_type.clone(),
737                    kind: ConflictKind::ModifyDelete {
738                        modified_in_ours: true,
739                    },
740                    complexity,
741                    ours_content: Some(ours_rc),
742                    theirs_content: None,
743                    base_content: Some(base_rc),
744                }), ResolutionStrategy::ConflictModifyDelete)
745            } else {
746                // Theirs deleted, ours unchanged → accept deletion
747                stats.entities_deleted += 1;
748                (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
749            }
750        }
751
752        // Entity in base and theirs, but not ours → ours deleted it
753        (Some(_base), None, Some(theirs)) => {
754            let theirs_modified = theirs.content_hash != _base.content_hash;
755            if theirs_modified {
756                // Modify/delete conflict
757                stats.entities_conflicted += 1;
758                let theirs_rc = region_content(theirs, theirs_region_content);
759                let base_rc = region_content(_base, base_region_content);
760                let complexity = classify_conflict(Some(&base_rc), None, Some(&theirs_rc));
761                (ResolvedEntity::Conflict(EntityConflict {
762                    entity_name: theirs.name.clone(),
763                    entity_type: theirs.entity_type.clone(),
764                    kind: ConflictKind::ModifyDelete {
765                        modified_in_ours: false,
766                    },
767                    complexity,
768                    ours_content: None,
769                    theirs_content: Some(theirs_rc),
770                    base_content: Some(base_rc),
771                }), ResolutionStrategy::ConflictModifyDelete)
772            } else {
773                // Ours deleted, theirs unchanged → accept deletion
774                stats.entities_deleted += 1;
775                (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
776            }
777        }
778
779        // Entity only in ours (added by ours)
780        (None, Some(ours), None) => {
781            stats.entities_added_ours += 1;
782            (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::AddedOurs)
783        }
784
785        // Entity only in theirs (added by theirs)
786        (None, None, Some(theirs)) => {
787            stats.entities_added_theirs += 1;
788            (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &region_content(theirs, theirs_region_content))), ResolutionStrategy::AddedTheirs)
789        }
790
791        // Entity in both ours and theirs but not base (both added)
792        (None, Some(ours), Some(theirs)) => {
793            if ours.content_hash == theirs.content_hash {
794                // Same content added by both → take ours
795                stats.entities_added_ours += 1;
796                (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::ContentEqual)
797            } else {
798                // Different content → conflict
799                stats.entities_conflicted += 1;
800                let ours_rc = region_content(ours, ours_region_content);
801                let theirs_rc = region_content(theirs, theirs_region_content);
802                let complexity = classify_conflict(None, Some(&ours_rc), Some(&theirs_rc));
803                (ResolvedEntity::Conflict(EntityConflict {
804                    entity_name: ours.name.clone(),
805                    entity_type: ours.entity_type.clone(),
806                    kind: ConflictKind::BothAdded,
807                    complexity,
808                    ours_content: Some(ours_rc),
809                    theirs_content: Some(theirs_rc),
810                    base_content: None,
811                }), ResolutionStrategy::ConflictBothAdded)
812            }
813        }
814
815        // Entity only in base (deleted by both)
816        (Some(_), None, None) => {
817            stats.entities_deleted += 1;
818            (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
819        }
820
821        // Should not happen
822        (None, None, None) => (ResolvedEntity::Deleted, ResolutionStrategy::Deleted),
823    }
824}
825
826fn entity_to_region_with_content(entity: &SemanticEntity, content: &str) -> EntityRegion {
827    EntityRegion {
828        entity_id: entity.id.clone(),
829        entity_name: entity.name.clone(),
830        entity_type: entity.entity_type.clone(),
831        content: content.to_string(),
832        start_line: entity.start_line,
833        end_line: entity.end_line,
834    }
835}
836
837/// Build a map from entity_id to region content (from file lines).
838/// This preserves surrounding syntax (like `export`) that sem-core's entity.content may strip.
839/// Returns borrowed references since regions live for the merge duration.
840fn build_region_content_map(regions: &[FileRegion]) -> HashMap<&str, &str> {
841    regions
842        .iter()
843        .filter_map(|r| match r {
844            FileRegion::Entity(e) => Some((e.entity_id.as_str(), e.content.as_str())),
845            _ => None,
846        })
847        .collect()
848}
849
/// Report whether `a` and `b` differ only in whitespace.
///
/// Indentation, trailing whitespace and blank lines are all ignored: the two
/// texts are considered equivalent when their trimmed, non-empty lines match
/// one-for-one. Identical inputs trivially return `true`.
fn is_whitespace_only_diff(a: &str, b: &str) -> bool {
    if a == b {
        return true;
    }
    let left = a.lines().map(str::trim).filter(|line| !line.is_empty());
    let right = b.lines().map(str::trim).filter(|line| !line.is_empty());
    left.eq(right)
}
860
/// Check if a line is a decorator or annotation.
///
/// Covers Python (`@decorator`) and Java/TS (`@Annotation`) style lines: any
/// line whose first non-whitespace character is `@`, except documentation
/// tags such as `@param` or `@return`.
///
/// The doc-tag check compares the *whole* tag name (the identifier right
/// after `@`), not just a prefix, so real decorators whose names merely begin
/// with a tag name — e.g. `@parameterized.expand(...)` or `@typechecked` —
/// are still recognised as decorators.
fn is_decorator_line(line: &str) -> bool {
    // Documentation tags that look like decorators but are not.
    const DOC_TAGS: [&str; 6] = ["param", "return", "returns", "type", "typedef", "see"];

    let rest = match line.trim().strip_prefix('@') {
        Some(rest) => rest,
        None => return false,
    };

    // The tag name is the leading run of identifier characters after `@`.
    let tag_len = rest
        .find(|c: char| !(c.is_alphanumeric() || c == '_'))
        .unwrap_or(rest.len());
    let tag = &rest[..tag_len];

    !DOC_TAGS.iter().any(|doc_tag| *doc_tag == tag)
}
871
872/// Split content into (decorators, body) where decorators are leading @-prefixed lines.
873fn split_decorators(content: &str) -> (Vec<&str>, &str) {
874    let mut decorator_end = 0;
875    let mut byte_offset = 0;
876    for line in content.lines() {
877        if is_decorator_line(line) || line.trim().is_empty() {
878            decorator_end += 1;
879            byte_offset += line.len() + 1; // +1 for newline
880        } else {
881            break;
882        }
883    }
884    // Trim trailing empty lines from decorator section
885    let lines: Vec<&str> = content.lines().collect();
886    while decorator_end > 0 && lines.get(decorator_end - 1).map_or(false, |l| l.trim().is_empty()) {
887        byte_offset -= lines[decorator_end - 1].len() + 1;
888        decorator_end -= 1;
889    }
890    let decorators: Vec<&str> = lines[..decorator_end]
891        .iter()
892        .filter(|l| is_decorator_line(l))
893        .copied()
894        .collect();
895    let body = &content[byte_offset.min(content.len())..];
896    (decorators, body)
897}
898
899/// Try decorator-aware merge: when both sides add different decorators/annotations,
900/// merge them commutatively (like imports). Also try merging the bodies separately.
901///
902/// This handles the common pattern where one agent adds @cache and another adds @deprecated
903/// to the same function — they should both be preserved.
904fn try_decorator_aware_merge(base: &str, ours: &str, theirs: &str) -> Option<String> {
905    let (base_decorators, base_body) = split_decorators(base);
906    let (ours_decorators, ours_body) = split_decorators(ours);
907    let (theirs_decorators, theirs_body) = split_decorators(theirs);
908
909    // Only useful if at least one side has decorators
910    if ours_decorators.is_empty() && theirs_decorators.is_empty() {
911        return None;
912    }
913
914    // Merge bodies using diffy (or take unchanged side)
915    let merged_body = if base_body == ours_body && base_body == theirs_body {
916        base_body.to_string()
917    } else if base_body == ours_body {
918        theirs_body.to_string()
919    } else if base_body == theirs_body {
920        ours_body.to_string()
921    } else {
922        // Both changed body — try diffy on just the body
923        diffy_merge(base_body, ours_body, theirs_body)?
924    };
925
926    // Merge decorators commutatively (set union)
927    let base_set: HashSet<&str> = base_decorators.iter().copied().collect();
928    let ours_set: HashSet<&str> = ours_decorators.iter().copied().collect();
929    let theirs_set: HashSet<&str> = theirs_decorators.iter().copied().collect();
930
931    // Deletions
932    let ours_deleted: HashSet<&str> = base_set.difference(&ours_set).copied().collect();
933    let theirs_deleted: HashSet<&str> = base_set.difference(&theirs_set).copied().collect();
934
935    // Start with base decorators, remove deletions
936    let mut merged_decorators: Vec<&str> = base_decorators
937        .iter()
938        .filter(|d| !ours_deleted.contains(**d) && !theirs_deleted.contains(**d))
939        .copied()
940        .collect();
941
942    // Add new decorators from ours (not in base)
943    for d in &ours_decorators {
944        if !base_set.contains(d) && !merged_decorators.contains(d) {
945            merged_decorators.push(d);
946        }
947    }
948    // Add new decorators from theirs (not in base, not already added)
949    for d in &theirs_decorators {
950        if !base_set.contains(d) && !merged_decorators.contains(d) {
951            merged_decorators.push(d);
952        }
953    }
954
955    // Reconstruct
956    let mut result = String::new();
957    for d in &merged_decorators {
958        result.push_str(d);
959        result.push('\n');
960    }
961    result.push_str(&merged_body);
962
963    Some(result)
964}
965
966/// Try 3-way merge on text using diffy. Returns None if there are conflicts.
967fn diffy_merge(base: &str, ours: &str, theirs: &str) -> Option<String> {
968    let result = diffy::merge(base, ours, theirs);
969    match result {
970        Ok(merged) => Some(merged),
971        Err(_conflicted) => None,
972    }
973}
974
975/// Try 3-way merge using git merge-file. Returns None on conflict or error.
976/// This uses a different diff algorithm than diffy and can sometimes merge
977/// cases that diffy cannot (and vice versa).
978fn git_merge_string(base: &str, ours: &str, theirs: &str) -> Option<String> {
979    let dir = tempfile::tempdir().ok()?;
980    let base_path = dir.path().join("base");
981    let ours_path = dir.path().join("ours");
982    let theirs_path = dir.path().join("theirs");
983
984    std::fs::write(&base_path, base).ok()?;
985    std::fs::write(&ours_path, ours).ok()?;
986    std::fs::write(&theirs_path, theirs).ok()?;
987
988    let output = Command::new("git")
989        .arg("merge-file")
990        .arg("-p")
991        .arg(&ours_path)
992        .arg(&base_path)
993        .arg(&theirs_path)
994        .output()
995        .ok()?;
996
997    if output.status.success() {
998        String::from_utf8(output.stdout).ok()
999    } else {
1000        None
1001    }
1002}
1003
1004/// Merge interstitial regions from all three versions.
1005/// Uses commutative (set-based) merge for import blocks — inspired by
1006/// LastMerge/Mergiraf's "unordered children" concept.
1007/// Falls back to line-level 3-way merge for non-import content.
1008fn merge_interstitials(
1009    base_regions: &[FileRegion],
1010    ours_regions: &[FileRegion],
1011    theirs_regions: &[FileRegion],
1012    marker_format: &MarkerFormat,
1013) -> (HashMap<String, String>, Vec<EntityConflict>) {
1014    let base_map: HashMap<&str, &str> = base_regions
1015        .iter()
1016        .filter_map(|r| match r {
1017            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1018            _ => None,
1019        })
1020        .collect();
1021
1022    let ours_map: HashMap<&str, &str> = ours_regions
1023        .iter()
1024        .filter_map(|r| match r {
1025            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1026            _ => None,
1027        })
1028        .collect();
1029
1030    let theirs_map: HashMap<&str, &str> = theirs_regions
1031        .iter()
1032        .filter_map(|r| match r {
1033            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1034            _ => None,
1035        })
1036        .collect();
1037
1038    let mut all_keys: HashSet<&str> = HashSet::new();
1039    all_keys.extend(base_map.keys());
1040    all_keys.extend(ours_map.keys());
1041    all_keys.extend(theirs_map.keys());
1042
1043    let mut merged: HashMap<String, String> = HashMap::new();
1044    let mut interstitial_conflicts: Vec<EntityConflict> = Vec::new();
1045
1046    for key in all_keys {
1047        let base_content = base_map.get(key).copied().unwrap_or("");
1048        let ours_content = ours_map.get(key).copied().unwrap_or("");
1049        let theirs_content = theirs_map.get(key).copied().unwrap_or("");
1050
1051        // If all same, no merge needed
1052        if ours_content == theirs_content {
1053            merged.insert(key.to_string(), ours_content.to_string());
1054        } else if base_content == ours_content {
1055            merged.insert(key.to_string(), theirs_content.to_string());
1056        } else if base_content == theirs_content {
1057            merged.insert(key.to_string(), ours_content.to_string());
1058        } else {
1059            // Both changed — check if this is an import-heavy region
1060            if is_import_region(base_content)
1061                || is_import_region(ours_content)
1062                || is_import_region(theirs_content)
1063            {
1064                // Commutative merge: treat import lines as a set
1065                let result = merge_imports_commutatively(base_content, ours_content, theirs_content);
1066                merged.insert(key.to_string(), result);
1067            } else {
1068                // Regular line-level merge
1069                match diffy::merge(base_content, ours_content, theirs_content) {
1070                    Ok(m) => {
1071                        merged.insert(key.to_string(), m);
1072                    }
1073                    Err(_conflicted) => {
1074                        // Create a proper conflict instead of silently embedding
1075                        // raw conflict markers into the output.
1076                        let complexity = classify_conflict(
1077                            Some(base_content),
1078                            Some(ours_content),
1079                            Some(theirs_content),
1080                        );
1081                        let conflict = EntityConflict {
1082                            entity_name: key.to_string(),
1083                            entity_type: "interstitial".to_string(),
1084                            kind: ConflictKind::BothModified,
1085                            complexity,
1086                            ours_content: Some(ours_content.to_string()),
1087                            theirs_content: Some(theirs_content.to_string()),
1088                            base_content: Some(base_content.to_string()),
1089                        };
1090                        merged.insert(key.to_string(), conflict.to_conflict_markers(marker_format));
1091                        interstitial_conflicts.push(conflict);
1092                    }
1093                }
1094            }
1095        }
1096    }
1097
1098    (merged, interstitial_conflicts)
1099}
1100
1101/// Check if a region is predominantly import/use statements.
1102/// Handles both single-line imports and multi-line import blocks
1103/// (e.g. `import { type a, type b } from "..."` spread across lines).
1104fn is_import_region(content: &str) -> bool {
1105    let lines: Vec<&str> = content
1106        .lines()
1107        .filter(|l| !l.trim().is_empty())
1108        .collect();
1109    if lines.is_empty() {
1110        return false;
1111    }
1112    let mut import_count = 0;
1113    let mut in_multiline_import = false;
1114    for line in &lines {
1115        if in_multiline_import {
1116            import_count += 1;
1117            let trimmed = line.trim();
1118            if trimmed.starts_with('}') || trimmed.ends_with(')') {
1119                in_multiline_import = false;
1120            }
1121        } else if is_import_line(line) {
1122            import_count += 1;
1123            let trimmed = line.trim();
1124            // Detect start of multi-line import: `import {` or `import (` without closing on same line
1125            if (trimmed.contains('{') && !trimmed.contains('}'))
1126                || (trimmed.starts_with("import (") && !trimmed.contains(')'))
1127            {
1128                in_multiline_import = true;
1129            }
1130        }
1131    }
1132    // If >50% of non-empty lines are imports, treat as import region
1133    import_count * 2 > lines.len()
1134}
1135
1136/// Post-merge cleanup: remove consecutive duplicate lines and normalize blank lines.
1137///
1138/// Fixes two classes of merge artifacts:
1139/// 1. Duplicate lines/blocks that appear when both sides add the same content
1140///    (e.g. duplicate typedefs, forward declarations)
1141/// 2. Missing blank lines between entities or declarations, and excessive
1142///    blank lines (3+ consecutive) collapsed to 2
1143fn post_merge_cleanup(content: &str) -> String {
1144    let lines: Vec<&str> = content.lines().collect();
1145    let mut result: Vec<&str> = Vec::with_capacity(lines.len());
1146
1147    // Pass 1: Remove consecutive duplicate lines that look like declarations or imports.
1148    // Only dedup lines that are plausibly merge artifacts (imports, exports, forward decls).
1149    // Preserve intentional duplicates like repeated assertions, assignments, or data lines.
1150    for line in &lines {
1151        if line.trim().is_empty() {
1152            result.push(line);
1153            continue;
1154        }
1155        if let Some(prev) = result.last() {
1156            if !prev.trim().is_empty() && *prev == *line && looks_like_declaration(line) {
1157                continue; // skip consecutive exact duplicate of declaration-like line
1158            }
1159        }
1160        result.push(line);
1161    }
1162
1163    // Pass 2: Collapse 3+ consecutive blank lines to 2 (one separator blank line).
1164    let mut final_lines: Vec<&str> = Vec::with_capacity(result.len());
1165    let mut consecutive_blanks = 0;
1166    for line in &result {
1167        if line.trim().is_empty() {
1168            consecutive_blanks += 1;
1169            if consecutive_blanks <= 2 {
1170                final_lines.push(line);
1171            }
1172        } else {
1173            consecutive_blanks = 0;
1174            final_lines.push(line);
1175        }
1176    }
1177
1178    let mut out = final_lines.join("\n");
1179    if content.ends_with('\n') && !out.ends_with('\n') {
1180        out.push('\n');
1181    }
1182    out
1183}
1184
/// Tell whether a line resembles a declaration or import that a merge could
/// plausibly have duplicated.
///
/// Matches (after trimming) lines starting with a known import/export/
/// include/typedef/using keyword, plus `pub ...mod ` lines. Anything else —
/// assertions, assignments, data initializers, struct fields — returns
/// `false`, since such lines may be repeated on purpose.
fn looks_like_declaration(line: &str) -> bool {
    const DECLARATION_PREFIXES: [&str; 8] = [
        "import ",
        "from ",
        "use ",
        "export ",
        "require(",
        "#include",
        "typedef ",
        "using ",
    ];

    let text = line.trim();
    if DECLARATION_PREFIXES.iter().any(|&prefix| text.starts_with(prefix)) {
        return true;
    }
    text.starts_with("pub ") && text.contains("mod ")
}
1200
/// Check if a line is a top-level import/use/require statement.
///
/// Only matches unindented lines to avoid picking up conditional imports
/// inside `if TYPE_CHECKING:` blocks or similar constructs.
///
/// Recognised forms: `import `, `from `, `use `, `require(`, `package `,
/// `using `, `const ... require(` and `#include`. The `#include` check does
/// not require a following space, so `#include<stdio.h>` and
/// `#include"foo.h"` are recognised as well (matching what
/// `looks_like_declaration` accepts).
fn is_import_line(line: &str) -> bool {
    // Skip indented lines: these are inside conditional blocks (TYPE_CHECKING, etc.)
    if line.starts_with(' ') || line.starts_with('\t') {
        return false;
    }

    const IMPORT_PREFIXES: [&str; 7] = [
        "import ",
        "from ",
        "use ",
        "require(",
        "package ",
        "#include",
        "using ",
    ];

    let trimmed = line.trim();
    IMPORT_PREFIXES.iter().any(|&prefix| trimmed.starts_with(prefix))
        // CommonJS style: `const x = require("x")`
        || (trimmed.starts_with("const ") && trimmed.contains("require("))
}
1220
/// A complete import statement (possibly multi-line) as a single unit.
///
/// Produced by `parse_import_statements`, which groups the lines of a
/// multi-line import block into one value so it can be merged as a whole.
#[derive(Debug, Clone)]
struct ImportStatement {
    /// The full text of the import, one entry per source line, in order.
    /// A single-line import has exactly one entry; a multi-line block also
    /// includes its opening line and, when present, its closing line.
    lines: Vec<String>,
    /// The source module (e.g. "./foo", "react", "std::io"), as computed by
    /// `import_source_prefix` from the statement's text.
    source: String,
    /// For multi-line imports: the individual specifiers (e.g. ["type a", "type b"]),
    /// trimmed and with trailing commas removed. Empty for single-line imports.
    specifiers: Vec<String>,
    /// Whether this is a multi-line import block
    is_multiline: bool,
}
1233
1234/// Parse content into import statements, handling multi-line imports as single units.
1235fn parse_import_statements(content: &str) -> (Vec<ImportStatement>, Vec<String>) {
1236    let mut imports: Vec<ImportStatement> = Vec::new();
1237    let mut non_import_lines: Vec<String> = Vec::new();
1238    let lines: Vec<&str> = content.lines().collect();
1239    let mut i = 0;
1240
1241    while i < lines.len() {
1242        let line = lines[i];
1243
1244        if line.trim().is_empty() {
1245            non_import_lines.push(line.to_string());
1246            i += 1;
1247            continue;
1248        }
1249
1250        if is_import_line(line) {
1251            let trimmed = line.trim();
1252            // Check for multi-line import: `import {` without `}` on same line
1253            let starts_multiline = (trimmed.contains('{') && !trimmed.contains('}'))
1254                || (trimmed.starts_with("import (") && !trimmed.contains(')'));
1255
1256            if starts_multiline {
1257                let mut block_lines = vec![line.to_string()];
1258                let mut specifiers = Vec::new();
1259                let close_char = if trimmed.contains('{') { '}' } else { ')' };
1260                i += 1;
1261
1262                // Collect lines until closing brace/paren
1263                while i < lines.len() {
1264                    let inner = lines[i];
1265                    block_lines.push(inner.to_string());
1266                    let inner_trimmed = inner.trim();
1267
1268                    if inner_trimmed.starts_with(close_char) {
1269                        // This is the closing line (e.g. `} from "./foo"`)
1270                        break;
1271                    } else if !inner_trimmed.is_empty() {
1272                        // This is a specifier line — strip trailing comma
1273                        let spec = inner_trimmed.trim_end_matches(',').trim().to_string();
1274                        if !spec.is_empty() {
1275                            specifiers.push(spec);
1276                        }
1277                    }
1278                    i += 1;
1279                }
1280
1281                let full_text = block_lines.join("\n");
1282                let source = import_source_prefix(&full_text).to_string();
1283                imports.push(ImportStatement {
1284                    lines: block_lines,
1285                    source,
1286                    specifiers,
1287                    is_multiline: true,
1288                });
1289            } else {
1290                // Single-line import
1291                let source = import_source_prefix(line).to_string();
1292                imports.push(ImportStatement {
1293                    lines: vec![line.to_string()],
1294                    source,
1295                    specifiers: Vec::new(),
1296                    is_multiline: false,
1297                });
1298            }
1299        } else {
1300            non_import_lines.push(line.to_string());
1301        }
1302        i += 1;
1303    }
1304
1305    (imports, non_import_lines)
1306}
1307
1308/// Merge import blocks commutatively (as unordered sets), preserving grouping.
1309///
1310/// Handles both single-line imports and multi-line import blocks.
1311/// For multi-line imports from the same source, merges specifiers as a set.
1312/// Single-line imports are merged as before: set union with deletions.
1313fn merge_imports_commutatively(base: &str, ours: &str, theirs: &str) -> String {
1314    let (base_imports, _) = parse_import_statements(base);
1315    let (ours_imports, _) = parse_import_statements(ours);
1316    let (theirs_imports, _) = parse_import_statements(theirs);
1317
1318    let has_multiline = base_imports.iter().any(|i| i.is_multiline)
1319        || ours_imports.iter().any(|i| i.is_multiline)
1320        || theirs_imports.iter().any(|i| i.is_multiline);
1321
1322    if has_multiline {
1323        return merge_imports_with_multiline(base, ours, theirs,
1324            &base_imports, &ours_imports, &theirs_imports);
1325    }
1326
1327    // Original single-line-only logic
1328    let base_lines: HashSet<&str> = base.lines().filter(|l| is_import_line(l)).collect();
1329    let ours_lines: HashSet<&str> = ours.lines().filter(|l| is_import_line(l)).collect();
1330
1331    let theirs_deleted: HashSet<&str> = base_lines.difference(
1332        &theirs.lines().filter(|l| is_import_line(l)).collect::<HashSet<&str>>()
1333    ).copied().collect();
1334
1335    let theirs_added: Vec<&str> = theirs
1336        .lines()
1337        .filter(|l| is_import_line(l) && !base_lines.contains(l) && !ours_lines.contains(l))
1338        .collect();
1339
1340    let mut groups: Vec<Vec<&str>> = Vec::new();
1341    let mut current_group: Vec<&str> = Vec::new();
1342
1343    for line in ours.lines() {
1344        if line.trim().is_empty() {
1345            if !current_group.is_empty() {
1346                groups.push(current_group);
1347                current_group = Vec::new();
1348            }
1349        } else if is_import_line(line) {
1350            if theirs_deleted.contains(line) {
1351                continue;
1352            }
1353            current_group.push(line);
1354        } else {
1355            current_group.push(line);
1356        }
1357    }
1358    if !current_group.is_empty() {
1359        groups.push(current_group);
1360    }
1361
1362    for add in &theirs_added {
1363        let prefix = import_source_prefix(add);
1364        let mut best_group = if groups.is_empty() { 0 } else { groups.len() - 1 };
1365        for (i, group) in groups.iter().enumerate() {
1366            if group.iter().any(|l| {
1367                is_import_line(l) && import_source_prefix(l) == prefix
1368            }) {
1369                best_group = i;
1370                break;
1371            }
1372        }
1373        if best_group < groups.len() {
1374            groups[best_group].push(add);
1375        } else {
1376            groups.push(vec![add]);
1377        }
1378    }
1379
1380    let mut result_lines: Vec<&str> = Vec::new();
1381    for (i, group) in groups.iter().enumerate() {
1382        if i > 0 {
1383            result_lines.push("");
1384        }
1385        result_lines.extend(group);
1386    }
1387
1388    let mut result = result_lines.join("\n");
1389    let ours_trailing = ours.len() - ours.trim_end_matches('\n').len();
1390    let result_trailing = result.len() - result.trim_end_matches('\n').len();
1391    for _ in result_trailing..ours_trailing {
1392        result.push('\n');
1393    }
1394    result
1395}
1396
1397/// Merge imports when multi-line import blocks are involved.
1398/// Matches imports by source module, merges specifiers as a set.
1399fn merge_imports_with_multiline(
1400    _base_raw: &str,
1401    ours_raw: &str,
1402    _theirs_raw: &str,
1403    base_imports: &[ImportStatement],
1404    ours_imports: &[ImportStatement],
1405    theirs_imports: &[ImportStatement],
1406) -> String {
1407    // Build source → specifier sets for base and theirs
1408    let base_specs: HashMap<&str, HashSet<&str>> = base_imports.iter().map(|imp| {
1409        let specs: HashSet<&str> = imp.specifiers.iter().map(|s| s.as_str()).collect();
1410        (imp.source.as_str(), specs)
1411    }).collect();
1412
1413    let theirs_specs: HashMap<&str, HashSet<&str>> = theirs_imports.iter().map(|imp| {
1414        let specs: HashSet<&str> = imp.specifiers.iter().map(|s| s.as_str()).collect();
1415        (imp.source.as_str(), specs)
1416    }).collect();
1417
1418    // Single-line import tracking: base lines and theirs-deleted
1419    let base_single: HashSet<String> = base_imports.iter()
1420        .filter(|i| !i.is_multiline)
1421        .map(|i| i.lines[0].clone())
1422        .collect();
1423    let theirs_single: HashSet<String> = theirs_imports.iter()
1424        .filter(|i| !i.is_multiline)
1425        .map(|i| i.lines[0].clone())
1426        .collect();
1427    let theirs_deleted_single: HashSet<&str> = base_single.iter()
1428        .filter(|l| !theirs_single.contains(l.as_str()))
1429        .map(|l| l.as_str())
1430        .collect();
1431
1432    // Process ours imports, merging in theirs specifiers
1433    let mut result_parts: Vec<String> = Vec::new();
1434    let mut handled_theirs_sources: HashSet<&str> = HashSet::new();
1435
1436    // Walk through ours_raw to preserve formatting (blank lines, comments)
1437    let lines: Vec<&str> = ours_raw.lines().collect();
1438    let mut i = 0;
1439    let mut ours_imp_idx = 0;
1440
1441    while i < lines.len() {
1442        let line = lines[i];
1443
1444        if line.trim().is_empty() {
1445            result_parts.push(line.to_string());
1446            i += 1;
1447            continue;
1448        }
1449
1450        if is_import_line(line) {
1451            let trimmed = line.trim();
1452            let starts_multiline = (trimmed.contains('{') && !trimmed.contains('}'))
1453                || (trimmed.starts_with("import (") && !trimmed.contains(')'));
1454
1455            if starts_multiline && ours_imp_idx < ours_imports.len() {
1456                let imp = &ours_imports[ours_imp_idx];
1457                // Find the matching import by source
1458                let source = imp.source.as_str();
1459                handled_theirs_sources.insert(source);
1460
1461                // Merge specifiers: ours + theirs additions - theirs deletions
1462                let base_spec_set = base_specs.get(source).cloned().unwrap_or_default();
1463                let theirs_spec_set = theirs_specs.get(source).cloned().unwrap_or_default();
1464                // Added by theirs: in theirs but not in base
1465                let theirs_added: HashSet<&str> = theirs_spec_set.difference(&base_spec_set).copied().collect();
1466                // Deleted by theirs: in base but not in theirs
1467                let theirs_removed: HashSet<&str> = base_spec_set.difference(&theirs_spec_set).copied().collect();
1468
1469                // Final set: ours (in original order) + theirs_added - theirs_removed
1470                let mut final_specs: Vec<&str> = imp.specifiers.iter()
1471                    .map(|s| s.as_str())
1472                    .filter(|s| !theirs_removed.contains(s))
1473                    .collect();
1474                for added in &theirs_added {
1475                    if !final_specs.contains(added) {
1476                        final_specs.push(added);
1477                    }
1478                }
1479
1480                // Detect indentation from the original block
1481                let indent = if imp.lines.len() > 1 {
1482                    let second = &imp.lines[1];
1483                    &second[..second.len() - second.trim_start().len()]
1484                } else {
1485                    "     "
1486                };
1487
1488                // Reconstruct multi-line import
1489                result_parts.push(imp.lines[0].clone()); // `import {`
1490                for spec in &final_specs {
1491                    result_parts.push(format!("{}{},", indent, spec));
1492                }
1493                // Closing line from ours
1494                if let Some(last) = imp.lines.last() {
1495                    result_parts.push(last.clone());
1496                }
1497
1498                // Skip past the original multi-line block in ours_raw
1499                let close_char = if trimmed.contains('{') { '}' } else { ')' };
1500                i += 1;
1501                while i < lines.len() {
1502                    if lines[i].trim().starts_with(close_char) {
1503                        i += 1;
1504                        break;
1505                    }
1506                    i += 1;
1507                }
1508                ours_imp_idx += 1;
1509                continue;
1510            } else {
1511                // Single-line import
1512                if ours_imp_idx < ours_imports.len() {
1513                    let imp = &ours_imports[ours_imp_idx];
1514                    handled_theirs_sources.insert(imp.source.as_str());
1515                    ours_imp_idx += 1;
1516                }
1517                // Check if theirs deleted this single-line import
1518                if !theirs_deleted_single.contains(line) {
1519                    result_parts.push(line.to_string());
1520                }
1521            }
1522        } else {
1523            result_parts.push(line.to_string());
1524        }
1525        i += 1;
1526    }
1527
1528    // Add any new imports from theirs that have new sources
1529    for imp in theirs_imports {
1530        if handled_theirs_sources.contains(imp.source.as_str()) {
1531            continue;
1532        }
1533        // Check if this source exists in base (if so, it was handled above)
1534        if base_specs.contains_key(imp.source.as_str()) {
1535            continue;
1536        }
1537        // Truly new import from theirs
1538        for line in &imp.lines {
1539            result_parts.push(line.clone());
1540        }
1541    }
1542
1543    let mut result = result_parts.join("\n");
1544    let ours_trailing = ours_raw.len() - ours_raw.trim_end_matches('\n').len();
1545    let result_trailing = result.len() - result.trim_end_matches('\n').len();
1546    for _ in result_trailing..ours_trailing {
1547        result.push('\n');
1548    }
1549    result
1550}
1551
/// Extract the source/module prefix from an import line for group matching.
/// e.g. "from collections import OrderedDict" -> "collections"
///      "import React from 'react'" -> "react"
///      "use std::collections::HashMap;" -> "std"
///
/// The input may span several lines (a multi-line import block); each line is
/// tried in order and the first one that yields a source wins. When nothing
/// matches, the whole input is returned trimmed.
fn import_source_prefix(line: &str) -> &str {
    // Text between the first quote character and the next occurrence of that
    // same quote character, so `"it's"` is not cut short at the apostrophe.
    fn quoted(s: &str) -> Option<&str> {
        let open = s.find(['\'', '"'])?;
        let quote = if s.as_bytes()[open] == b'\'' { '\'' } else { '"' };
        let inner = &s[open + 1..];
        inner.find(quote).map(|close| &inner[..close])
    }

    // For multi-line imports, search all lines for the source module
    // (e.g. `} from "./foo"` on the closing line)
    for l in line.lines() {
        let trimmed = l.trim();
        // Python: "from X import Y" -> X
        if let Some(rest) = trimmed.strip_prefix("from ") {
            return rest.split_whitespace().next().unwrap_or("");
        }
        // JS/TS closing line: `} from 'Y'` or `} from "Y"`
        // JS/TS single line:  "import X from 'Y'" -> Y (between quotes)
        let js_closing = trimmed.starts_with('}') && trimmed.contains("from ");
        if js_closing || trimmed.starts_with("import ") {
            if let Some(source) = quoted(trimmed) {
                return source;
            }
        }
        // Rust: "use X::Y;" -> X
        if let Some(rest) = trimmed.strip_prefix("use ") {
            return rest.split("::").next().unwrap_or("").trim_end_matches(';');
        }
    }
    line.trim()
}
1590
1591/// Fallback to line-level 3-way merge when entity extraction isn't possible.
1592///
1593/// Uses Sesame-inspired separator preprocessing (arXiv:2407.18888) to get
1594/// finer-grained alignment before line-level merge. Inserts newlines around
1595/// syntactic separators ({, }, ;) so that changes in different code blocks
1596/// align independently, reducing spurious conflicts.
1597///
1598/// Sesame expansion is skipped for data formats (JSON, YAML, TOML, lock files)
1599/// where `{`, `}`, `;` are structural content rather than code separators.
1600/// Expanding them destroys alignment and produces far more conflicts (confirmed
1601/// on GitButler: YAML went from 68 git markers to 192 weave markers with Sesame).
1602fn line_level_fallback(base: &str, ours: &str, theirs: &str, file_path: &str) -> MergeResult {
1603    let mut stats = MergeStats::default();
1604    stats.used_fallback = true;
1605
1606    // Skip Sesame preprocessing for data formats where {/}/; are content, not separators
1607    let skip = skip_sesame(file_path);
1608
1609    if skip {
1610        // Use git merge-file for data formats so we match git's output exactly.
1611        // diffy::merge uses a different diff algorithm that can produce more
1612        // conflict markers on structured data like lock files.
1613        return git_merge_file(base, ours, theirs, &mut stats);
1614    }
1615
1616    // Try Sesame expansion + diffy first, then compare against git merge-file.
1617    // Use whichever produces fewer conflict markers so we're never worse than git.
1618    let base_expanded = expand_separators(base);
1619    let ours_expanded = expand_separators(ours);
1620    let theirs_expanded = expand_separators(theirs);
1621
1622    let sesame_result = match diffy::merge(&base_expanded, &ours_expanded, &theirs_expanded) {
1623        Ok(merged) => {
1624            let content = collapse_separators(&merged, base);
1625            Some(MergeResult {
1626                content: post_merge_cleanup(&content),
1627                conflicts: vec![],
1628                warnings: vec![],
1629                stats: stats.clone(),
1630                audit: vec![],
1631            })
1632        }
1633        Err(_) => {
1634            // Sesame expansion conflicted, try plain diffy
1635            match diffy::merge(base, ours, theirs) {
1636                Ok(merged) => Some(MergeResult {
1637                    content: merged,
1638                    conflicts: vec![],
1639                    warnings: vec![],
1640                    stats: stats.clone(),
1641                    audit: vec![],
1642                }),
1643                Err(conflicted) => {
1644                    let _markers = conflicted.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1645                    let mut s = stats.clone();
1646                    s.entities_conflicted = 1;
1647                    Some(MergeResult {
1648                        content: conflicted,
1649                        conflicts: vec![EntityConflict {
1650                            entity_name: "(file)".to_string(),
1651                            entity_type: "file".to_string(),
1652                            kind: ConflictKind::BothModified,
1653                            complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1654                            ours_content: Some(ours.to_string()),
1655                            theirs_content: Some(theirs.to_string()),
1656                            base_content: Some(base.to_string()),
1657                        }],
1658                        warnings: vec![],
1659                        stats: s,
1660                        audit: vec![],
1661                    })
1662                }
1663            }
1664        }
1665    };
1666
1667    // Get git merge-file result as our floor
1668    let git_result = git_merge_file(base, ours, theirs, &mut stats);
1669
1670    // Compare: use sesame result only if it has fewer or equal markers
1671    match sesame_result {
1672        Some(sesame) if sesame.conflicts.is_empty() && !git_result.conflicts.is_empty() => {
1673            // Sesame resolved cleanly, git didn't: use sesame
1674            sesame
1675        }
1676        Some(sesame) if !sesame.conflicts.is_empty() && !git_result.conflicts.is_empty() => {
1677            // Both conflicted: use whichever has fewer markers
1678            let sesame_markers = sesame.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1679            let git_markers = git_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1680            if sesame_markers <= git_markers { sesame } else { git_result }
1681        }
1682        _ => git_result,
1683    }
1684}
1685
1686/// Shell out to `git merge-file` for an exact match with git's line-level merge.
1687///
1688/// We use this instead of `diffy::merge` for data formats (lock files, JSON, YAML, TOML)
1689/// where weave can't improve on git. `diffy` uses a different diff algorithm that can
1690/// produce more conflict markers on structured data (e.g. 22 markers vs git's 19 on uv.lock).
1691fn git_merge_file(base: &str, ours: &str, theirs: &str, stats: &mut MergeStats) -> MergeResult {
1692    let dir = match tempfile::tempdir() {
1693        Ok(d) => d,
1694        Err(_) => return diffy_fallback(base, ours, theirs, stats),
1695    };
1696
1697    let base_path = dir.path().join("base");
1698    let ours_path = dir.path().join("ours");
1699    let theirs_path = dir.path().join("theirs");
1700
1701    let write_ok = (|| -> std::io::Result<()> {
1702        std::fs::File::create(&base_path)?.write_all(base.as_bytes())?;
1703        std::fs::File::create(&ours_path)?.write_all(ours.as_bytes())?;
1704        std::fs::File::create(&theirs_path)?.write_all(theirs.as_bytes())?;
1705        Ok(())
1706    })();
1707
1708    if write_ok.is_err() {
1709        return diffy_fallback(base, ours, theirs, stats);
1710    }
1711
1712    // git merge-file writes result to the first file (ours) in place
1713    let output = Command::new("git")
1714        .arg("merge-file")
1715        .arg("-p") // print to stdout instead of modifying ours in place
1716        .arg("--diff3") // include ||||||| base section for jj compatibility
1717        .arg("-L").arg("ours")
1718        .arg("-L").arg("base")
1719        .arg("-L").arg("theirs")
1720        .arg(&ours_path)
1721        .arg(&base_path)
1722        .arg(&theirs_path)
1723        .output();
1724
1725    match output {
1726        Ok(result) => {
1727            let content = String::from_utf8_lossy(&result.stdout).into_owned();
1728            if result.status.success() {
1729                // Exit 0 = clean merge
1730                MergeResult {
1731                    content: post_merge_cleanup(&content),
1732                    conflicts: vec![],
1733                    warnings: vec![],
1734                    stats: stats.clone(),
1735                    audit: vec![],
1736                }
1737            } else {
1738                // Exit >0 = conflicts (exit code = number of conflicts)
1739                stats.entities_conflicted = 1;
1740                MergeResult {
1741                    content,
1742                    conflicts: vec![EntityConflict {
1743                        entity_name: "(file)".to_string(),
1744                        entity_type: "file".to_string(),
1745                        kind: ConflictKind::BothModified,
1746                        complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1747                        ours_content: Some(ours.to_string()),
1748                        theirs_content: Some(theirs.to_string()),
1749                        base_content: Some(base.to_string()),
1750                    }],
1751                    warnings: vec![],
1752                    stats: stats.clone(),
1753                    audit: vec![],
1754                }
1755            }
1756        }
1757        // git not available, fall back to diffy
1758        Err(_) => diffy_fallback(base, ours, theirs, stats),
1759    }
1760}
1761
1762/// Fallback to diffy::merge when git merge-file is unavailable.
1763fn diffy_fallback(base: &str, ours: &str, theirs: &str, stats: &mut MergeStats) -> MergeResult {
1764    match diffy::merge(base, ours, theirs) {
1765        Ok(merged) => {
1766            let content = post_merge_cleanup(&merged);
1767            MergeResult {
1768                content,
1769                conflicts: vec![],
1770                warnings: vec![],
1771                stats: stats.clone(),
1772                audit: vec![],
1773            }
1774        }
1775        Err(conflicted) => {
1776            stats.entities_conflicted = 1;
1777            MergeResult {
1778                content: conflicted,
1779                conflicts: vec![EntityConflict {
1780                    entity_name: "(file)".to_string(),
1781                    entity_type: "file".to_string(),
1782                    kind: ConflictKind::BothModified,
1783                    complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1784                    ours_content: Some(ours.to_string()),
1785                    theirs_content: Some(theirs.to_string()),
1786                    base_content: Some(base.to_string()),
1787                }],
1788                warnings: vec![],
1789                stats: stats.clone(),
1790                audit: vec![],
1791            }
1792        }
1793    }
1794}
1795
// Rationale for `filter_nested_entities` (defined further below):
//
// When a class contains methods which contain local variables, sem-core may extract
// all of them as entities. But for merge purposes, nested entities are part of their
// parent — we handle them via inner entity merge. Keeping them causes false conflicts
// (e.g. two methods both declaring `const user` would appear as BothAdded).
1802/// Check if entity list has too many duplicate names, which causes matching to hang.
1803fn has_excessive_duplicates(entities: &[SemanticEntity]) -> bool {
1804    let threshold = std::env::var("WEAVE_MAX_DUPLICATES")
1805        .ok()
1806        .and_then(|v| v.parse::<usize>().ok())
1807        .unwrap_or(10);
1808    let mut counts: HashMap<&str, usize> = HashMap::new();
1809    for e in entities {
1810        *counts.entry(&e.name).or_default() += 1;
1811    }
1812    counts.values().any(|&c| c >= threshold)
1813}
1814
1815/// Filter out entities that are nested inside other entities.
1816/// O(n log n) via sort + stack, replacing the previous O(n^2) approach.
1817fn filter_nested_entities(mut entities: Vec<SemanticEntity>) -> Vec<SemanticEntity> {
1818    if entities.len() <= 1 {
1819        return entities;
1820    }
1821
1822    // Sort by start_line ASC, then by end_line DESC (widest span first).
1823    // A parent entity always appears before its children in this order.
1824    entities.sort_by(|a, b| {
1825        a.start_line.cmp(&b.start_line).then(b.end_line.cmp(&a.end_line))
1826    });
1827
1828    // Stack-based filter: track the end_line of the current outermost entity.
1829    let mut result: Vec<SemanticEntity> = Vec::with_capacity(entities.len());
1830    let mut max_end: usize = 0;
1831
1832    for entity in entities {
1833        if entity.start_line > max_end || max_end == 0 {
1834            // Not nested: new top-level entity
1835            max_end = entity.end_line;
1836            result.push(entity);
1837        } else if entity.start_line == result.last().map_or(0, |e| e.start_line)
1838            && entity.end_line == result.last().map_or(0, |e| e.end_line)
1839        {
1840            // Exact same span (e.g. decorated_definition wrapping function_definition)
1841            result.push(entity);
1842        }
1843        // else: strictly nested, skip
1844    }
1845
1846    result
1847}
1848
1849/// Get child entities of a parent, sorted by start line.
1850fn get_child_entities<'a>(
1851    parent: &SemanticEntity,
1852    all_entities: &'a [SemanticEntity],
1853) -> Vec<&'a SemanticEntity> {
1854    let mut children: Vec<&SemanticEntity> = all_entities
1855        .iter()
1856        .filter(|e| e.parent_id.as_deref() == Some(&parent.id))
1857        .collect();
1858    children.sort_by_key(|e| e.start_line);
1859    children
1860}
1861
1862/// Compute a body hash for rename detection: the entity content with the entity
1863/// name replaced at word boundaries by a placeholder, so entities with identical
1864/// bodies but different names produce the same hash.
1865///
1866/// Uses word-boundary matching to avoid partial replacements (e.g. replacing
1867/// "get" inside "getAll"). Works across all languages since it operates on
1868/// the content string, not language-specific AST features.
1869fn body_hash(entity: &SemanticEntity) -> u64 {
1870    use std::collections::hash_map::DefaultHasher;
1871    use std::hash::{Hash, Hasher};
1872    let normalized = replace_at_word_boundaries(&entity.content, &entity.name, "__ENTITY__");
1873    let mut hasher = DefaultHasher::new();
1874    normalized.hash(&mut hasher);
1875    hasher.finish()
1876}
1877
/// Replace `needle` with `replacement` only at word boundaries.
///
/// A match counts only when the character before it and the character after it
/// are not identifier characters: not `_`, not ASCII alphanumeric, and not a
/// non-ASCII alphanumeric character. The start and end of `content` are
/// boundaries. The check is made on whole characters, so a needle followed by
/// a non-ASCII letter (e.g. "caf" inside "café") is not replaced.
///
/// Matches are found left to right and do not overlap. An empty `needle`
/// returns `content` unchanged.
fn replace_at_word_boundaries(content: &str, needle: &str, replacement: &str) -> String {
    if needle.is_empty() {
        return content.to_string();
    }

    // ASCII goes through `is_ident_char`; other scripts use the Unicode
    // alphanumeric property.
    let is_word = |c: char| {
        if c.is_ascii() {
            is_ident_char(c as u8)
        } else {
            c.is_alphanumeric()
        }
    };

    let mut result = String::with_capacity(content.len());
    // Unprocessed tail of `content`, and the character right before it.
    let mut rest = content;
    let mut prev: Option<char> = None;

    while let Some(ch) = rest.chars().next() {
        if let Some(tail) = rest.strip_prefix(needle) {
            let before_ok = prev.map_or(true, |c| !is_word(c));
            let after_ok = tail.chars().next().map_or(true, |c| !is_word(c));
            if before_ok && after_ok {
                result.push_str(replacement);
                // The boundary test for the next match looks at the original
                // text, whose preceding character is the needle's last one.
                prev = needle.chars().next_back();
                rest = tail;
                continue;
            }
        }
        result.push(ch);
        prev = Some(ch);
        rest = &rest[ch.len_utf8()..];
    }
    result
}

/// Whether byte `b` is an ASCII identifier character (`[A-Za-z0-9_]`).
fn is_ident_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}
1922
1923/// Build a rename map from new_id → base_id using confidence-scored matching.
1924///
1925/// Detects when an entity in the branch has the same body as an entity
1926/// in base but a different name/ID, indicating it was renamed.
1927/// Uses body_hash (name-stripped content hash) and structural_hash with
1928/// confidence scoring to resolve ambiguous matches correctly.
1929fn build_rename_map(
1930    base_entities: &[SemanticEntity],
1931    branch_entities: &[SemanticEntity],
1932) -> HashMap<String, String> {
1933    let mut rename_map: HashMap<String, String> = HashMap::new();
1934
1935    let base_ids: HashSet<&str> = base_entities.iter().map(|e| e.id.as_str()).collect();
1936
1937    // Build body_hash → base entities (multiple can have same hash)
1938    let mut base_by_body: HashMap<u64, Vec<&SemanticEntity>> = HashMap::new();
1939    for entity in base_entities {
1940        base_by_body.entry(body_hash(entity)).or_default().push(entity);
1941    }
1942
1943    // Also keep structural_hash index as fallback
1944    let mut base_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
1945    for entity in base_entities {
1946        if let Some(ref sh) = entity.structural_hash {
1947            base_by_structural.entry(sh.as_str()).or_default().push(entity);
1948        }
1949    }
1950
1951    // Collect all candidate (branch_entity, base_entity, confidence) triples
1952    struct RenameCandidate<'a> {
1953        branch: &'a SemanticEntity,
1954        base: &'a SemanticEntity,
1955        confidence: f64,
1956    }
1957    let mut candidates: Vec<RenameCandidate> = Vec::new();
1958
1959    for branch_entity in branch_entities {
1960        if base_ids.contains(branch_entity.id.as_str()) {
1961            continue;
1962        }
1963
1964        let bh = body_hash(branch_entity);
1965
1966        // Body hash matches
1967        if let Some(base_entities_for_hash) = base_by_body.get(&bh) {
1968            for &base_entity in base_entities_for_hash {
1969                let same_type = base_entity.entity_type == branch_entity.entity_type;
1970                let same_parent = base_entity.parent_id == branch_entity.parent_id;
1971                let confidence = match (same_type, same_parent) {
1972                    (true, true) => 0.95,
1973                    (true, false) => 0.8,
1974                    (false, _) => 0.6,
1975                };
1976                candidates.push(RenameCandidate { branch: branch_entity, base: base_entity, confidence });
1977            }
1978        }
1979
1980        // Structural hash fallback (lower confidence)
1981        if let Some(ref sh) = branch_entity.structural_hash {
1982            if let Some(base_entities_for_sh) = base_by_structural.get(sh.as_str()) {
1983                for &base_entity in base_entities_for_sh {
1984                    // Skip if already covered by body hash match
1985                    if candidates.iter().any(|c| c.branch.id == branch_entity.id && c.base.id == base_entity.id) {
1986                        continue;
1987                    }
1988                    candidates.push(RenameCandidate { branch: branch_entity, base: base_entity, confidence: 0.6 });
1989                }
1990            }
1991        }
1992    }
1993
1994    // Sort by confidence descending, assign greedily
1995    candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
1996
1997    let mut used_base_ids: HashSet<String> = HashSet::new();
1998    let mut used_branch_ids: HashSet<String> = HashSet::new();
1999
2000    for candidate in &candidates {
2001        if candidate.confidence < 0.6 {
2002            break;
2003        }
2004        if used_base_ids.contains(&candidate.base.id) || used_branch_ids.contains(&candidate.branch.id) {
2005            continue;
2006        }
2007        // Don't rename if the base entity's ID still exists in branch (it wasn't actually renamed)
2008        let base_id_in_branch = branch_entities.iter().any(|e| e.id == candidate.base.id);
2009        if base_id_in_branch {
2010            continue;
2011        }
2012        rename_map.insert(candidate.branch.id.clone(), candidate.base.id.clone());
2013        used_base_ids.insert(candidate.base.id.clone());
2014        used_branch_ids.insert(candidate.branch.id.clone());
2015    }
2016
2017    rename_map
2018}
2019
/// Whether `entity_type` names a container kind that may benefit from inner
/// entity merge. The comparison is exact and case-sensitive.
fn is_container_entity_type(entity_type: &str) -> bool {
    const CONTAINER_TYPES: &[&str] = &[
        "class",
        "interface",
        "enum",
        "impl",
        "trait",
        "module",
        "impl_item",
        "trait_item",
        "struct",
        "union",
        "namespace",
        "struct_item",
        "struct_specifier",
        "variable",
        "export",
    ];
    CONTAINER_TYPES.contains(&entity_type)
}
2029
/// A named member chunk extracted from a class/container body.
///
/// `children_to_chunks` produces one chunk per child entity.
#[derive(Debug, Clone)]
struct MemberChunk {
    /// The member name (method name, field name, etc.)
    name: String,
    /// Full content of the member including its body. When built by
    /// `children_to_chunks` this can also include the decorator/annotation/
    /// comment lines directly above the member.
    content: String,
}
2038
/// Result of an inner entity merge attempt.
// NOTE(review): the producer of this struct is outside this part of the file;
// the field descriptions below restate the original comments.
struct InnerMergeResult {
    /// Merged content (may contain per-member conflict markers)
    content: String,
    /// Whether any members had conflicts
    has_conflicts: bool,
}
2046
2047/// Convert sem-core child entities to MemberChunks for inner merge.
2048///
2049/// Uses child entity line positions to extract content from the container text,
2050/// including any leading decorators/annotations that tree-sitter attaches as
2051/// sibling nodes rather than part of the method node.
2052fn children_to_chunks(
2053    children: &[&SemanticEntity],
2054    container_content: &str,
2055    container_start_line: usize,
2056) -> Vec<MemberChunk> {
2057    if children.is_empty() {
2058        return Vec::new();
2059    }
2060
2061    let lines: Vec<&str> = container_content.lines().collect();
2062    let mut chunks = Vec::new();
2063
2064    for (i, child) in children.iter().enumerate() {
2065        let child_start_idx = child.start_line.saturating_sub(container_start_line);
2066        // +1 because end_line is inclusive but we need an exclusive upper bound for slicing
2067        let child_end_idx = child.end_line.saturating_sub(container_start_line) + 1;
2068
2069        if child_end_idx > lines.len() + 1 || child_start_idx >= lines.len() {
2070            // Position out of range, fall back to entity content
2071            chunks.push(MemberChunk {
2072                name: child.name.clone(),
2073                content: child.content.clone(),
2074            });
2075            continue;
2076        }
2077        let child_end_idx = child_end_idx.min(lines.len());
2078
2079        // Determine the earliest line we can claim (after previous child's end, or body start)
2080        let floor = if i > 0 {
2081            children[i - 1].end_line.saturating_sub(container_start_line) + 1
2082        } else {
2083            // First child: start after the container header line (the `{` or `:` line)
2084            // Find the line containing `{` or ending with `:`
2085            let header_end = lines
2086                .iter()
2087                .position(|l| l.contains('{') || l.trim().ends_with(':'))
2088                .map(|p| p + 1)
2089                .unwrap_or(0);
2090            header_end
2091        };
2092
2093        // Scan backwards from child_start_idx to include decorators/annotations/comments
2094        let mut content_start = child_start_idx;
2095        while content_start > floor {
2096            let prev = content_start - 1;
2097            let trimmed = lines[prev].trim();
2098            if trimmed.starts_with('@')
2099                || trimmed.starts_with("#[")
2100                || trimmed.starts_with("//")
2101                || trimmed.starts_with("///")
2102                || trimmed.starts_with("/**")
2103                || trimmed.starts_with("* ")
2104                || trimmed == "*/"
2105            {
2106                content_start = prev;
2107            } else if trimmed.is_empty() && content_start > floor + 1 {
2108                // Allow one blank line between decorator and method
2109                content_start = prev;
2110            } else {
2111                break;
2112            }
2113        }
2114
2115        // Skip leading blank lines
2116        while content_start < child_start_idx && lines[content_start].trim().is_empty() {
2117            content_start += 1;
2118        }
2119
2120        let chunk_content: String = lines[content_start..child_end_idx].join("\n");
2121        chunks.push(MemberChunk {
2122            name: child.name.clone(),
2123            content: chunk_content,
2124        });
2125    }
2126
2127    chunks
2128}
2129
2130/// Generate a scoped conflict marker for a single member within a container merge.
2131fn scoped_conflict_marker(
2132    name: &str,
2133    base: Option<&str>,
2134    ours: Option<&str>,
2135    theirs: Option<&str>,
2136    ours_deleted: bool,
2137    theirs_deleted: bool,
2138    fmt: &MarkerFormat,
2139) -> String {
2140    let open = "<".repeat(fmt.marker_length);
2141    let sep = "=".repeat(fmt.marker_length);
2142    let close = ">".repeat(fmt.marker_length);
2143
2144    let o = ours.unwrap_or("");
2145    let t = theirs.unwrap_or("");
2146
2147    // Narrow conflict markers to just the differing lines
2148    let ours_lines: Vec<&str> = o.lines().collect();
2149    let theirs_lines: Vec<&str> = t.lines().collect();
2150    let (prefix_len, suffix_len) = if ours.is_some() && theirs.is_some() {
2151        crate::conflict::narrow_conflict_lines(&ours_lines, &theirs_lines)
2152    } else {
2153        (0, 0)
2154    };
2155    let has_narrowing = prefix_len > 0 || suffix_len > 0;
2156    let ours_mid = &ours_lines[prefix_len..ours_lines.len() - suffix_len];
2157    let theirs_mid = &theirs_lines[prefix_len..theirs_lines.len() - suffix_len];
2158
2159    let mut out = String::new();
2160
2161    // Emit common prefix as clean text
2162    if has_narrowing {
2163        for line in &ours_lines[..prefix_len] {
2164            out.push_str(line);
2165            out.push('\n');
2166        }
2167    }
2168
2169    // Opening marker
2170    if fmt.enhanced {
2171        if ours_deleted {
2172            out.push_str(&format!("{} ours ({} deleted)\n", open, name));
2173        } else {
2174            out.push_str(&format!("{} ours ({})\n", open, name));
2175        }
2176    } else {
2177        out.push_str(&format!("{} ours\n", open));
2178    }
2179
2180    // Ours content (narrowed or full)
2181    if ours.is_some() {
2182        if has_narrowing {
2183            for line in ours_mid {
2184                out.push_str(line);
2185                out.push('\n');
2186            }
2187        } else {
2188            out.push_str(o);
2189            if !o.ends_with('\n') {
2190                out.push('\n');
2191            }
2192        }
2193    }
2194
2195    // Base section for diff3 format (standard mode only)
2196    if !fmt.enhanced {
2197        let base_marker = "|".repeat(fmt.marker_length);
2198        out.push_str(&format!("{} base\n", base_marker));
2199        let b = base.unwrap_or("");
2200        if has_narrowing {
2201            let base_lines: Vec<&str> = b.lines().collect();
2202            let base_prefix = prefix_len.min(base_lines.len());
2203            let base_suffix = suffix_len.min(base_lines.len().saturating_sub(base_prefix));
2204            for line in &base_lines[base_prefix..base_lines.len() - base_suffix] {
2205                out.push_str(line);
2206                out.push('\n');
2207            }
2208        } else {
2209            out.push_str(b);
2210            if !b.is_empty() && !b.ends_with('\n') {
2211                out.push('\n');
2212            }
2213        }
2214    }
2215
2216    // Separator
2217    out.push_str(&format!("{}\n", sep));
2218
2219    // Theirs content (narrowed or full)
2220    if theirs.is_some() {
2221        if has_narrowing {
2222            for line in theirs_mid {
2223                out.push_str(line);
2224                out.push('\n');
2225            }
2226        } else {
2227            out.push_str(t);
2228            if !t.ends_with('\n') {
2229                out.push('\n');
2230            }
2231        }
2232    }
2233
2234    // Closing marker
2235    if fmt.enhanced {
2236        if theirs_deleted {
2237            out.push_str(&format!("{} theirs ({} deleted)\n", close, name));
2238        } else {
2239            out.push_str(&format!("{} theirs ({})\n", close, name));
2240        }
2241    } else {
2242        out.push_str(&format!("{} theirs\n", close));
2243    }
2244
2245    // Emit common suffix as clean text
2246    if has_narrowing {
2247        for line in &ours_lines[ours_lines.len() - suffix_len..] {
2248            out.push_str(line);
2249            out.push('\n');
2250        }
2251    }
2252
2253    out
2254}
2255
2256/// Try recursive inner entity merge for container types (classes, impls, etc.).
2257///
2258/// Inspired by LastMerge (arXiv:2507.19687): class members are "unordered children" —
2259/// reordering them is not a conflict. We chunk the class body into members, match by
2260/// name, and merge each member independently.
2261///
2262/// Returns Some(result) if chunking succeeded, None if we can't parse the container.
2263/// The result may contain per-member conflict markers (scoped conflicts).
2264fn try_inner_entity_merge(
2265    base: &str,
2266    ours: &str,
2267    theirs: &str,
2268    base_children: &[&SemanticEntity],
2269    ours_children: &[&SemanticEntity],
2270    theirs_children: &[&SemanticEntity],
2271    base_start_line: usize,
2272    ours_start_line: usize,
2273    theirs_start_line: usize,
2274    marker_format: &MarkerFormat,
2275) -> Option<InnerMergeResult> {
2276    // Try sem-core child entities first (tree-sitter-accurate boundaries),
2277    // fall back to indentation heuristic if children aren't available.
2278    // When children_to_chunks produces chunks, try indentation as a fallback
2279    // if the tree-sitter chunks lead to conflicts (the indentation heuristic
2280    // can include trailing context that helps diffy merge adjacent changes).
2281    let use_children = !ours_children.is_empty() || !theirs_children.is_empty();
2282    let (base_chunks, ours_chunks, theirs_chunks) = if use_children {
2283        (
2284            children_to_chunks(base_children, base, base_start_line),
2285            children_to_chunks(ours_children, ours, ours_start_line),
2286            children_to_chunks(theirs_children, theirs, theirs_start_line),
2287        )
2288    } else {
2289        (
2290            extract_member_chunks(base)?,
2291            extract_member_chunks(ours)?,
2292            extract_member_chunks(theirs)?,
2293        )
2294    };
2295
2296    // Need at least 1 member to attempt inner merge
2297    // (Even single-member containers benefit from decorator-aware merge)
2298    if base_chunks.is_empty() && ours_chunks.is_empty() && theirs_chunks.is_empty() {
2299        return None;
2300    }
2301
2302    // Build name → content maps
2303    let base_map: HashMap<&str, &str> = base_chunks
2304        .iter()
2305        .map(|c| (c.name.as_str(), c.content.as_str()))
2306        .collect();
2307    let ours_map: HashMap<&str, &str> = ours_chunks
2308        .iter()
2309        .map(|c| (c.name.as_str(), c.content.as_str()))
2310        .collect();
2311    let theirs_map: HashMap<&str, &str> = theirs_chunks
2312        .iter()
2313        .map(|c| (c.name.as_str(), c.content.as_str()))
2314        .collect();
2315
2316    // Collect all member names
2317    let mut all_names: Vec<String> = Vec::new();
2318    let mut seen: HashSet<String> = HashSet::new();
2319    // Use ours ordering as skeleton
2320    for chunk in &ours_chunks {
2321        if seen.insert(chunk.name.clone()) {
2322            all_names.push(chunk.name.clone());
2323        }
2324    }
2325    // Add theirs-only members
2326    for chunk in &theirs_chunks {
2327        if seen.insert(chunk.name.clone()) {
2328            all_names.push(chunk.name.clone());
2329        }
2330    }
2331
2332    // Extract header/footer (class declaration line and closing brace)
2333    let (ours_header, ours_footer) = extract_container_wrapper(ours)?;
2334
2335    let mut merged_members: Vec<String> = Vec::new();
2336    let mut has_conflict = false;
2337
2338    for name in &all_names {
2339        let in_base = base_map.get(name.as_str());
2340        let in_ours = ours_map.get(name.as_str());
2341        let in_theirs = theirs_map.get(name.as_str());
2342
2343        match (in_base, in_ours, in_theirs) {
2344            // In all three
2345            (Some(b), Some(o), Some(t)) => {
2346                if o == t {
2347                    merged_members.push(o.to_string());
2348                } else if b == o {
2349                    merged_members.push(t.to_string());
2350                } else if b == t {
2351                    merged_members.push(o.to_string());
2352                } else {
2353                    // Both changed differently: try diffy, then git merge-file, then decorator merge
2354                    if let Some(merged) = diffy_merge(b, o, t) {
2355                        merged_members.push(merged);
2356                    } else if let Some(merged) = git_merge_string(b, o, t) {
2357                        merged_members.push(merged);
2358                    } else if let Some(merged) = try_decorator_aware_merge(b, o, t) {
2359                        merged_members.push(merged);
2360                    } else {
2361                        // Emit per-member conflict markers
2362                        has_conflict = true;
2363                        merged_members.push(scoped_conflict_marker(name, Some(b), Some(o), Some(t), false, false, marker_format));
2364                    }
2365                }
2366            }
2367            // Deleted by theirs, ours unchanged or not in base
2368            (Some(b), Some(o), None) => {
2369                if *b == *o {
2370                    // Ours unchanged, theirs deleted → accept deletion
2371                } else {
2372                    // Ours modified, theirs deleted → per-member conflict
2373                    has_conflict = true;
2374                    merged_members.push(scoped_conflict_marker(name, Some(b), Some(o), None, false, true, marker_format));
2375                }
2376            }
2377            // Deleted by ours, theirs unchanged or not in base
2378            (Some(b), None, Some(t)) => {
2379                if *b == *t {
2380                    // Theirs unchanged, ours deleted → accept deletion
2381                } else {
2382                    // Theirs modified, ours deleted → per-member conflict
2383                    has_conflict = true;
2384                    merged_members.push(scoped_conflict_marker(name, Some(b), None, Some(t), true, false, marker_format));
2385                }
2386            }
2387            // Added by ours only
2388            (None, Some(o), None) => {
2389                merged_members.push(o.to_string());
2390            }
2391            // Added by theirs only
2392            (None, None, Some(t)) => {
2393                merged_members.push(t.to_string());
2394            }
2395            // Added by both with different content
2396            (None, Some(o), Some(t)) => {
2397                if o == t {
2398                    merged_members.push(o.to_string());
2399                } else {
2400                    has_conflict = true;
2401                    merged_members.push(scoped_conflict_marker(name, None, Some(o), Some(t), false, false, marker_format));
2402                }
2403            }
2404            // Deleted by both
2405            (Some(_), None, None) => {}
2406            (None, None, None) => {}
2407        }
2408    }
2409
2410    // Reconstruct: header + merged members + footer
2411    let mut result = String::new();
2412    result.push_str(ours_header);
2413    if !ours_header.ends_with('\n') {
2414        result.push('\n');
2415    }
2416
2417    // Detect if members are single-line (fields, variants) vs multi-line (methods)
2418    let has_multiline_members = merged_members.iter().any(|m| m.contains('\n'));
2419    // Check if the original content had blank lines between members
2420    let original_has_blank_separators = {
2421        let body = ours_header.len()..ours.rfind(ours_footer).unwrap_or(ours.len());
2422        let body_content = &ours[body];
2423        body_content.contains("\n\n")
2424    };
2425
2426    for (i, member) in merged_members.iter().enumerate() {
2427        result.push_str(member);
2428        if !member.ends_with('\n') {
2429            result.push('\n');
2430        }
2431        // Add blank line between multi-line members only if the original had them
2432        if i < merged_members.len() - 1 && has_multiline_members && original_has_blank_separators && !member.ends_with("\n\n") {
2433            result.push('\n');
2434        }
2435    }
2436
2437    result.push_str(ours_footer);
2438    if !ours_footer.ends_with('\n') && ours.ends_with('\n') {
2439        result.push('\n');
2440    }
2441
2442    // If children_to_chunks led to conflicts, retry with indentation heuristic.
2443    // The indentation approach includes trailing blank lines in chunks, giving
2444    // diffy more context to merge adjacent changes from different branches.
2445    if has_conflict && use_children {
2446        if let (Some(bc), Some(oc), Some(tc)) = (
2447            extract_member_chunks(base),
2448            extract_member_chunks(ours),
2449            extract_member_chunks(theirs),
2450        ) {
2451            if !bc.is_empty() || !oc.is_empty() || !tc.is_empty() {
2452                let fallback = try_inner_merge_with_chunks(
2453                    &bc, &oc, &tc, ours, ours_header, ours_footer,
2454                    has_multiline_members, marker_format,
2455                );
2456                if let Some(fb) = fallback {
2457                    if !fb.has_conflicts {
2458                        return Some(fb);
2459                    }
2460                }
2461            }
2462        }
2463    }
2464
2465    Some(InnerMergeResult {
2466        content: result,
2467        has_conflicts: has_conflict,
2468    })
2469}
2470
2471/// Inner merge helper using pre-extracted chunks. Used for indentation-heuristic fallback.
2472fn try_inner_merge_with_chunks(
2473    base_chunks: &[MemberChunk],
2474    ours_chunks: &[MemberChunk],
2475    theirs_chunks: &[MemberChunk],
2476    ours: &str,
2477    ours_header: &str,
2478    ours_footer: &str,
2479    has_multiline_hint: bool,
2480    marker_format: &MarkerFormat,
2481) -> Option<InnerMergeResult> {
2482    let base_map: HashMap<&str, &str> = base_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2483    let ours_map: HashMap<&str, &str> = ours_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2484    let theirs_map: HashMap<&str, &str> = theirs_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2485
2486    let mut all_names: Vec<String> = Vec::new();
2487    let mut seen: HashSet<String> = HashSet::new();
2488    for chunk in ours_chunks {
2489        if seen.insert(chunk.name.clone()) {
2490            all_names.push(chunk.name.clone());
2491        }
2492    }
2493    for chunk in theirs_chunks {
2494        if seen.insert(chunk.name.clone()) {
2495            all_names.push(chunk.name.clone());
2496        }
2497    }
2498
2499    let mut merged_members: Vec<String> = Vec::new();
2500    let mut has_conflict = false;
2501
2502    for name in &all_names {
2503        let in_base = base_map.get(name.as_str());
2504        let in_ours = ours_map.get(name.as_str());
2505        let in_theirs = theirs_map.get(name.as_str());
2506
2507        match (in_base, in_ours, in_theirs) {
2508            (Some(b), Some(o), Some(t)) => {
2509                if o == t {
2510                    merged_members.push(o.to_string());
2511                } else if b == o {
2512                    merged_members.push(t.to_string());
2513                } else if b == t {
2514                    merged_members.push(o.to_string());
2515                } else if let Some(merged) = diffy_merge(b, o, t) {
2516                    merged_members.push(merged);
2517                } else if let Some(merged) = git_merge_string(b, o, t) {
2518                    merged_members.push(merged);
2519                } else {
2520                    has_conflict = true;
2521                    merged_members.push(scoped_conflict_marker(name, Some(b), Some(o), Some(t), false, false, marker_format));
2522                }
2523            }
2524            (Some(b), Some(o), None) => {
2525                if *b != *o { merged_members.push(o.to_string()); }
2526            }
2527            (Some(b), None, Some(t)) => {
2528                if *b != *t { merged_members.push(t.to_string()); }
2529            }
2530            (None, Some(o), None) => merged_members.push(o.to_string()),
2531            (None, None, Some(t)) => merged_members.push(t.to_string()),
2532            (None, Some(o), Some(t)) => {
2533                if o == t {
2534                    merged_members.push(o.to_string());
2535                } else {
2536                    has_conflict = true;
2537                    merged_members.push(scoped_conflict_marker(name, None, Some(o), Some(t), false, false, marker_format));
2538                }
2539            }
2540            (Some(_), None, None) | (None, None, None) => {}
2541        }
2542    }
2543
2544    let has_multiline_members = has_multiline_hint || merged_members.iter().any(|m| m.contains('\n'));
2545    let mut result = String::new();
2546    result.push_str(ours_header);
2547    if !ours_header.ends_with('\n') { result.push('\n'); }
2548    for (i, member) in merged_members.iter().enumerate() {
2549        result.push_str(member);
2550        if !member.ends_with('\n') { result.push('\n'); }
2551        if i < merged_members.len() - 1 && has_multiline_members && !member.ends_with("\n\n") {
2552            result.push('\n');
2553        }
2554    }
2555    result.push_str(ours_footer);
2556    if !ours_footer.ends_with('\n') && ours.ends_with('\n') { result.push('\n'); }
2557
2558    Some(InnerMergeResult {
2559        content: result,
2560        has_conflicts: has_conflict,
2561    })
2562}
2563
/// Extract the header (class declaration) and footer (closing brace) from a container.
/// Supports both brace-delimited (JS/TS/Java/Rust/C) and indentation-based (Python) containers.
///
/// The header runs from the start of `content` through the end of the first line
/// containing `{` (or, for Python-style containers, the first line ending with `:`),
/// including that line's terminator. The footer runs from the start of the last
/// line that is exactly `}` or `};` to the end of `content`; it is empty for
/// Python-style containers. Both are sub-slices of `content`.
///
/// Returns `None` when `content` has fewer than two lines or the header/footer
/// line cannot be found.
fn extract_container_wrapper(content: &str) -> Option<(&str, &str)> {
    // Keep the line terminators: summing the real line lengths gives exact byte
    // offsets for both `\n` and `\r\n` endings. (`str::lines` strips either
    // terminator, so "length + 1" undercounts on CRLF input and can even land
    // inside a multi-byte character.)
    let lines: Vec<&str> = content.split_inclusive('\n').collect();
    if lines.len() < 2 {
        return None;
    }

    // Byte offset at which line `index` starts.
    let offset_of = |index: usize| -> usize { lines[..index].iter().map(|l| l.len()).sum() };

    // Check if this is a Python-style container (ends with `:` instead of `{`)
    let has_brace = lines.iter().any(|l| l.contains('{'));
    let is_python_style = !has_brace
        && lines.iter().any(|l| {
            let trimmed = l.trim();
            (trimmed.starts_with("class ") || trimmed.starts_with("def "))
                && trimmed.ends_with(':')
        });

    if is_python_style {
        // Python: header is the `class Foo:` line, no footer
        let header_line = lines.iter().position(|l| l.trim().ends_with(':'))?;
        let header = &content[..offset_of(header_line + 1)];
        // No closing brace in Python — footer is the empty slice at the end
        let footer = &content[content.len()..];
        Some((header, footer))
    } else {
        // Brace-delimited: header up to `{`, footer from last `}`
        let header_line = lines.iter().position(|l| l.contains('{'))?;
        let header = &content[..offset_of(header_line + 1)];

        let footer_line = lines
            .iter()
            .rposition(|l| matches!(l.trim(), "}" | "};"))?;
        let footer = &content[offset_of(footer_line)..];

        Some((header, footer))
    }
}
2613
2614/// Extract named member chunks from a container body.
2615///
2616/// Identifies member boundaries by indentation: members start at the first
2617/// indentation level inside the container. Each member extends until the next
2618/// member starts or the container closes.
2619fn extract_member_chunks(content: &str) -> Option<Vec<MemberChunk>> {
2620    let lines: Vec<&str> = content.lines().collect();
2621    if lines.len() < 2 {
2622        return None;
2623    }
2624
2625    // Check if Python-style (indentation-based)
2626    let is_python_style = lines.iter().any(|l| {
2627        let trimmed = l.trim();
2628        (trimmed.starts_with("class ") || trimmed.starts_with("def "))
2629            && trimmed.ends_with(':')
2630    }) && !lines.iter().any(|l| l.contains('{'));
2631
2632    // Find the body range
2633    let body_start = if is_python_style {
2634        lines.iter().position(|l| l.trim().ends_with(':'))? + 1
2635    } else {
2636        lines.iter().position(|l| l.contains('{'))? + 1
2637    };
2638    let body_end = if is_python_style {
2639        // Python: body extends to end of content
2640        lines.len()
2641    } else {
2642        lines.iter().rposition(|l| {
2643            let trimmed = l.trim();
2644            trimmed == "}" || trimmed == "};"
2645        })?
2646    };
2647
2648    if body_start >= body_end {
2649        return None;
2650    }
2651
2652    // Determine member indentation level by looking at first non-empty body line
2653    let member_indent = lines[body_start..body_end]
2654        .iter()
2655        .find(|l| !l.trim().is_empty())
2656        .map(|l| l.len() - l.trim_start().len())?;
2657
2658    let mut chunks: Vec<MemberChunk> = Vec::new();
2659    let mut current_chunk_lines: Vec<&str> = Vec::new();
2660    let mut current_name: Option<String> = None;
2661
2662    for line in &lines[body_start..body_end] {
2663        let trimmed = line.trim();
2664        if trimmed.is_empty() {
2665            // Blank lines: if we have a current chunk, include them
2666            if current_name.is_some() {
2667                // Only include if not trailing blanks
2668                current_chunk_lines.push(line);
2669            }
2670            continue;
2671        }
2672
2673        let indent = line.len() - line.trim_start().len();
2674
2675        // Is this a new member declaration at the member indent level?
2676        // Exclude closing braces, comments, and decorators/annotations
2677        if indent == member_indent
2678            && !trimmed.starts_with("//")
2679            && !trimmed.starts_with("/*")
2680            && !trimmed.starts_with("*")
2681            && !trimmed.starts_with("#")
2682            && !trimmed.starts_with("@")
2683            && !trimmed.starts_with("}")
2684            && trimmed != ","
2685        {
2686            // Save previous chunk
2687            if let Some(name) = current_name.take() {
2688                // Trim trailing blank lines
2689                while current_chunk_lines.last().map_or(false, |l| l.trim().is_empty()) {
2690                    current_chunk_lines.pop();
2691                }
2692                if !current_chunk_lines.is_empty() {
2693                    chunks.push(MemberChunk {
2694                        name,
2695                        content: current_chunk_lines.join("\n"),
2696                    });
2697                }
2698                current_chunk_lines.clear();
2699            }
2700
2701            // Start new chunk — extract member name
2702            let name = extract_member_name(trimmed);
2703            current_name = Some(name);
2704            current_chunk_lines.push(line);
2705        } else if current_name.is_some() {
2706            // Continuation of current member (body lines, nested blocks)
2707            current_chunk_lines.push(line);
2708        } else {
2709            // Content before first member (decorators, comments for first member)
2710            // Attach to next member
2711            current_chunk_lines.push(line);
2712        }
2713    }
2714
2715    // Save last chunk
2716    if let Some(name) = current_name {
2717        while current_chunk_lines.last().map_or(false, |l| l.trim().is_empty()) {
2718            current_chunk_lines.pop();
2719        }
2720        if !current_chunk_lines.is_empty() {
2721            chunks.push(MemberChunk {
2722                name,
2723                content: current_chunk_lines.join("\n"),
2724            });
2725        }
2726    }
2727
2728    // Post-process: if any chunk has a brace-only name (anonymous struct literal
2729    // entries like Go's `{ Name: "x", ... }`), derive a name from the first
2730    // key-value field inside the chunk to avoid HashMap collisions.
2731    for chunk in &mut chunks {
2732        if chunk.name == "{" || chunk.name == "{}" {
2733            if let Some(better) = derive_name_from_struct_literal(&chunk.content) {
2734                chunk.name = better;
2735            }
2736        }
2737    }
2738
2739    if chunks.is_empty() {
2740        None
2741    } else {
2742        Some(chunks)
2743    }
2744}
2745
/// Extract a member name from a declaration line.
///
/// Heuristics, in order:
/// 1. Go method with receiver (`func (c *T) Name(`): identifier before the
///    parameter list that follows the receiver.
/// 2. Anything with parentheses: identifier immediately before the first `(`.
/// 3. Otherwise: strip leading modifier keywords and take the first identifier.
///
/// A leading Rust restricted-visibility qualifier (`pub(crate)`, `pub(super)`,
/// `pub(self)`, `pub(in path)`) is skipped before steps 2 and 3, so its
/// parenthesis is not mistaken for a parameter list.
///
/// If no identifier is found, the first 20 characters of the trimmed line are
/// returned.
fn extract_member_name(line: &str) -> String {
    // Longest run of identifier characters at the end of `text`.
    fn trailing_identifier(text: &str) -> &str {
        let start = text
            .char_indices()
            .rev()
            .take_while(|(_, c)| c.is_alphanumeric() || *c == '_')
            .last()
            .map_or(text.len(), |(index, _)| index);
        &text[start..]
    }

    // Drop a leading `pub(crate)` / `pub(super)` / `pub(self)` / `pub(in path)`.
    // Anything else starting with `pub(` (e.g. a method literally named `pub`)
    // is returned unchanged.
    fn strip_restricted_visibility(text: &str) -> &str {
        let Some(rest) = text.strip_prefix("pub(") else {
            return text;
        };
        let Some((scope, after)) = rest.split_once(')') else {
            return text;
        };
        let scope = scope.trim();
        if matches!(scope, "crate" | "super" | "self") || scope.starts_with("in ") {
            after.trim_start()
        } else {
            text
        }
    }

    let trimmed = line.trim();

    // Go method receiver: `func (c *Calculator) Add(` -> skip receiver, find name before second `(`
    if trimmed.starts_with("func ") && trimmed.get(5..6) == Some("(") {
        if let Some(recv_close) = trimmed.find(')') {
            let after_recv = &trimmed[recv_close + 1..];
            if let Some(paren_pos) = after_recv.find('(') {
                let name = trailing_identifier(after_recv[..paren_pos].trim());
                if !name.is_empty() {
                    return name.to_string();
                }
            }
        }
    }

    let decl = strip_restricted_visibility(trimmed);

    // Strategy 1: For method/function declarations with parentheses,
    // the name is the identifier immediately before `(`.
    // This handles all languages: Java `public int add(`, Rust `pub fn add(`,
    // Python `def add(`, TS `async getUser(`, Go `func add(`, etc.
    if let Some(paren_pos) = decl.find('(') {
        let name = trailing_identifier(decl[..paren_pos].trim_end());
        if !name.is_empty() {
            return name.to_string();
        }
    }

    // Strategy 2: For fields/properties/variants without parens,
    // strip keywords (one pass, in this order) and take the first identifier.
    let mut s = decl;
    for keyword in [
        "export ", "public ", "private ", "protected ", "static ",
        "abstract ", "async ", "override ", "readonly ",
        "pub ", "fn ", "def ", "get ", "set ",
    ] {
        if let Some(rest) = s.strip_prefix(keyword) {
            s = rest;
        }
    }
    if let Some(rest) = s.strip_prefix("fn ") {
        s = rest;
    }

    let name: String = s
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();

    if name.is_empty() {
        trimmed.chars().take(20).collect()
    } else {
        name
    }
}
2818
/// Derive a name for an anonymous struct-literal chunk (e.g. a Go slice entry
/// whose first line is just `{`) from its fields.
///
/// Skips the first line, then returns the value of the first `Key: value` line
/// whose value is non-empty after removing a trailing comma and surrounding
/// double/single quotes.
/// E.g., `Name: "panelTitleSearch",` → `panelTitleSearch`
fn derive_name_from_struct_literal(content: &str) -> Option<String> {
    content.lines().skip(1).find_map(|line| {
        let field = line.trim().trim_end_matches(',');
        // Everything after the first colon is the candidate value.
        let (_, raw_value) = field.split_once(':')?;
        let value = raw_value.trim().trim_matches('"').trim_matches('\'');
        (!value.is_empty()).then(|| value.to_string())
    })
}
2837
/// Check if content looks binary (contains null bytes in first 8KB).
fn is_binary(content: &str) -> bool {
    content.bytes().take(8192).any(|b| b == 0)
}

/// Check if content already contains git conflict markers.
/// This happens with AU/AA conflicts where git stores markers in stage blobs.
fn has_conflict_markers(content: &str) -> bool {
    content.contains("<<<<<<<") && content.contains(">>>>>>>")
}

/// Returns true for data/config file formats where Sesame separator expansion
/// (`{`, `}`, `;`) is counterproductive because those chars are structural
/// content rather than code block separators.
///
/// The check is a case-insensitive match on the end of `file_path`.
///
/// Note: template files like .svelte/.vue are NOT included here because their
/// embedded `<script>` sections contain real code where Sesame helps.
fn skip_sesame(file_path: &str) -> bool {
    const SKIPPED_EXTENSIONS: [&str; 20] = [
        // Data/config formats
        ".json", ".yaml", ".yml", ".toml", ".lock", ".xml", ".csv", ".tsv",
        ".ini", ".cfg", ".conf", ".properties", ".env",
        // Markup/document formats
        ".md", ".markdown", ".txt", ".rst", ".svg", ".html", ".htm",
    ];
    let path_lower = file_path.to_lowercase();
    SKIPPED_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
}
2866
/// Expand syntactic separators into separate lines for finer merge alignment.
/// Inspired by Sesame (arXiv:2407.18888): isolating separators lets line-based
/// merge tools see block boundaries as independent change units.
///
/// Every `{`, `}` and `;` outside a quoted section is placed on its own line: a
/// newline is inserted before it unless the output is empty or already ends with
/// one, and a newline is always inserted after it. A quoted section starts at
/// `"`, `'` or `` ` `` and ends at the next unescaped occurrence of the same
/// character; a backslash inside it escapes the following character.
fn expand_separators(content: &str) -> String {
    let mut result = String::with_capacity(content.len() * 2);
    let mut in_string = false;
    let mut escape_next = false;
    let mut string_char = '"';

    for ch in content.chars() {
        // Character following a backslash inside a quoted section: copy verbatim.
        if escape_next {
            result.push(ch);
            escape_next = false;
            continue;
        }

        if in_string {
            if ch == '\\' {
                escape_next = true;
            } else if ch == string_char {
                in_string = false;
            }
            result.push(ch);
            continue;
        }

        match ch {
            '"' | '\'' | '`' => {
                in_string = true;
                string_char = ch;
                result.push(ch);
            }
            '{' | '}' | ';' => {
                if !result.is_empty() && !result.ends_with('\n') {
                    result.push('\n');
                }
                result.push(ch);
                result.push('\n');
            }
            _ => result.push(ch),
        }
    }

    result
}
2916
/// Post-process text produced by merging separator-expanded content.
///
/// Despite the name, separator-only lines (`{`, `}`, `;`) are NOT re-joined
/// with their neighbours: every line of `merged` is emitted unchanged. The
/// only transformations are:
///
/// * line endings are normalised to `\n` (`str::lines` drops a trailing `\r`),
/// * the last line is always terminated with `\n`,
/// * a run of trailing newlines is reduced to a single one.
///
/// `_base` is currently unused.
///
/// NOTE(review): the earlier body contained a branch meant to join separator
/// lines with the preceding line, guarded by "output does not end with a
/// newline". Every append ended with `\n`, so that branch could never run.
/// It has been removed to make the real behaviour explicit; implementing an
/// actual collapse would change merge output and needs its own change.
fn collapse_separators(merged: &str, _base: &str) -> String {
    let mut result = String::with_capacity(merged.len() + 1);

    for line in merged.lines() {
        result.push_str(line);
        result.push('\n');
    }

    // Trim any trailing extra newlines, keeping exactly one.
    while result.ends_with("\n\n") {
        result.pop();
    }

    result
}
2962
2963#[cfg(test)]
2964mod tests {
2965    use super::*;
2966
2967    #[test]
2968    fn test_replace_at_word_boundaries() {
2969        // Should replace standalone occurrences
2970        assert_eq!(replace_at_word_boundaries("fn get() {}", "get", "__E__"), "fn __E__() {}");
2971        // Should NOT replace inside longer identifiers
2972        assert_eq!(replace_at_word_boundaries("fn getAll() {}", "get", "__E__"), "fn getAll() {}");
2973        assert_eq!(replace_at_word_boundaries("fn _get() {}", "get", "__E__"), "fn _get() {}");
2974        // Should replace multiple standalone occurrences
2975        assert_eq!(
2976            replace_at_word_boundaries("pub enum Source { Source }", "Source", "__E__"),
2977            "pub enum __E__ { __E__ }"
2978        );
2979        // Should not replace substring at start/end of identifiers
2980        assert_eq!(
2981            replace_at_word_boundaries("SourceManager isSource", "Source", "__E__"),
2982            "SourceManager isSource"
2983        );
2984        // Should handle multi-byte UTF-8 characters (emojis) without panicking
2985        assert_eq!(
2986            replace_at_word_boundaries("❌ get ✅", "get", "__E__"),
2987            "❌ __E__ ✅"
2988        );
2989        assert_eq!(
2990            replace_at_word_boundaries("fn 名前() { get }", "get", "__E__"),
2991            "fn 名前() { __E__ }"
2992        );
2993        // Emoji-only content with no needle match should pass through unchanged
2994        assert_eq!(
2995            replace_at_word_boundaries("🎉🚀✨", "get", "__E__"),
2996            "🎉🚀✨"
2997        );
2998    }
2999
3000    #[test]
3001    fn test_fast_path_identical() {
3002        let content = "hello world";
3003        let result = entity_merge(content, content, content, "test.ts");
3004        assert!(result.is_clean());
3005        assert_eq!(result.content, content);
3006    }
3007
3008    #[test]
3009    fn test_fast_path_only_ours_changed() {
3010        let base = "hello";
3011        let ours = "hello world";
3012        let result = entity_merge(base, ours, base, "test.ts");
3013        assert!(result.is_clean());
3014        assert_eq!(result.content, ours);
3015    }
3016
3017    #[test]
3018    fn test_fast_path_only_theirs_changed() {
3019        let base = "hello";
3020        let theirs = "hello world";
3021        let result = entity_merge(base, base, theirs, "test.ts");
3022        assert!(result.is_clean());
3023        assert_eq!(result.content, theirs);
3024    }
3025
3026    #[test]
3027    fn test_different_functions_no_conflict() {
3028        // Core value prop: two agents add different functions to the same file
3029        let base = r#"export function existing() {
3030    return 1;
3031}
3032"#;
3033        let ours = r#"export function existing() {
3034    return 1;
3035}
3036
3037export function agentA() {
3038    return "added by agent A";
3039}
3040"#;
3041        let theirs = r#"export function existing() {
3042    return 1;
3043}
3044
3045export function agentB() {
3046    return "added by agent B";
3047}
3048"#;
3049        let result = entity_merge(base, ours, theirs, "test.ts");
3050        assert!(
3051            result.is_clean(),
3052            "Should auto-resolve: different functions added. Conflicts: {:?}",
3053            result.conflicts
3054        );
3055        assert!(
3056            result.content.contains("agentA"),
3057            "Should contain agentA function"
3058        );
3059        assert!(
3060            result.content.contains("agentB"),
3061            "Should contain agentB function"
3062        );
3063    }
3064
3065    #[test]
3066    fn test_same_function_modified_by_both_conflict() {
3067        let base = r#"export function shared() {
3068    return "original";
3069}
3070"#;
3071        let ours = r#"export function shared() {
3072    return "modified by ours";
3073}
3074"#;
3075        let theirs = r#"export function shared() {
3076    return "modified by theirs";
3077}
3078"#;
3079        let result = entity_merge(base, ours, theirs, "test.ts");
3080        // This should be a conflict since both modified the same function incompatibly
3081        assert!(
3082            !result.is_clean(),
3083            "Should conflict when both modify same function differently"
3084        );
3085        assert_eq!(result.conflicts.len(), 1);
3086        assert_eq!(result.conflicts[0].entity_name, "shared");
3087    }
3088
3089    #[test]
3090    fn test_fallback_for_unknown_filetype() {
3091        // Non-adjacent changes should merge cleanly with line-level merge
3092        let base = "line 1\nline 2\nline 3\nline 4\nline 5\n";
3093        let ours = "line 1 modified\nline 2\nline 3\nline 4\nline 5\n";
3094        let theirs = "line 1\nline 2\nline 3\nline 4\nline 5 modified\n";
3095        let result = entity_merge(base, ours, theirs, "test.xyz");
3096        assert!(
3097            result.is_clean(),
3098            "Non-adjacent changes should merge cleanly. Conflicts: {:?}",
3099            result.conflicts,
3100        );
3101    }
3102
3103    #[test]
3104    fn test_line_level_fallback() {
3105        // Non-adjacent changes merge cleanly in 3-way merge
3106        let base = "a\nb\nc\nd\ne\n";
3107        let ours = "A\nb\nc\nd\ne\n";
3108        let theirs = "a\nb\nc\nd\nE\n";
3109        let result = line_level_fallback(base, ours, theirs, "test.rs");
3110        assert!(result.is_clean());
3111        assert!(result.stats.used_fallback);
3112        assert_eq!(result.content, "A\nb\nc\nd\nE\n");
3113    }
3114
3115    #[test]
3116    fn test_line_level_fallback_conflict() {
3117        // Same line changed differently → conflict
3118        let base = "a\nb\nc\n";
3119        let ours = "X\nb\nc\n";
3120        let theirs = "Y\nb\nc\n";
3121        let result = line_level_fallback(base, ours, theirs, "test.rs");
3122        assert!(!result.is_clean());
3123        assert!(result.stats.used_fallback);
3124    }
3125
3126    #[test]
3127    fn test_expand_separators() {
3128        let code = "function foo() { return 1; }";
3129        let expanded = expand_separators(code);
3130        // Separators should be on their own lines
3131        assert!(expanded.contains("{\n"), "Opening brace should have newline after");
3132        assert!(expanded.contains(";\n"), "Semicolons should have newline after");
3133        assert!(expanded.contains("\n}"), "Closing brace should have newline before");
3134    }
3135
3136    #[test]
3137    fn test_expand_separators_preserves_strings() {
3138        let code = r#"let x = "hello { world };";"#;
3139        let expanded = expand_separators(code);
3140        // Separators inside strings should NOT be expanded
3141        assert!(
3142            expanded.contains("\"hello { world };\""),
3143            "Separators in strings should be preserved: {}",
3144            expanded
3145        );
3146    }
3147
3148    #[test]
3149    fn test_is_import_region() {
3150        assert!(is_import_region("import foo from 'foo';\nimport bar from 'bar';\n"));
3151        assert!(is_import_region("use std::io;\nuse std::fs;\n"));
3152        assert!(!is_import_region("let x = 1;\nlet y = 2;\n"));
3153        // Mixed: 1 import + 2 non-imports → not import region
3154        assert!(!is_import_region("import foo from 'foo';\nlet x = 1;\nlet y = 2;\n"));
3155        // Empty → not import region
3156        assert!(!is_import_region(""));
3157    }
3158
3159    #[test]
3160    fn test_is_import_line() {
3161        // JS/TS
3162        assert!(is_import_line("import foo from 'foo';"));
3163        assert!(is_import_line("import { bar } from 'bar';"));
3164        assert!(is_import_line("from typing import List"));
3165        // Rust
3166        assert!(is_import_line("use std::io::Read;"));
3167        // C/C++
3168        assert!(is_import_line("#include <stdio.h>"));
3169        // Node require
3170        assert!(is_import_line("const fs = require('fs');"));
3171        // Not imports
3172        assert!(!is_import_line("let x = 1;"));
3173        assert!(!is_import_line("function foo() {}"));
3174    }
3175
3176    #[test]
3177    fn test_commutative_import_merge_both_add_different() {
3178        // The key scenario: both branches add different imports
3179        let base = "import a from 'a';\nimport b from 'b';\n";
3180        let ours = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
3181        let theirs = "import a from 'a';\nimport b from 'b';\nimport d from 'd';\n";
3182        let result = merge_imports_commutatively(base, ours, theirs);
3183        assert!(result.contains("import a from 'a';"));
3184        assert!(result.contains("import b from 'b';"));
3185        assert!(result.contains("import c from 'c';"));
3186        assert!(result.contains("import d from 'd';"));
3187    }
3188
3189    #[test]
3190    fn test_commutative_import_merge_one_removes() {
3191        // Ours removes an import, theirs keeps it → removed
3192        let base = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
3193        let ours = "import a from 'a';\nimport c from 'c';\n";
3194        let theirs = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
3195        let result = merge_imports_commutatively(base, ours, theirs);
3196        assert!(result.contains("import a from 'a';"));
3197        assert!(!result.contains("import b from 'b';"), "Removed import should stay removed");
3198        assert!(result.contains("import c from 'c';"));
3199    }
3200
3201    #[test]
3202    fn test_commutative_import_merge_both_add_same() {
3203        // Both add the same import → should appear only once
3204        let base = "import a from 'a';\n";
3205        let ours = "import a from 'a';\nimport b from 'b';\n";
3206        let theirs = "import a from 'a';\nimport b from 'b';\n";
3207        let result = merge_imports_commutatively(base, ours, theirs);
3208        let count = result.matches("import b from 'b';").count();
3209        assert_eq!(count, 1, "Duplicate import should be deduplicated");
3210    }
3211
3212    #[test]
3213    fn test_inner_entity_merge_different_methods() {
3214        // Two agents modify different methods in the same class
3215        // This would normally conflict with diffy because the changes are adjacent
3216        let base = r#"export class Calculator {
3217    add(a: number, b: number): number {
3218        return a + b;
3219    }
3220
3221    subtract(a: number, b: number): number {
3222        return a - b;
3223    }
3224}
3225"#;
3226        let ours = r#"export class Calculator {
3227    add(a: number, b: number): number {
3228        // Added logging
3229        console.log("adding", a, b);
3230        return a + b;
3231    }
3232
3233    subtract(a: number, b: number): number {
3234        return a - b;
3235    }
3236}
3237"#;
3238        let theirs = r#"export class Calculator {
3239    add(a: number, b: number): number {
3240        return a + b;
3241    }
3242
3243    subtract(a: number, b: number): number {
3244        // Added validation
3245        if (b > a) throw new Error("negative");
3246        return a - b;
3247    }
3248}
3249"#;
3250        let result = entity_merge(base, ours, theirs, "test.ts");
3251        assert!(
3252            result.is_clean(),
3253            "Different methods modified should auto-merge via inner entity merge. Conflicts: {:?}",
3254            result.conflicts,
3255        );
3256        assert!(result.content.contains("console.log"), "Should contain ours changes");
3257        assert!(result.content.contains("negative"), "Should contain theirs changes");
3258    }
3259
3260    #[test]
3261    fn test_inner_entity_merge_both_add_different_methods() {
3262        // Both branches add different methods to the same class
3263        let base = r#"export class Calculator {
3264    add(a: number, b: number): number {
3265        return a + b;
3266    }
3267}
3268"#;
3269        let ours = r#"export class Calculator {
3270    add(a: number, b: number): number {
3271        return a + b;
3272    }
3273
3274    multiply(a: number, b: number): number {
3275        return a * b;
3276    }
3277}
3278"#;
3279        let theirs = r#"export class Calculator {
3280    add(a: number, b: number): number {
3281        return a + b;
3282    }
3283
3284    divide(a: number, b: number): number {
3285        return a / b;
3286    }
3287}
3288"#;
3289        let result = entity_merge(base, ours, theirs, "test.ts");
3290        assert!(
3291            result.is_clean(),
3292            "Both adding different methods should auto-merge. Conflicts: {:?}",
3293            result.conflicts,
3294        );
3295        assert!(result.content.contains("multiply"), "Should contain ours's new method");
3296        assert!(result.content.contains("divide"), "Should contain theirs's new method");
3297    }
3298
3299    #[test]
3300    fn test_inner_entity_merge_same_method_modified_still_conflicts() {
3301        // Both modify the same method differently → should still conflict
3302        let base = r#"export class Calculator {
3303    add(a: number, b: number): number {
3304        return a + b;
3305    }
3306
3307    subtract(a: number, b: number): number {
3308        return a - b;
3309    }
3310}
3311"#;
3312        let ours = r#"export class Calculator {
3313    add(a: number, b: number): number {
3314        return a + b + 1;
3315    }
3316
3317    subtract(a: number, b: number): number {
3318        return a - b;
3319    }
3320}
3321"#;
3322        let theirs = r#"export class Calculator {
3323    add(a: number, b: number): number {
3324        return a + b + 2;
3325    }
3326
3327    subtract(a: number, b: number): number {
3328        return a - b;
3329    }
3330}
3331"#;
3332        let result = entity_merge(base, ours, theirs, "test.ts");
3333        assert!(
3334            !result.is_clean(),
3335            "Both modifying same method differently should still conflict"
3336        );
3337    }
3338
3339    #[test]
3340    fn test_extract_member_chunks() {
3341        let class_body = r#"export class Foo {
3342    bar() {
3343        return 1;
3344    }
3345
3346    baz() {
3347        return 2;
3348    }
3349}
3350"#;
3351        let chunks = extract_member_chunks(class_body).unwrap();
3352        assert_eq!(chunks.len(), 2, "Should find 2 members, found {:?}", chunks.iter().map(|c| &c.name).collect::<Vec<_>>());
3353        assert_eq!(chunks[0].name, "bar");
3354        assert_eq!(chunks[1].name, "baz");
3355    }
3356
3357    #[test]
3358    fn test_extract_member_name() {
3359        assert_eq!(extract_member_name("add(a, b) {"), "add");
3360        assert_eq!(extract_member_name("fn add(&self, a: i32) -> i32 {"), "add");
3361        assert_eq!(extract_member_name("def add(self, a, b):"), "add");
3362        assert_eq!(extract_member_name("public static getValue(): number {"), "getValue");
3363        assert_eq!(extract_member_name("async fetchData() {"), "fetchData");
3364    }
3365
3366    #[test]
3367    fn test_commutative_import_merge_rust_use() {
3368        let base = "use std::io;\nuse std::fs;\n";
3369        let ours = "use std::io;\nuse std::fs;\nuse std::path::Path;\n";
3370        let theirs = "use std::io;\nuse std::fs;\nuse std::collections::HashMap;\n";
3371        let result = merge_imports_commutatively(base, ours, theirs);
3372        assert!(result.contains("use std::path::Path;"));
3373        assert!(result.contains("use std::collections::HashMap;"));
3374        assert!(result.contains("use std::io;"));
3375        assert!(result.contains("use std::fs;"));
3376    }
3377
3378    #[test]
3379    fn test_is_whitespace_only_diff_true() {
3380        // Same content, different indentation
3381        assert!(is_whitespace_only_diff(
3382            "    return 1;\n    return 2;\n",
3383            "      return 1;\n      return 2;\n"
3384        ));
3385        // Same content, extra blank lines
3386        assert!(is_whitespace_only_diff(
3387            "return 1;\nreturn 2;\n",
3388            "return 1;\n\nreturn 2;\n"
3389        ));
3390    }
3391
3392    #[test]
3393    fn test_is_whitespace_only_diff_false() {
3394        // Different content
3395        assert!(!is_whitespace_only_diff(
3396            "    return 1;\n",
3397            "    return 2;\n"
3398        ));
3399        // Added code
3400        assert!(!is_whitespace_only_diff(
3401            "return 1;\n",
3402            "return 1;\nconsole.log('x');\n"
3403        ));
3404    }
3405
3406    #[test]
3407    fn test_ts_interface_both_add_different_fields() {
3408        let base = "interface Config {\n    name: string;\n}\n";
3409        let ours = "interface Config {\n    name: string;\n    age: number;\n}\n";
3410        let theirs = "interface Config {\n    name: string;\n    email: string;\n}\n";
3411        let result = entity_merge(base, ours, theirs, "test.ts");
3412        eprintln!("TS interface: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3413        eprintln!("Content: {:?}", result.content);
3414        assert!(
3415            result.is_clean(),
3416            "Both adding different fields to TS interface should merge. Conflicts: {:?}",
3417            result.conflicts,
3418        );
3419        assert!(result.content.contains("age"));
3420        assert!(result.content.contains("email"));
3421    }
3422
3423    #[test]
3424    fn test_rust_enum_both_add_different_variants() {
3425        let base = "enum Color {\n    Red,\n    Blue,\n}\n";
3426        let ours = "enum Color {\n    Red,\n    Blue,\n    Green,\n}\n";
3427        let theirs = "enum Color {\n    Red,\n    Blue,\n    Yellow,\n}\n";
3428        let result = entity_merge(base, ours, theirs, "test.rs");
3429        eprintln!("Rust enum: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3430        eprintln!("Content: {:?}", result.content);
3431        assert!(
3432            result.is_clean(),
3433            "Both adding different enum variants should merge. Conflicts: {:?}",
3434            result.conflicts,
3435        );
3436        assert!(result.content.contains("Green"));
3437        assert!(result.content.contains("Yellow"));
3438    }
3439
3440    #[test]
3441    fn test_python_both_add_different_decorators() {
3442        // Both add different decorators to the same function
3443        let base = "def foo():\n    return 1\n\ndef bar():\n    return 2\n";
3444        let ours = "@cache\ndef foo():\n    return 1\n\ndef bar():\n    return 2\n";
3445        let theirs = "@deprecated\ndef foo():\n    return 1\n\ndef bar():\n    return 2\n";
3446        let result = entity_merge(base, ours, theirs, "test.py");
3447        assert!(
3448            result.is_clean(),
3449            "Both adding different decorators should merge. Conflicts: {:?}",
3450            result.conflicts,
3451        );
3452        assert!(result.content.contains("@cache"));
3453        assert!(result.content.contains("@deprecated"));
3454        assert!(result.content.contains("def foo()"));
3455    }
3456
3457    #[test]
3458    fn test_decorator_plus_body_change() {
3459        // One adds decorator, other modifies body — should merge both
3460        let base = "def foo():\n    return 1\n";
3461        let ours = "@cache\ndef foo():\n    return 1\n";
3462        let theirs = "def foo():\n    return 42\n";
3463        let result = entity_merge(base, ours, theirs, "test.py");
3464        assert!(
3465            result.is_clean(),
3466            "Decorator + body change should merge. Conflicts: {:?}",
3467            result.conflicts,
3468        );
3469        assert!(result.content.contains("@cache"));
3470        assert!(result.content.contains("return 42"));
3471    }
3472
3473    #[test]
3474    fn test_ts_class_decorator_merge() {
3475        // TypeScript decorators on class methods — both add different decorators
3476        let base = "class Foo {\n    bar() {\n        return 1;\n    }\n}\n";
3477        let ours = "class Foo {\n    @Injectable()\n    bar() {\n        return 1;\n    }\n}\n";
3478        let theirs = "class Foo {\n    @Deprecated()\n    bar() {\n        return 1;\n    }\n}\n";
3479        let result = entity_merge(base, ours, theirs, "test.ts");
3480        assert!(
3481            result.is_clean(),
3482            "Both adding different decorators to same method should merge. Conflicts: {:?}",
3483            result.conflicts,
3484        );
3485        assert!(result.content.contains("@Injectable()"));
3486        assert!(result.content.contains("@Deprecated()"));
3487        assert!(result.content.contains("bar()"));
3488    }
3489
3490    #[test]
3491    fn test_non_adjacent_intra_function_changes() {
3492        let base = r#"export function process(data: any) {
3493    const validated = validate(data);
3494    const transformed = transform(validated);
3495    const saved = save(transformed);
3496    return saved;
3497}
3498"#;
3499        let ours = r#"export function process(data: any) {
3500    const validated = validate(data);
3501    const transformed = transform(validated);
3502    const saved = save(transformed);
3503    console.log("saved", saved);
3504    return saved;
3505}
3506"#;
3507        let theirs = r#"export function process(data: any) {
3508    console.log("input", data);
3509    const validated = validate(data);
3510    const transformed = transform(validated);
3511    const saved = save(transformed);
3512    return saved;
3513}
3514"#;
3515        let result = entity_merge(base, ours, theirs, "test.ts");
3516        assert!(
3517            result.is_clean(),
3518            "Non-adjacent changes within same function should merge via diffy. Conflicts: {:?}",
3519            result.conflicts,
3520        );
3521        assert!(result.content.contains("console.log(\"saved\""));
3522        assert!(result.content.contains("console.log(\"input\""));
3523    }
3524
3525    #[test]
3526    fn test_method_reordering_with_modification() {
3527        // Agent A reorders methods in class, Agent B modifies one method
3528        // Inner entity merge matches by name, so reordering should be transparent
3529        let base = r#"class Service {
3530    getUser(id: string) {
3531        return db.find(id);
3532    }
3533
3534    createUser(data: any) {
3535        return db.create(data);
3536    }
3537
3538    deleteUser(id: string) {
3539        return db.delete(id);
3540    }
3541}
3542"#;
3543        // Ours: reorder methods (move deleteUser before createUser)
3544        let ours = r#"class Service {
3545    getUser(id: string) {
3546        return db.find(id);
3547    }
3548
3549    deleteUser(id: string) {
3550        return db.delete(id);
3551    }
3552
3553    createUser(data: any) {
3554        return db.create(data);
3555    }
3556}
3557"#;
3558        // Theirs: modify getUser
3559        let theirs = r#"class Service {
3560    getUser(id: string) {
3561        console.log("fetching", id);
3562        return db.find(id);
3563    }
3564
3565    createUser(data: any) {
3566        return db.create(data);
3567    }
3568
3569    deleteUser(id: string) {
3570        return db.delete(id);
3571    }
3572}
3573"#;
3574        let result = entity_merge(base, ours, theirs, "test.ts");
3575        eprintln!("Method reorder: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3576        eprintln!("Content:\n{}", result.content);
3577        assert!(
3578            result.is_clean(),
3579            "Method reordering + modification should merge. Conflicts: {:?}",
3580            result.conflicts,
3581        );
3582        assert!(result.content.contains("console.log(\"fetching\""), "Should contain theirs modification");
3583        assert!(result.content.contains("deleteUser"), "Should have deleteUser");
3584        assert!(result.content.contains("createUser"), "Should have createUser");
3585    }
3586
3587    #[test]
3588    fn test_doc_comment_plus_body_change() {
3589        // One side adds JSDoc comment, other modifies function body
3590        // Doc comments are part of the entity region — they should merge with body changes
3591        let base = r#"export function calculate(a: number, b: number): number {
3592    return a + b;
3593}
3594"#;
3595        let ours = r#"/**
3596 * Calculate the sum of two numbers.
3597 * @param a - First number
3598 * @param b - Second number
3599 */
3600export function calculate(a: number, b: number): number {
3601    return a + b;
3602}
3603"#;
3604        let theirs = r#"export function calculate(a: number, b: number): number {
3605    const result = a + b;
3606    console.log("result:", result);
3607    return result;
3608}
3609"#;
3610        let result = entity_merge(base, ours, theirs, "test.ts");
3611        eprintln!("Doc comment + body: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3612        eprintln!("Content:\n{}", result.content);
3613        // This tests whether weave can merge doc comment additions with body changes
3614    }
3615
3616    #[test]
3617    fn test_both_add_different_guard_clauses() {
3618        // Both add different guard clauses at the start of a function
3619        let base = r#"export function processOrder(order: Order): Result {
3620    const total = calculateTotal(order);
3621    return { success: true, total };
3622}
3623"#;
3624        let ours = r#"export function processOrder(order: Order): Result {
3625    if (!order) throw new Error("Order required");
3626    const total = calculateTotal(order);
3627    return { success: true, total };
3628}
3629"#;
3630        let theirs = r#"export function processOrder(order: Order): Result {
3631    if (order.items.length === 0) throw new Error("Empty order");
3632    const total = calculateTotal(order);
3633    return { success: true, total };
3634}
3635"#;
3636        let result = entity_merge(base, ours, theirs, "test.ts");
3637        eprintln!("Guard clauses: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3638        eprintln!("Content:\n{}", result.content);
3639        // Both add at same position — diffy may struggle since they're at the same insertion point
3640    }
3641
3642    #[test]
3643    fn test_both_modify_different_enum_variants() {
3644        // One modifies a variant's value, other adds new variants
3645        let base = r#"enum Status {
3646    Active = "active",
3647    Inactive = "inactive",
3648    Pending = "pending",
3649}
3650"#;
3651        let ours = r#"enum Status {
3652    Active = "active",
3653    Inactive = "disabled",
3654    Pending = "pending",
3655}
3656"#;
3657        let theirs = r#"enum Status {
3658    Active = "active",
3659    Inactive = "inactive",
3660    Pending = "pending",
3661    Deleted = "deleted",
3662}
3663"#;
3664        let result = entity_merge(base, ours, theirs, "test.ts");
3665        eprintln!("Enum modify+add: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3666        eprintln!("Content:\n{}", result.content);
3667        assert!(
3668            result.is_clean(),
3669            "Modify variant + add new variant should merge. Conflicts: {:?}",
3670            result.conflicts,
3671        );
3672        assert!(result.content.contains("\"disabled\""), "Should have modified Inactive");
3673        assert!(result.content.contains("Deleted"), "Should have new Deleted variant");
3674    }
3675
3676    #[test]
3677    fn test_config_object_field_additions() {
3678        // Both add different fields to a config object (exported const)
3679        let base = r#"export const config = {
3680    timeout: 5000,
3681    retries: 3,
3682};
3683"#;
3684        let ours = r#"export const config = {
3685    timeout: 5000,
3686    retries: 3,
3687    maxConnections: 10,
3688};
3689"#;
3690        let theirs = r#"export const config = {
3691    timeout: 5000,
3692    retries: 3,
3693    logLevel: "info",
3694};
3695"#;
3696        let result = entity_merge(base, ours, theirs, "test.ts");
3697        eprintln!("Config fields: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3698        eprintln!("Content:\n{}", result.content);
3699        // This tests whether inner entity merge handles object literals
3700        // (it probably won't since object fields aren't extracted as members the same way)
3701    }
3702
3703    #[test]
3704    fn test_rust_impl_block_both_add_methods() {
3705        // Both add different methods to a Rust impl block
3706        let base = r#"impl Calculator {
3707    fn add(&self, a: i32, b: i32) -> i32 {
3708        a + b
3709    }
3710}
3711"#;
3712        let ours = r#"impl Calculator {
3713    fn add(&self, a: i32, b: i32) -> i32 {
3714        a + b
3715    }
3716
3717    fn multiply(&self, a: i32, b: i32) -> i32 {
3718        a * b
3719    }
3720}
3721"#;
3722        let theirs = r#"impl Calculator {
3723    fn add(&self, a: i32, b: i32) -> i32 {
3724        a + b
3725    }
3726
3727    fn divide(&self, a: i32, b: i32) -> i32 {
3728        a / b
3729    }
3730}
3731"#;
3732        let result = entity_merge(base, ours, theirs, "test.rs");
3733        eprintln!("Rust impl: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3734        eprintln!("Content:\n{}", result.content);
3735        assert!(
3736            result.is_clean(),
3737            "Both adding methods to Rust impl should merge. Conflicts: {:?}",
3738            result.conflicts,
3739        );
3740        assert!(result.content.contains("multiply"), "Should have multiply");
3741        assert!(result.content.contains("divide"), "Should have divide");
3742    }
3743
3744    #[test]
3745    fn test_rust_impl_same_trait_different_types() {
3746        // Two impl blocks for the same trait but different types.
3747        // Each branch modifies a different impl. Both should be preserved.
3748        // Regression: sem-core <0.3.10 named both "Stream", causing collision.
3749        let base = r#"struct Foo;
3750struct Bar;
3751
3752impl Stream for Foo {
3753    type Item = i32;
3754    fn poll_next(&self) -> Option<i32> {
3755        Some(1)
3756    }
3757}
3758
3759impl Stream for Bar {
3760    type Item = String;
3761    fn poll_next(&self) -> Option<String> {
3762        Some("hello".into())
3763    }
3764}
3765
3766fn other() {}
3767"#;
3768        let ours = r#"struct Foo;
3769struct Bar;
3770
3771impl Stream for Foo {
3772    type Item = i32;
3773    fn poll_next(&self) -> Option<i32> {
3774        let x = compute();
3775        Some(x + 1)
3776    }
3777}
3778
3779impl Stream for Bar {
3780    type Item = String;
3781    fn poll_next(&self) -> Option<String> {
3782        Some("hello".into())
3783    }
3784}
3785
3786fn other() {}
3787"#;
3788        let theirs = r#"struct Foo;
3789struct Bar;
3790
3791impl Stream for Foo {
3792    type Item = i32;
3793    fn poll_next(&self) -> Option<i32> {
3794        Some(1)
3795    }
3796}
3797
3798impl Stream for Bar {
3799    type Item = String;
3800    fn poll_next(&self) -> Option<String> {
3801        let s = format!("hello {}", name);
3802        Some(s)
3803    }
3804}
3805
3806fn other() {}
3807"#;
3808        let result = entity_merge(base, ours, theirs, "test.rs");
3809        assert!(
3810            result.is_clean(),
3811            "Same trait, different types should not conflict. Conflicts: {:?}",
3812            result.conflicts,
3813        );
3814        assert!(result.content.contains("impl Stream for Foo"), "Should have Foo impl");
3815        assert!(result.content.contains("impl Stream for Bar"), "Should have Bar impl");
3816        assert!(result.content.contains("compute()"), "Should have ours' Foo change");
3817        assert!(result.content.contains("format!"), "Should have theirs' Bar change");
3818    }
3819
3820    #[test]
3821    fn test_rust_doc_comment_plus_body_change() {
3822        // One side adds Rust doc comment, other modifies body
3823        // Comment bundling ensures the doc comment is part of the entity
3824        let base = r#"fn add(a: i32, b: i32) -> i32 {
3825    a + b
3826}
3827
3828fn subtract(a: i32, b: i32) -> i32 {
3829    a - b
3830}
3831"#;
3832        let ours = r#"/// Adds two numbers together.
3833fn add(a: i32, b: i32) -> i32 {
3834    a + b
3835}
3836
3837fn subtract(a: i32, b: i32) -> i32 {
3838    a - b
3839}
3840"#;
3841        let theirs = r#"fn add(a: i32, b: i32) -> i32 {
3842    a + b
3843}
3844
3845fn subtract(a: i32, b: i32) -> i32 {
3846    a - b - 1
3847}
3848"#;
3849        let result = entity_merge(base, ours, theirs, "test.rs");
3850        assert!(
3851            result.is_clean(),
3852            "Rust doc comment + body change should merge. Conflicts: {:?}",
3853            result.conflicts,
3854        );
3855        assert!(result.content.contains("/// Adds two numbers"), "Should have ours doc comment");
3856        assert!(result.content.contains("a - b - 1"), "Should have theirs body change");
3857    }
3858
3859    #[test]
3860    fn test_both_add_different_doc_comments() {
3861        // Both add doc comments to different functions — should merge cleanly
3862        let base = r#"fn add(a: i32, b: i32) -> i32 {
3863    a + b
3864}
3865
3866fn subtract(a: i32, b: i32) -> i32 {
3867    a - b
3868}
3869"#;
3870        let ours = r#"/// Adds two numbers.
3871fn add(a: i32, b: i32) -> i32 {
3872    a + b
3873}
3874
3875fn subtract(a: i32, b: i32) -> i32 {
3876    a - b
3877}
3878"#;
3879        let theirs = r#"fn add(a: i32, b: i32) -> i32 {
3880    a + b
3881}
3882
3883/// Subtracts b from a.
3884fn subtract(a: i32, b: i32) -> i32 {
3885    a - b
3886}
3887"#;
3888        let result = entity_merge(base, ours, theirs, "test.rs");
3889        assert!(
3890            result.is_clean(),
3891            "Both adding doc comments to different functions should merge. Conflicts: {:?}",
3892            result.conflicts,
3893        );
3894        assert!(result.content.contains("/// Adds two numbers"), "Should have add's doc comment");
3895        assert!(result.content.contains("/// Subtracts b from a"), "Should have subtract's doc comment");
3896    }
3897
3898    #[test]
3899    fn test_go_import_block_both_add_different() {
3900        // Go uses import (...) blocks — both add different imports
3901        let base = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3902        let ours = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"strings\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3903        let theirs = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"io\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3904        let result = entity_merge(base, ours, theirs, "main.go");
3905        eprintln!("Go import block: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3906        eprintln!("Content:\n{}", result.content);
3907        // This tests whether Go import blocks (a single entity) get inner-merged
3908    }
3909
3910    #[test]
3911    fn test_python_class_both_add_methods() {
3912        // Python class — both add different methods
3913        let base = "class Calculator:\n    def add(self, a, b):\n        return a + b\n";
3914        let ours = "class Calculator:\n    def add(self, a, b):\n        return a + b\n\n    def multiply(self, a, b):\n        return a * b\n";
3915        let theirs = "class Calculator:\n    def add(self, a, b):\n        return a + b\n\n    def divide(self, a, b):\n        return a / b\n";
3916        let result = entity_merge(base, ours, theirs, "test.py");
3917        eprintln!("Python class: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3918        eprintln!("Content:\n{}", result.content);
3919        assert!(
3920            result.is_clean(),
3921            "Both adding methods to Python class should merge. Conflicts: {:?}",
3922            result.conflicts,
3923        );
3924        assert!(result.content.contains("multiply"), "Should have multiply");
3925        assert!(result.content.contains("divide"), "Should have divide");
3926    }
3927
3928    #[test]
3929    fn test_interstitial_conflict_not_silently_embedded() {
3930        // Regression test: when interstitial content between entities has a
3931        // both-modified conflict, merge_interstitials must report it as a real
3932        // conflict instead of silently embedding raw diffy markers and claiming
3933        // is_clean=true.
3934        //
3935        // Scenario: a barrel export file (index.ts) with comments between
3936        // export statements. Both sides modify the SAME interstitial comment
3937        // block differently. The exports are the entities; the comment between
3938        // them is interstitial content that goes through merge_interstitials
3939        // → diffy, which cannot auto-merge conflicting edits.
3940        let base = r#"export { alpha } from "./alpha";
3941
3942// Section: data utilities
3943// TODO: add more exports here
3944
3945export { beta } from "./beta";
3946"#;
3947        let ours = r#"export { alpha } from "./alpha";
3948
3949// Section: data utilities (sorting)
3950// Sorting helpers for list views
3951
3952export { beta } from "./beta";
3953"#;
3954        let theirs = r#"export { alpha } from "./alpha";
3955
3956// Section: data utilities (filtering)
3957// Filtering helpers for search views
3958
3959export { beta } from "./beta";
3960"#;
3961        let result = entity_merge(base, ours, theirs, "index.ts");
3962
3963        // The key assertions:
3964        // 1. If the content has conflict markers, is_clean() MUST be false
3965        let has_markers = result.content.contains("<<<<<<<") || result.content.contains(">>>>>>>");
3966        if has_markers {
3967            assert!(
3968                !result.is_clean(),
3969                "BUG: is_clean()=true but merged content has conflict markers!\n\
3970                 stats: {}\nconflicts: {:?}\ncontent:\n{}",
3971                result.stats, result.conflicts, result.content
3972            );
3973            assert!(
3974                result.stats.entities_conflicted > 0,
3975                "entities_conflicted should be > 0 when markers are present"
3976            );
3977        }
3978
3979        // 2. If it was resolved cleanly, no markers should exist
3980        if result.is_clean() {
3981            assert!(
3982                !has_markers,
3983                "Clean merge should not contain conflict markers!\ncontent:\n{}",
3984                result.content
3985            );
3986        }
3987    }
3988
3989    #[test]
3990    fn test_pre_conflicted_input_not_treated_as_clean() {
3991        // Regression test for AU/AA conflicts: git can store conflict markers
3992        // directly into stage blobs. Weave must not return is_clean=true.
3993        let base = "";
3994        let theirs = "";
3995        let ours = r#"/**
3996 * MIT License
3997 */
3998
3999<<<<<<<< HEAD:src/lib/exports/index.ts
4000export { renderDocToBuffer } from "./doc-exporter";
4001export type { ExportOptions, ExportMetadata, RenderContext } from "./types";
4002========
4003export * from "./editor";
4004export * from "./types";
4005>>>>>>>> feature:packages/core/src/editor/index.ts
4006"#;
4007        let result = entity_merge(base, ours, theirs, "index.ts");
4008
4009        assert!(
4010            !result.is_clean(),
4011            "Pre-conflicted input must not be reported as clean!\n\
4012             stats: {}\nconflicts: {:?}",
4013            result.stats, result.conflicts,
4014        );
4015        assert!(result.stats.entities_conflicted > 0);
4016        assert!(!result.conflicts.is_empty());
4017    }
4018
4019    #[test]
4020    fn test_multi_line_signature_classified_as_syntax() {
4021        // Multi-line parameter list: changing a param should be Syntax, not Functional
4022        let base = "function process(\n    a: number,\n    b: string\n) {\n    return a;\n}\n";
4023        let ours = "function process(\n    a: number,\n    b: string,\n    c: boolean\n) {\n    return a;\n}\n";
4024        let theirs = "function process(\n    a: number,\n    b: number\n) {\n    return a;\n}\n";
4025        let complexity = crate::conflict::classify_conflict(Some(base), Some(ours), Some(theirs));
4026        assert_eq!(
4027            complexity,
4028            crate::conflict::ConflictComplexity::Syntax,
4029            "Multi-line signature change should be classified as Syntax, got {:?}",
4030            complexity
4031        );
4032    }
4033
4034    #[test]
4035    fn test_grouped_import_merge_preserves_groups() {
4036        let base = "import os\nimport sys\n\nfrom collections import OrderedDict\nfrom typing import List\n";
4037        let ours = "import os\nimport sys\nimport json\n\nfrom collections import OrderedDict\nfrom typing import List\n";
4038        let theirs = "import os\nimport sys\n\nfrom collections import OrderedDict\nfrom collections import defaultdict\nfrom typing import List\n";
4039        let result = merge_imports_commutatively(base, ours, theirs);
4040        // json should be in the first group (stdlib), defaultdict in the second (collections)
4041        let lines: Vec<&str> = result.lines().collect();
4042        let json_idx = lines.iter().position(|l| l.contains("json"));
4043        let blank_idx = lines.iter().position(|l| l.trim().is_empty());
4044        let defaultdict_idx = lines.iter().position(|l| l.contains("defaultdict"));
4045        assert!(json_idx.is_some(), "json import should be present");
4046        assert!(blank_idx.is_some(), "blank line separator should be present");
4047        assert!(defaultdict_idx.is_some(), "defaultdict import should be present");
4048        // json should come before the blank line, defaultdict after
4049        assert!(json_idx.unwrap() < blank_idx.unwrap(), "json should be in first group");
4050        assert!(defaultdict_idx.unwrap() > blank_idx.unwrap(), "defaultdict should be in second group");
4051    }
4052
4053    #[test]
4054    fn test_configurable_duplicate_threshold() {
4055        // Create entities with 15 same-name entities
4056        let entities: Vec<SemanticEntity> = (0..15).map(|i| SemanticEntity {
4057            id: format!("test::function::test_{}", i),
4058            file_path: "test.ts".to_string(),
4059            entity_type: "function".to_string(),
4060            name: "test".to_string(),
4061            parent_id: None,
4062            content: format!("function test() {{ return {}; }}", i),
4063            content_hash: format!("hash_{}", i),
4064            structural_hash: None,
4065            start_line: i * 3 + 1,
4066            end_line: i * 3 + 3,
4067            metadata: None,
4068        }).collect();
4069        // Default threshold (10): should trigger
4070        assert!(has_excessive_duplicates(&entities));
4071        // Set threshold to 20: should not trigger
4072        std::env::set_var("WEAVE_MAX_DUPLICATES", "20");
4073        assert!(!has_excessive_duplicates(&entities));
4074        std::env::remove_var("WEAVE_MAX_DUPLICATES");
4075    }
4076
4077    #[test]
4078    fn test_ts_multiline_import_consolidation() {
4079        // Issue #24: when incoming consolidates two imports into one multi-line import,
4080        // the `import {` opening line can get dropped.
4081        let base = "\
4082import type { Foo } from \"./foo\"
4083import {
4084     type a,
4085     type b,
4086     type c,
4087} from \"./foo\"
4088
4089export function bar() {
4090    return 1;
4091}
4092";
4093        let ours = base;
4094        let theirs = "\
4095import {
4096     type Foo,
4097     type a,
4098     type b,
4099     type c,
4100} from \"./foo\"
4101
4102export function bar() {
4103    return 1;
4104}
4105";
4106        let result = entity_merge(base, ours, theirs, "test.ts");
4107        eprintln!("TS import consolidation: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4108        eprintln!("Content:\n{}", result.content);
4109        // Theirs is the only change, result should match theirs exactly
4110        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4111        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4112        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4113        assert!(!result.content.contains("import type { Foo }"), "old separate import should be removed");
4114    }
4115
4116    #[test]
4117    fn test_ts_multiline_import_both_modify() {
4118        // Issue #24 variant: both sides modify the import block
4119        let base = "\
4120import type { Foo } from \"./foo\"
4121import {
4122     type a,
4123     type b,
4124     type c,
4125} from \"./foo\"
4126
4127export function bar() {
4128    return 1;
4129}
4130";
4131        // Ours: consolidates imports + adds type d
4132        let ours = "\
4133import {
4134     type Foo,
4135     type a,
4136     type b,
4137     type c,
4138     type d,
4139} from \"./foo\"
4140
4141export function bar() {
4142    return 1;
4143}
4144";
4145        // Theirs: consolidates imports + adds type e
4146        let theirs = "\
4147import {
4148     type Foo,
4149     type a,
4150     type b,
4151     type c,
4152     type e,
4153} from \"./foo\"
4154
4155export function bar() {
4156    return 1;
4157}
4158";
4159        let result = entity_merge(base, ours, theirs, "test.ts");
4160        eprintln!("TS import both modify: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4161        eprintln!("Content:\n{}", result.content);
4162        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4163        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4164        assert!(result.content.contains("type d,"), "ours addition must be present");
4165        assert!(result.content.contains("type e,"), "theirs addition must be present");
4166        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4167    }
4168
4169    #[test]
4170    fn test_ts_multiline_import_no_entities() {
4171        // Issue #24: file with only imports, no other entities
4172        let base = "\
4173import type { Foo } from \"./foo\"
4174import {
4175     type a,
4176     type b,
4177     type c,
4178} from \"./foo\"
4179";
4180        let ours = base;
4181        let theirs = "\
4182import {
4183     type Foo,
4184     type a,
4185     type b,
4186     type c,
4187} from \"./foo\"
4188";
4189        let result = entity_merge(base, ours, theirs, "test.ts");
4190        eprintln!("TS import no entities: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4191        eprintln!("Content:\n{}", result.content);
4192        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4193        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4194    }
4195
4196    #[test]
4197    fn test_ts_multiline_import_export_variable() {
4198        // Issue #24: import block near an export variable entity
4199        let base = "\
4200import type { Foo } from \"./foo\"
4201import {
4202     type a,
4203     type b,
4204     type c,
4205} from \"./foo\"
4206
4207export const X = 1;
4208
4209export function bar() {
4210    return 1;
4211}
4212";
4213        let ours = "\
4214import type { Foo } from \"./foo\"
4215import {
4216     type a,
4217     type b,
4218     type c,
4219     type d,
4220} from \"./foo\"
4221
4222export const X = 1;
4223
4224export function bar() {
4225    return 1;
4226}
4227";
4228        let theirs = "\
4229import {
4230     type Foo,
4231     type a,
4232     type b,
4233     type c,
4234} from \"./foo\"
4235
4236export const X = 2;
4237
4238export function bar() {
4239    return 1;
4240}
4241";
4242        let result = entity_merge(base, ours, theirs, "test.ts");
4243        eprintln!("TS import + export var: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4244        eprintln!("Content:\n{}", result.content);
4245        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4246    }
4247
4248    #[test]
4249    fn test_ts_multiline_import_adjacent_to_entity() {
4250        // Issue #24: import block directly adjacent to entity (no blank line)
4251        let base = "\
4252import type { Foo } from \"./foo\"
4253import {
4254     type a,
4255     type b,
4256     type c,
4257} from \"./foo\"
4258export function bar() {
4259    return 1;
4260}
4261";
4262        let ours = base;
4263        let theirs = "\
4264import {
4265     type Foo,
4266     type a,
4267     type b,
4268     type c,
4269} from \"./foo\"
4270export function bar() {
4271    return 1;
4272}
4273";
4274        let result = entity_merge(base, ours, theirs, "test.ts");
4275        eprintln!("TS import adjacent: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4276        eprintln!("Content:\n{}", result.content);
4277        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4278        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4279    }
4280
4281    #[test]
4282    fn test_ts_multiline_import_both_consolidate_differently() {
4283        // Issue #24: both sides consolidate imports but add different specifiers
4284        let base = "\
4285import type { Foo } from \"./foo\"
4286import {
4287     type a,
4288     type b,
4289} from \"./foo\"
4290
4291export function bar() {
4292    return 1;
4293}
4294";
4295        let ours = "\
4296import {
4297     type Foo,
4298     type a,
4299     type b,
4300     type c,
4301} from \"./foo\"
4302
4303export function bar() {
4304    return 1;
4305}
4306";
4307        let theirs = "\
4308import {
4309     type Foo,
4310     type a,
4311     type b,
4312     type d,
4313} from \"./foo\"
4314
4315export function bar() {
4316    return 1;
4317}
4318";
4319        let result = entity_merge(base, ours, theirs, "test.ts");
4320        eprintln!("TS both consolidate: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4321        eprintln!("Content:\n{}", result.content);
4322        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4323        assert!(result.content.contains("type Foo,"), "type Foo must be present");
4324        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4325    }
4326
4327    #[test]
4328    fn test_ts_multiline_import_ours_adds_theirs_consolidates() {
4329        // Issue #24 variant: ours adds new import, theirs consolidates
4330        let base = "\
4331import type { Foo } from \"./foo\"
4332import {
4333     type a,
4334     type b,
4335     type c,
4336} from \"./foo\"
4337
4338export function bar() {
4339    return 1;
4340}
4341";
4342        // Ours: adds a new specifier to the multiline import
4343        let ours = "\
4344import type { Foo } from \"./foo\"
4345import {
4346     type a,
4347     type b,
4348     type c,
4349     type d,
4350} from \"./foo\"
4351
4352export function bar() {
4353    return 1;
4354}
4355";
4356        // Theirs: consolidates into one import
4357        let theirs = "\
4358import {
4359     type Foo,
4360     type a,
4361     type b,
4362     type c,
4363} from \"./foo\"
4364
4365export function bar() {
4366    return 1;
4367}
4368";
4369        let result = entity_merge(base, ours, theirs, "test.ts");
4370        eprintln!("TS import ours-adds theirs-consolidates: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
4371        eprintln!("Content:\n{}", result.content);
4372        assert!(result.content.contains("import {"), "import {{ must not be dropped");
4373        assert!(result.content.contains("type d,"), "ours addition must be present");
4374        assert!(result.content.contains("} from \"./foo\""), "closing must be present");
4375    }
4376}
weave_core/merge.rs

weave_core/
merge.rs